爬虫代码如下:
import scrapy
import json
import re
from plane.items import PlaneItem
class PlanespiderSpider(scrapy.Spider):
name = 'planespider'
allowed_domains = ['tuniu.com']
start_urls = ['https://flight-api.tuniu.com/query/flight/v2/list?callback=jQuery17207759591352059797_1565148905476&{%22withTransfer%22:true,%22passengers%22:[{%22count%22:1,%22psgType%22:%22ADT%22}],%22voyType%22:%22ST%22,%22voys%22:[{%22orgCity%22:%22CAN%22,%22dstCity%22:%22BJS%22,%22deptDate%22:%222019-08-09%22}],%22tokenKey%22:%22G92ZWFuZHBlYWNlMmxpZ2h0L3YyL2xpc%22,%22useToken%22:true}&_=1565148905672']
def parse(self, response):
plane_json = json.loads(re.match(".*?({.*}).*", response.body.decode(), re.S).group(1))
item = PlaneItem()
data = plane_json['data']
airbasic = data['airBasic']
for each in airbasic['flightMap']:
item['airCom'] = each['airCom']
item['flightNo'] = each['flightNo']
item['orgCity '] = each['orgCity']
item['dstCity '] = each['dstCity']
item['orgAirport '] = each['orgAirport']
item['dstAirport'] = each['dstAirport']
item['deptTime'] = each['deptTime']
item['arrvTime'] = each['arrvTime']
item['rate'] = each['rate']
yield item
报错信息
File "E:\pyproject\plane\plane\spiders\planespider.py", line 20, in parse
item['flightNo'] = each['flightNo']
TypeError: string indices must be integers

爬取途牛机票信息报错--TypeError: string indices must be integers
- 写回答
- 好问题 0 提建议
- 关注问题
- 邀请回答
-
1条回答 默认 最新
- ivan_prajak 2019-08-07 17:26关注
把for each in airbasic['flightMap']:这句改成for each in airbasic['flightMap'].values():,再试试。
因为data['airBasic']是一个字典,不是列表,你用for循环得到的是key值,而不是value值。本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 无用评论 打赏 举报