# The code is as follows:
import scrapy
import json
import re
from plane.items import PlaneItem
class PlanespiderSpider(scrapy.Spider):
    """Spider for the Tuniu flight-list JSONP endpoint.

    Every response is expected to be a JSONP document (a JavaScript callback
    wrapping one JSON object). ``parse`` strips the wrapper, decodes the
    object and yields one ``PlaneItem`` per entry found under
    ``data -> airBasic -> flightMap``.
    """

    name = 'planespider'
    allowed_domains = ['tuniu.com']
    start_urls = ['https://flight-api.tuniu.com/query/flight/v2/list?callback=jQuery17207759591352059797_1565148905476&{"withTransfer":true,"passengers":[{"count":1,"psgType":"ADT"}],"voyType":"ST","voys":[{"orgCity":"CAN","dstCity":"BJS","deptDate":"2019-08-09"}],"tokenKey":"G92ZWFuZHBlYWNlMmxpZ2h0L3YyL2xpc","useToken":true}&_=1565148905672']

    # City codes that are combined pairwise into query URLs below.
    cityname = ('AQG', 'AKA', 'ABC')

    # One query URL per ordered (origin, destination) pair of distinct codes.
    # Explicit loops are used on purpose: a nested comprehension in a class
    # body cannot see class-level names such as ``cityname`` in its inner
    # clauses.
    # NOTE(review): nothing in this class reads ``url1``; ``start_urls`` above
    # only holds the single CAN -> BJS query. If the generated URLs are meant
    # to be crawled they still have to be wired in -- confirm intent.
    url1 = []
    for i in cityname:
        for j in cityname:
            if i != j:
                urls = ('https://flight-api.tuniu.com/query/flight/v2/list?callback=jQuery17207759591352059797_1565148905476&{"withTransfer":true,"passengers":[{"count":1,"psgType":"ADT"}],"voyType":"ST","voys":[{"orgCity":"%s","dstCity":"%s","deptDate":"2019-08-09"}],"tokenKey":"G92ZWFuZHBlYWNlMmxpZ2h0L3YyL2xpc","useToken":true}&_=1565148905672' % (i, j))
                url1.append(urls)

    # Keys copied unchanged from each flight record into the yielded item.
    _FLIGHT_FIELDS = (
        'airCom',
        'flightNo',
        'orgCity',
        'dstCity',
        'orgAirport',
        'dstAirport',
        'deptTime',
        'arrvTime',
        'rate',
    )

    def parse(self, response):
        """Yield one ``PlaneItem`` per flight in a JSONP response.

        Args:
            response: The downloaded response; its body is decoded with the
                default codec of ``bytes.decode()``.

        Yields:
            A new ``PlaneItem`` for every value of
            ``data['airBasic']['flightMap']``, populated with the keys listed
            in ``_FLIGHT_FIELDS``.

        Raises:
            KeyError: If the decoded object lacks ``data``, ``airBasic`` or
                ``flightMap``, or a flight record lacks one of the copied
                fields.
            ValueError: If the extracted text is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        # Grab everything from the first '{' to the last '}', which drops the
        # JSONP callback wrapper around the JSON object.
        payload = re.match(".*?({.*}).*", response.body.decode(), re.S)
        if payload is None:
            # Without this guard a body with no JSON object would surface as
            # an opaque AttributeError on ``None.group``.
            self.logger.warning(
                "No JSON object found in response from %s", response.url
            )
            return

        plane_json = json.loads(payload.group(1))
        data = plane_json['data']
        airbasic = data['airBasic']
        flightmap = airbasic['flightMap'].values()

        for each in flightmap:
            # Build a fresh item per flight: re-yielding one shared, mutated
            # item lets later flights overwrite data that downstream
            # consumers may still be holding a reference to.
            item = PlaneItem()
            for field in self._FLIGHT_FIELDS:
                item[field] = each[field]
            yield item