我需要爬取携程上所有机票的信息,发现有一个每一次打开网页随机生成的transactionID,在别的地方找了一个获取transactionID的方法,当时好像是首先请求了一次某个网页(本人初学,也不是太懂),所以等到实际利用获取的transactionID构造headers和data作为request.post函数的参数时,貌似transactionID又失效了,导致请求失败,求教。
import requests
import re
import json
import hashlib
from fake_useragent import UserAgent
from requests import RequestException
def getTransactionId(depCode, arrCode, date, adult, child, infant):
headers = {'User-Agent': UserAgent().random}
url1 = 'https://flights.ctrip.com/international/search/oneway-{}-{}?depdate={}&cabin=y_s&adult={}&child={}&infant={}'.format(
depCode, arrCode, date, adult, child, infant)
transactionId, data1 = None, None
for i in range(0, 3):
try:
response = requests.get(url1, headers=headers)
data1 = re.findall(r'GlobalSearchCriteria =(.+);', response.text)[0].encode('utf-8')
transactionId = json.loads(data1).get("transactionID")
return transactionId
except:
continue
return transactionId
def getInfo(depCode, arrCode, date, adult, child, infant):
tID = getTransactionId(depCode, arrCode, date, adult, child, infant)
sign_value = tID + depCode + arrCode + date
_sign = hashlib.md5()
_sign.update(sign_value.encode('utf-8'))
print(_sign.hexdigest())
print(tID)
url = 'https://flights.ctrip.com/international/search/api/search/batchSearch'
headers = {'authority': 'flights.ctrip.com', 'method': 'POST',
'path': '/ international / search / api / search / batchSearch', 'scheme': 'https',
'accept': 'application/json', 'accept-encoding': 'gzip, deflate, br',
'accept-language': 'zh-CN,zh;q=0.9', 'cache-control': 'no-cache', 'content-length': '813',
'content-type': 'application/json;charset=UTF-8',
'cookie': 'ibulanguage=CN; ibulocale=zh_cn; cookiePricesDisplayed=CNY; _RSG=qU4BuMgdEZDhxN.AaSlty8; '
'_RDG=28b78e2b704ce92d911f4b5d7e131eb0cc; _RGUID=d8ce9ea5-b82c-47b4-8f37-bdc90af97eeb; '
'_ga=GA1.2.17296818.1619623943; MKT_CKID=1619623943206.b6070.li7o; MKT_Pagesource=PC; '
'_RF1=111.60.76.211; MKT_CKID_LMT=1619916801837; _gid=GA1.2.283095646.1619916802; '
'_abtest_userid=ee2cb974-3baf-428b-845c-44dd7f00571a; FlightIntl=Search=['
'%22WUH|%E6%AD%A6%E6%B1%89( '
'WUH)|477|WUH|480%22%2C%22CKG|%E9%87%8D%E5%BA%86(CKG)|4|CKG|480%22%2C%222021-05-03%22]; '
'Union=OUID=index&AllianceID=4897&SID=155952&SourceID=&createtime=1619937597&Expires'
'=1620542397463; '
'MKT_OrderClick=ASID=4897155952&AID=4897&CSID=155952&OUID=index&CT=1619937597466&CURL=https'
'%3A%2F '
'%2Fwww.ctrip.com%2F%3Fsid%3D155952%26allianceid%3D4897%26ouid%3Dindex&VAL={'
'"pc_vid":"1619623937624.3pgbv2"}; GUID=09031148213609314518; nfes_isSupportWebP=1; '
'_pd=%7B%22r%22%3A1%2C%22_d%22%3A21%2C%22_p%22%3A4%2C%22_o%22%3A3%2C%22s%22%3A30%2C%22_s%22'
'%3A1%7D; '
'nfes_isSupportWebP=1; appFloatCnt=3; '
'_bfa=1.1619623937624.3pgbv2.1.1619933240406.1619947658458.5.27; '
'_jzqco=%7C%7C%7C%7C1619916801967%7C1.1204911439.1619623943193.1619947661416.1619947745389'
'.1619947661416.1619947745389.undefined.0.0.22.22; '
'__zpspc=9.6.1619947661.1619947745.2%232%7Cwww.baidu.com%7C%7C%7C%25E6%2590%25BA%25E7%25A8'
'%258B%7C '
'%23; _bfi=p1%3D10320673302%26p2%3D10320673302%26v1%3D27%26v2%3D26',
'origin': 'https://flights.ctrip.com',
'referer': 'https://flights.ctrip.com/international/search/oneway-' + depCode + '-' + arrCode + '?depdate=' + '2021' + '-' + '5' + '-' + '4' + '&cabin=Y_S_C_F',
'scope': 'd', 'sign': str(_sign.hexdigest()), 'transactionid': str(tID),
'user - agent': 'Mozilla / 5.0(Windows NT 10.0;WOW64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / '
'70.0.3538.25Safari / 537.36Core / 1.70.3861.400QQBrowser / 10.7.4313.400'}
data = {
"adultCount": 1, "childCount": 0, "infantCount": 0, "flightWay": "S", "cabin": "Y_S_C_F", "scope": "d",
"extensionAttributes": {"LoggingSampling": 'false', "isFlightIntlNewUser": 'false'},
"transactionID": str(tID), "flightSegments": [
{"departureCityCode": "WUH", "arrivalCityCode": "CKG", "departureCityName": "武汉", "arrivalCityName": "重庆",
"departureDate": "2021-05-04", "departureCountryId": 1, "departureCountryName": "中国",
"departureCountryCode": "CN", "departureProvinceId": 20, "departureCityId": 477, "arrivalCountryId": 1,
"arrivalCountryName": "中国", "arrivalCountryCode": "CN", "arrivalProvinceId": 4, "arrivalCityId": 4,
"departureCityTimeZone": 480, "arrivalCityTimeZone": 480, "timeZone": 480}], "directFlight": 'false',
"extGlobalSwitches": {"useAllRecommendSwitch": 'true', "unfoldPriceListSwitch": 'true'}, "noRecommend": 'false'
}
response = requests.post(url, data=json.dumps(data), headers=headers).text
print(response)
getInfo('wuh', 'ckg', '2021-5-4', 1, 0, 0)