hxy。。 2021-05-02 23:41 采纳率: 63.6%
浏览 85
已结题

python爬虫问题求教,急急急急急

 我需要爬取携程上所有机票的信息,发现有一个每一次打开网页随机生成的transactionID,在别的地方找了一个获取transactionID的方法,当时好像是首先请求了一次某个网页(本人初学,也不是太懂),所以等到实际利用获取的transactionID构造headers和data作为requests.post函数的参数时,貌似transactionID又失效了,导致请求失败,求教。

import requests
import re
import json
import hashlib
from fake_useragent import UserAgent
from requests import RequestException


def getTransactionId(depCode, arrCode, date, adult, child, infant):
    """Fetch the search page and extract the transaction ID embedded in it.

    Requests the one-way international search page for the given route and
    passenger counts, finds the ``GlobalSearchCriteria = {...}`` JavaScript
    assignment in the returned HTML and reads its ``transactionID`` field.
    Up to three attempts are made.

    Args:
        depCode: Departure city code, interpolated into the search URL.
        arrCode: Arrival city code, interpolated into the search URL.
        date: Departure date string, sent as the ``depdate`` query value.
        adult: Number of adult passengers.
        child: Number of child passengers.
        infant: Number of infant passengers.

    Returns:
        The ``transactionID`` value from the first attempt whose page could
        be parsed, or ``None`` when all three attempts fail.
    """
    headers = {'User-Agent': UserAgent().random}

    url1 = 'https://flights.ctrip.com/international/search/oneway-{}-{}?depdate={}&cabin=y_s&adult={}&child={}&infant={}'.format(
        depCode, arrCode, date, adult, child, infant)

    decoder = json.JSONDecoder()
    for _ in range(3):
        try:
            # A timeout keeps a stalled connection from hanging the script;
            # a timeout error is a RequestException and triggers a retry.
            response = requests.get(url1, headers=headers, timeout=10)
            marker = re.search(r'GlobalSearchCriteria\s*=\s*', response.text)
            if marker is None:
                continue
            # raw_decode parses exactly one JSON value starting at the given
            # offset and ignores whatever JavaScript follows it, so extra
            # statements on the same line cannot break the parse.
            criteria, _end = decoder.raw_decode(response.text, marker.end())
            return criteria.get("transactionID")
        except (RequestException, ValueError, AttributeError):
            # Network failure, malformed JSON, or a JSON value that is not an
            # object: try again.
            continue
    return None


def _iso_date(date):
    """Zero-pad the month and day of a ``Y-M-D`` string (``2021-5-4`` -> ``2021-05-04``)."""
    parts = date.split('-')
    if len(parts) != 3:
        # Not in Y-M-D form; leave it untouched rather than guess.
        return date
    year, month, day = parts
    return '-'.join((year, month.zfill(2), day.zfill(2)))


def getInfo(depCode, arrCode, date, adult, child, infant):
    """Query the batchSearch endpoint for one route and print the raw reply.

    Obtains a transaction ID via ``getTransactionId``, derives the ``sign``
    header as the MD5 hex digest of ``transactionID + departure code +
    arrival code + departure date``, POSTs the search payload as JSON and
    prints the response body.

    Args:
        depCode: Departure city code (e.g. ``'wuh'``).
        arrCode: Arrival city code (e.g. ``'ckg'``).
        date: Departure date as a ``Y-M-D`` string.
        adult: Number of adult passengers.
        child: Number of child passengers.
        infant: Number of infant passengers.

    Returns:
        The response body text (also printed).

    Raises:
        RuntimeError: If no transaction ID could be obtained.
    """
    tID = getTransactionId(depCode, arrCode, date, adult, child, infant)
    if tID is None:
        # Fail with a clear message instead of a TypeError from None + str.
        raise RuntimeError('could not obtain a transactionID from the search page')

    # Use one normalised set of values for both the sign and the payload.
    # NOTE(review): assumes the server recomputes the sign from the payload's
    # city codes and departure date -- confirm against a browser capture.
    dep_code = depCode.upper()
    arr_code = arrCode.upper()
    dep_date = _iso_date(date)

    sign_value = str(tID) + dep_code + arr_code + dep_date
    sign = hashlib.md5(sign_value.encode('utf-8')).hexdigest()
    print(sign)
    print(tID)

    url = 'https://flights.ctrip.com/international/search/api/search/batchSearch'

    # NOTE(review): this cookie is a fixed browser capture, not the session
    # that fetched the transaction ID above. If the server ties the ID to a
    # session, both requests should share one requests.Session -- confirm.
    cookie = ('ibulanguage=CN; ibulocale=zh_cn; cookiePricesDisplayed=CNY; _RSG=qU4BuMgdEZDhxN.AaSlty8; '
              '_RDG=28b78e2b704ce92d911f4b5d7e131eb0cc; _RGUID=d8ce9ea5-b82c-47b4-8f37-bdc90af97eeb; '
              '_ga=GA1.2.17296818.1619623943; MKT_CKID=1619623943206.b6070.li7o; MKT_Pagesource=PC; '
              '_RF1=111.60.76.211; MKT_CKID_LMT=1619916801837; _gid=GA1.2.283095646.1619916802; '
              '_abtest_userid=ee2cb974-3baf-428b-845c-44dd7f00571a; FlightIntl=Search=['
              '%22WUH|%E6%AD%A6%E6%B1%89( '
              'WUH)|477|WUH|480%22%2C%22CKG|%E9%87%8D%E5%BA%86(CKG)|4|CKG|480%22%2C%222021-05-03%22]; '
              'Union=OUID=index&AllianceID=4897&SID=155952&SourceID=&createtime=1619937597&Expires'
              '=1620542397463; '
              'MKT_OrderClick=ASID=4897155952&AID=4897&CSID=155952&OUID=index&CT=1619937597466&CURL=https'
              '%3A%2F '
              '%2Fwww.ctrip.com%2F%3Fsid%3D155952%26allianceid%3D4897%26ouid%3Dindex&VAL={'
              '"pc_vid":"1619623937624.3pgbv2"}; GUID=09031148213609314518; nfes_isSupportWebP=1; '
              '_pd=%7B%22r%22%3A1%2C%22_d%22%3A21%2C%22_p%22%3A4%2C%22_o%22%3A3%2C%22s%22%3A30%2C%22_s%22'
              '%3A1%7D; '
              'nfes_isSupportWebP=1; appFloatCnt=3; '
              '_bfa=1.1619623937624.3pgbv2.1.1619933240406.1619947658458.5.27; '
              '_jzqco=%7C%7C%7C%7C1619916801967%7C1.1204911439.1619623943193.1619947661416.1619947745389'
              '.1619947661416.1619947745389.undefined.0.0.22.22; '
              '__zpspc=9.6.1619947661.1619947745.2%232%7Cwww.baidu.com%7C%7C%7C%25E6%2590%25BA%25E7%25A8'
              '%258B%7C '
              '%23; _bfi=p1%3D10320673302%26p2%3D10320673302%26v1%3D27%26v2%3D26')

    # The HTTP/2 pseudo-headers (authority/method/path/scheme) and the fixed
    # content-length copied from browser dev tools are dropped: they are not
    # ordinary request headers, and requests computes Content-Length itself.
    headers = {
        'accept': 'application/json',
        # 'br' is not advertised: requests only decodes Brotli when an
        # optional package is installed, otherwise .text would be garbage.
        'accept-encoding': 'gzip, deflate',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'no-cache',
        'content-type': 'application/json;charset=UTF-8',
        'cookie': cookie,
        'origin': 'https://flights.ctrip.com',
        'referer': 'https://flights.ctrip.com/international/search/oneway-' + depCode + '-' + arrCode
                   + '?depdate=' + date + '&cabin=Y_S_C_F',
        'scope': 'd',
        'sign': sign,
        'transactionid': str(tID),
        # The key was 'user - agent' (with spaces), so it was sent as an
        # unknown header and the library default User-Agent was used.
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3861.400 QQBrowser/10.7.4313.400',
    }

    data = {
        "adultCount": adult, "childCount": child, "infantCount": infant,
        "flightWay": "S", "cabin": "Y_S_C_F", "scope": "d",
        # Real booleans, so the JSON body carries false/true rather than the
        # strings "false"/"true".
        "extensionAttributes": {"LoggingSampling": False, "isFlightIntlNewUser": False},
        "transactionID": str(tID),
        "flightSegments": [
            # NOTE(review): city names and numeric IDs below are still the
            # captured Wuhan -> Chongqing values; other routes need their own
            # metadata.
            {"departureCityCode": dep_code, "arrivalCityCode": arr_code,
             "departureCityName": "武汉", "arrivalCityName": "重庆",
             "departureDate": dep_date, "departureCountryId": 1, "departureCountryName": "中国",
             "departureCountryCode": "CN", "departureProvinceId": 20, "departureCityId": 477,
             "arrivalCountryId": 1, "arrivalCountryName": "中国", "arrivalCountryCode": "CN",
             "arrivalProvinceId": 4, "arrivalCityId": 4,
             "departureCityTimeZone": 480, "arrivalCityTimeZone": 480, "timeZone": 480}],
        "directFlight": False,
        "extGlobalSwitches": {"useAllRecommendSwitch": True, "unfoldPriceListSwitch": True},
        "noRecommend": False,
    }
    response = requests.post(url, data=json.dumps(data), headers=headers, timeout=10).text
    print(response)
    return response

# Sample run: codes 'wuh' -> 'ckg', departing 2021-5-4, one adult passenger.
getInfo(depCode='wuh', arrCode='ckg', date='2021-5-4', adult=1, child=0, infant=0)
  • 写回答

1条回答 默认 最新

  • 任聪聪 全栈领域优质创作者 2021-05-03 11:27
    关注

    仔细看看代码,这里你犯的只是一个细节问题。

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

问题事件

  • 系统已结题 9月29日
  • 已采纳回答 9月21日

悬赏问题

  • ¥15 (标签-STM32|关键词-智能小车)
  • ¥20 关于#stm32#的问题,请各位专家解答!
  • ¥15 (标签-python)
  • ¥15 第一个已完成,求第二个做法
  • ¥20 搭建awx,试了很多版本都有错
  • ¥15 java corba的客户端该如何指定使用本地某个固定IP去连接服务端?
  • ¥15 activiti工作流问题,求解答
  • ¥15 有人写过RPA后台管理系统么?
  • ¥15 Bioage计算生物学年龄
  • ¥20 如何将FPGA Alveo U50恢复原来出厂设置哇?