敲敲我的脑袋 2023-11-01 09:17 采纳率: 57.1%
浏览 10
已结题

爬虫遇到风控请求拦截/阻断,请求失败

爬虫访问不到数据,后端报错,请求遭遇风控阻断或拦截,网页会出现验证(见下方第二张图片)。除了用 sleep 设置访问间隔之外,麻烦大家看一下还有其他办法吗?

img

img

下面附上我的代码:

import re
import csv
import pandas as pd
import requests
import json
import random
from time import sleep
# Endpoint configuration for the crawler.
# Sample query string, presumably meant to be appended to base_url:
#   ?&source=3&tenderCode=cscec202309120000226457
base_url = 'https://yzmtg.yzw.cn/portal/tender/winner/detail?'
url = 'https://yzmtg.yzw.cn/portal/tender/search'

# Request headers sent with every search request.
# NOTE(review): the token/cookie values below look like session-bound
# credentials copied from a browser session; they will presumably expire and
# should be refreshed (and kept out of shared code) - confirm before reuse.
headers = {
    'accept-encoding': 'gzip, deflate, br',
    'boxid': 'BkvHQ4KN4Ka6ILeEpNLXc9gHlmhYEA30opWpFbNbZAjeJ+QKg9qWbXxEYPjAlgbJ3ge/A7JqX1jCKPc6GMwYcQQ==',
    'x-auth': 'fkKshLXqlhhHLIzSipW0DLhGurSzylUIV2eRf7ocyLn8mry4Cfoc7Af+gHL9EJRgYsGaR74BJO+RdFMCTQ5mro/gTlWPNPrew7JhiZ9p7xZas5poZC9K0fDZuVg0Uvd24aQ4mgFHSPmw3mIcM9d7jfdQwNkcUKkA6lTm8yTS1lU=',
    'content-type': 'application/json',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.1.4031 SLBChan/30',
    'x-yzw-auth-token': 'eON4ElMhyf1NXN6ARGnuWMHDu5ev0xEHI7pc02jBsu2TRRyMuFFSdZwE7fAPPTaM9ydbz7LxLBPq%2BeAbVjaHxg7x6hakMTGP5KxJ%2BvWiDws%3D',
    'referer': 'https://xy.yzw.cn/search/sj/bid',
    # The cookie value was previously broken across two physical lines inside
    # one single-quoted literal (a SyntaxError). The two halves are joined here
    # with adjacent string literals and no separator, because the second half
    # continues the hex value of the `web.auth.yzw` cookie.
    'cookie': (
        'Hm_lvt_c1832dca4922753f642109f295f07eba=1698710490; yzw-auac-token=eON4ElMhyf1NXN6ARGnuWMHDu5ev0xEHI7pc02jBsu2TRRyMuFFSdZwE7fAPPTaM9ydbz7LxLBPq+eAbVjaHxg7x6hakMTGP5KxJ+vWiDws=; LoginRequestKey=C48368C7319C27F01498961883BFED38341F634CB9368971EF70317A4A9D5CA05077590E5CDDAF25A8D0F7DEEE54AD81805C82B997F0B3BC0E400F9083406D84D6AEF74636107A16F101E7FD671D085519D8C8613002DA27BC6D0D26CFDCA189BF01BC221CCDAE50D212503F88591FFE46813C0E9226AC568F643A7587AA1B31F88779DF1558B9831E55FB100EF94DA90B; web.auth.yzw=FFE9EAC1BE770A00E02FA20847B363978F710544F325EE3ABD3B535C69906CE725085C7908A7FE6DE2C3ECF5C510A597E508D594B0CB83A5524B00C9D929079892BBE6326D094809E54AFDCE5801671A7D5F4DD78957B332955E3A17F5D496D1B20CB13678F8A552FF601B2B8D2C4C930682251D9931A60C983C01608D933A058A88038B7D32F9A6096CFB2E18FE01F52184CFC36DC301E569E9F1A278DBE951549651AE815EDE617EB758CF941B5366C1B55D5685259C839FFFAF7FD4249F4C107BA398103461B57E1FBEC6CDE64A28F8B02E08BE19F1C177665F67A4355724B82CC3FD73C41F597E1E2711C7CF07246DB2008401CE74977286A80657B29AEFA887210E0092324CA8024BBED18D75C5A67C68D77B80185CE36573E526946BFF7F4E04F051E0F5915FACA832E00308DD525070B7A4478174B003864E216CC1339BF5286FFC3D0E6CE06F9FCBA51295766D593DDBF97AE642FA132E939A9646048C45349D536068D2FC60E799C1EDC380BFCFCD3798FDF4FB34597B29C13AC8E0886E10D87D3C16CFEEAA632A09EAC1D8E17EC3C77B466A1C19859CAC7D7B7DD8A5350792E80A1C3CC0CC03E6B87F1876C3707BB99676DD0207B2BCD6C2A93C81F382B375E4B7AE516F4857549B46C50633D649079C099CAF65C2B6709659BEE1ADEEF3BE5C8D3A831FA0338EC42712F4B3D6DB8FB444141DC7D8E05DE617DE04323CFA2EA206A8ED2201891F6209BDA546392686F500F3155C24987A332FCABD03B7B98F814BBB323230D35C94D5D5C11CA2200DA0329234D6B04884EBE063781FDD32BB580C4BE4BB12F7D47A9DD7418BD6CED88417BCAC34179AF97958E637D75236B784C4241B3B6719902A1F98384DE7E77E0DD3E59129EE5D91EC233282825D3A8E56B60CEB33001DD89467E52C4A446973F391C76E34486E8C9DA625D752FB2837B187E27AC7854739598D545CF0EE61DCB53D07E41083D21EFA8CE8AAEFCC40C5BB919BE1B3A0FF7AAADC586301F183752F3CE8913974C38372BD194820EDFEE9A3ED4026'
        '2A42759169; Hm_lpvt_c1832dca4922753f642109f295f07eba=1698710799; HWWAFSESTIME=1698710808400; HWWAFSESID=2f46597512c433fee4'
    ),
}
# Field patterns are compiled once here instead of on every page iteration.
# Their order is the column order of each CSV row:
# area, endTime, name, publishDate, tenderCompanyName.
FIELD_PATTERNS = (
    re.compile(r'"area":"(?P<area>.*?)"', re.S),
    re.compile(r'"endTime":"(?P<endTime>.*?)"', re.S),
    re.compile(r'"name":"(?P<name>.*?)"', re.S),
    re.compile(r'"publishDate":"(?P<publishDate>.*?)"', re.S),
    re.compile(r'"tenderCompanyName":"(?P<tenderCompanyName>.*?)"', re.S),
)


def extract_rows(text):
    """Extract CSV rows from the raw text of one search response.

    Each field pattern is matched independently over ``text`` and the
    resulting columns are zipped together, so the number of rows equals the
    length of the shortest column.

    Args:
        text: Response body as a string.

    Returns:
        A list of 5-tuples ``(area, endTime, name, publishDate,
        tenderCompanyName)``; an empty list when any field has no match
        (for example when the body is a verification/blocking page).
    """
    # NOTE: because columns are matched independently, a record that lacks
    # one of the fields would shift later values of that column upwards.
    columns = [pattern.findall(text) for pattern in FIELD_PATTERNS]
    return list(zip(*columns))


def crawl_pages(first_page=94, last_page=1000, csv_path="data_yzw2.csv",
                timeout=30):
    """Fetch search result pages and append the extracted rows to a CSV file.

    Posts ``{"pageNum": page, "pageSize": 10, "param": {}}`` to the
    module-level ``url`` with the module-level ``headers`` for every page in
    ``first_page..last_page`` (inclusive), sleeping a random 5-10 seconds
    between pages.

    Args:
        first_page: First page number to request.
        last_page: Last page number to request (inclusive).
        csv_path: CSV file that rows are appended to.
        timeout: Seconds to wait for the server before giving up on a request.
    """
    for page in range(first_page, last_page + 1):
        payload = {
            "pageNum": page,
            "pageSize": 10,
            "param": {},
        }
        # Without a timeout a stalled connection would hang the crawl forever.
        res = requests.post(url=url, headers=headers,
                            data=json.dumps(payload), timeout=timeout)

        if not res.ok:
            # Further requests after an HTTP error are unlikely to succeed,
            # so stop here and show what the server answered.
            print('page', page, 'failed with HTTP status', res.status_code)
            print(res.text)
            break

        rows = extract_rows(res.text)
        if not rows:
            # Nothing matched: print the body so a verification or blocking
            # page can be recognised.
            print('page', page, 'returned no rows')
            print(res.text)
        else:
            print(*rows, sep="\n")
            print(len(rows))

        # The context manager closes the file even if writing fails.
        with open(csv_path, mode='a', newline='', encoding='utf-8') as f:
            csv.writer(f).writerows(rows)
        print('over!')

        # Random pause between pages to keep the request rate low.
        sleep(random.randint(5, 10))


if __name__ == "__main__":
    crawl_pages()

# data = pd.read_csv('data_yzw2.csv')


  • 写回答

3条回答 默认 最新

  • 集成显卡 2023-11-01 09:55
    关注

    这个没有很好的办法,因为已经是后端进行控制了。这里提供几个仅供参考的方案:
    1、看下目标网站是否有 wap/移动版(通常会有所松懈)
    2、调用验证码识别服务(比如超级鹰),识别验证码,以跳过安全拦截

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(2条)

报告相同问题?

问题事件

  • 系统已结题 11月10日
  • 已采纳回答 11月2日
  • 修改了问题 11月1日
  • 创建了问题 11月1日

悬赏问题

  • ¥15 Opencv(C++)异常
  • ¥15 VScode上配置C语言环境
  • ¥15 汇编语言没有主程序吗?
  • ¥15 这个函数为什么会爆内存
  • ¥15 无法装系统,grub成了顽固拦路虎
  • ¥15 springboot aop 应用启动异常
  • ¥15 matlab有关债券凸性久期的代码
  • ¥15 lvgl v8.2定时器提前到来
  • ¥15 qtcp 发送数据时偶尔会遇到发送数据失败?用的MSVC编译器(标签-qt|关键词-tcp)
  • ¥15 cam_lidar_calibration报错