为什么在网页源码中直接看到的 url 复制到浏览器就能打开,而用 requests 抓取得到的 url 却打不开呢?
# Fetch a mobile Baidu search-result page with a randomized User-Agent and
# print the elements whose data-module attribute marks them as ad info.
url = 'https://m.baidu.com/s?word=二陈丸最多吃多长时间一个疗程是多少'
USER_AGENTS = [
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0; Baiduspider-ads) Gecko/17.0 Firefox/17.0",
    "Mozilla/5.0 (Linux; U; Android 1.6; en-us; SonyEricssonX10i Build/R1AA056) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
]
# Pick a random UA so consecutive requests look less uniform to the server.
random_agent = USER_AGENTS[randint(0, len(USER_AGENTS) - 1)]
headers = {
    'User-Agent': random_agent,
    'Connection': 'close',  # disable keep-alive: this is a one-shot request
}
# Explicitly bypass any system/environment HTTP(S) proxies.
proxies = {"http": None, "https": None}
urllib3.disable_warnings()  # silence InsecureRequestWarning (verify=False below)
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = 'ALL'
requests.adapters.DEFAULT_RETRIES = 10  # connection-level retry count

# FIX: the original caught a RequestException, printed it, then re-issued the
# same GET with no handler — a second failure crashed the script right after
# being "handled". Use a bounded retry loop instead, and manage the Session
# with a context manager so its connection pool is always closed.
MAX_ATTEMPTS = 2
response = None
with requests.Session() as session:
    for attempt in range(MAX_ATTEMPTS):
        try:
            response = session.get(
                str(url),
                headers=headers,
                proxies=proxies,
                verify=False,
                timeout=(5, 15),  # (connect, read) timeouts in seconds
            )
            break
        except requests.exceptions.RequestException as e:
            print(e)
            if attempt == MAX_ATTEMPTS - 1:
                raise  # every attempt failed; surface the last error
    # Parse the fetched page source and print the ad-marked nodes.
    soup = BeautifulSoup(response.content, 'lxml')
    print(soup.select('[data-module="-isADInfos:true"]'))
这张是网页找的
这张是程序找的
网页找到的运行结果
程序找到的运行结果