使用 Python 的 requests 库爬取百度搜索结果，
设置 response.encoding = 'utf8' 之后，
response.text 仍然是乱码。
结果:
代码如下:
```python
import urllib.request
import urllib.parse
import requests
from bs4 import BeautifulSoup
def baiduAPI(params):
    """Simulate a Baidu web search and return the HTTP response.

    Args:
        params: An already URL-encoded query string (for example the
            output of ``urllib.parse.urlencode``); it is appended to
            ``https://www.baidu.com/s?``.

    Returns:
        The ``requests.Response`` for the search page, with its text
        encoding set to UTF-8. The decoded page is also printed.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            exceeds the timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        # NOTE(review): this is a hard-coded browser session cookie; it looks
        # like a login credential and should not live in shared source.
        "Cookie": "HMACCOUNT_BFESS=BD73DB1D35AF871A; BDUSS_BFESS=VJJdkswYnVrT0JHUmFGTFRpTWhUUn5wLVlLbkJvTzA1WTBDcnZwQTU2UU9pYlZqRVFBQUFBJCQAAAAAAAAAAAEAAABcJyX5wO7KzcH6NwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA78jWMO~I1jc; BAIDUID_BFESS=2B61874755F437B5EC6BEF21B3134DF7:FG=1; ZFY=K9QfSyPCgzUKQk2s8CZ4eAp9owXYz:BJRF7ftEin2pJ8:C; H_PS_PSSID=36543_37552_38092_38052_37990_37796_36803_37930_38088_38041_26350_38009_37881; ab_sr=1.0.1_NzYyZWM1ZDU3OGQ2NGM2ODMxYmQ3MTZmNjE4YWI0OGY2NzIzYWY2YzQ3MjI3YTg5M2U2ODljYWFjZWY2NTg0ODE5NWY2YjhiZDk2YTQ2ZjU0NzZhM2MyZmY2YzFlOGM0YWRmZGU3ZDNkN2VjZjY4MjYwYzQ5MzNhYWQzOTdkYTExOWZkMDhhZTRlMzI0MWZkZGJkNDU3ZTk5YjY2N2ZhZDM3NjAwNDZhMGMxNjg4N2U4N2UwMzU4MTg3ZTFhNTY3",
        "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
        # Do not advertise "br": requests only decompresses Brotli when the
        # optional brotli/brotlicffi package is installed. Otherwise the body
        # stays compressed and response.text is garbage no matter which
        # encoding is set.
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
    }
    url = "https://www.baidu.com/s?" + params
    # A timeout keeps the call from hanging indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=10)
    response.encoding = "utf-8"
    print(response.text)
    # To save the page, write response.content (bytes) to a file opened with
    # "wb"; response.text is a str and cannot be written in binary mode.
    return response
if __name__ == "__main__":
    # URL-encode the search keyword; the non-ASCII text is percent-encoded
    # as UTF-8 bytes.
    params = urllib.parse.urlencode({"wd": "中国"})
    print(params)  # wd=%E4%B8%AD%E5%9B%BD
    response = baiduAPI(params)
```