学习python爬虫时,出现不知名问题
源代码如下:
askurl("https://movie.douban.com/top250?start=0")
def askurl(url): #模拟浏览器头部信息,向豆瓣服务器发送消息
head={
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.188"
}
#用户代理,表示告诉豆瓣服务器,我们是什么类型的机器浏览器(本质上是告诉浏览器,我们可以接收什么水品的文件内容)
req = urllib.request.Request(url,headers=head)
html=''
try:
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')
print(html)
except urllib.error.URLError as e:
if hasattr(e,'code'):
print(e.code)
if hasattr((e,'reason')):
print(e.reason)
#return html
报错内容如下:
Traceback (most recent call last):
File "D:\软件\pycharm\pythonProject7\爬虫主程序.py", line 54, in <module>
main()
File "D:\软件\pycharm\pythonProject7\爬虫主程序.py", line 19, in main
askurl("https://movie.douban.com/top250?start=0")
File "D:\软件\pycharm\pythonProject7\爬虫主程序.py", line 37, in askurl
response = urllib.request.urlopen(req)
File "D:\软件\python3.10\lib\urllib\request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "D:\软件\python3.10\lib\urllib\request.py", line 519, in open
response = self._open(req, data)
File "D:\软件\python3.10\lib\urllib\request.py", line 536, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "D:\软件\python3.10\lib\urllib\request.py", line 496, in _call_chain
result = func(*args)
File "D:\软件\python3.10\lib\urllib\request.py", line 1391, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "D:\软件\python3.10\lib\urllib\request.py", line 1348, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "D:\软件\python3.10\lib\http\client.py", line 1283, in request
self._send_request(method, url, body, headers, encode_chunked)
File "D:\软件\python3.10\lib\http\client.py", line 1294, in _send_request
self.putrequest(method, url, **skips)
File "D:\软件\python3.10\lib\http\client.py", line 1132, in putrequest
self._output(self._encode_request(request))
File "D:\软件\python3.10\lib\http\client.py", line 1212, in _encode_request
return request.encode('ascii')
UnicodeEncodeError: 'ascii' codec can't encode character '\uff1f' in position 11: ordinal not in range(128)
Process finished with exit code 1