scrapy使用代理后,报错:
2023-02-28 18:52:18 [scrapy.core.scraper] ERROR: Error downloading <GET http://guba.eastmoney.com/list,300059_1.html>
Traceback (most recent call last):
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\twisted\internet\defer.py", line 1693, in _inlineCallbacks
result = context.run(
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\twisted\python\failure.py", line 518, in throwExceptionIntoGenerator
return g.throw(self.type, self.value, self.tb)
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\core\downloader\middleware.py", line 52, in process_request
return (yield download_func(request=request, spider=spider))
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\utils\defer.py", line 73, in mustbe_deferred
result = f(*args, **kw)
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\core\downloader\handlers\__init__.py", line 79, in download_request
return handler.download_request(request, spider)
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\core\downloader\handlers\http11.py", line 72, in download_request
return agent.download_request(request)
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\core\downloader\handlers\http11.py", line 363, in download_request
agent = self._get_agent(request, timeout)
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\core\downloader\handlers\http11.py", line 327, in _get_agent
proxyScheme, proxyNetloc, proxyHost, proxyPort, proxyParams = _parse(proxy)
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\core\downloader\webclient.py", line 39, in _parse
return _parsed_url_args(parsed)
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\core\downloader\webclient.py", line 20, in _parsed_url_args
host = to_bytes(parsed.hostname, encoding="ascii")
File "C:\Users\18310\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\utils\python.py", line 111, in to_bytes
return text.encode(encoding, errors)
UnicodeEncodeError: 'ascii' codec can't encode character '\ufeff' in position 0: ordinal not in range(128)
我的中间件为:
class RandomProxyMiddleware(HttpProxyMiddleware):
# proxy从settings.py中读取PROXY
def __init__(self, auth_encoding='utf-8', proxy_list=None):
self.proxy = settings.PROXY
def process_request(self, request, spider):
# 随机选择一个代理IP
proxy = random.choice(self.proxy)
# 判断代理IP是否可用
if self.check_proxy(proxy):
print('当前使用的代理IP是:', proxy)
request.meta['proxy'] = proxy
else:
self.process_request(request, spider)
def check_proxy(self, proxy):
# 判断代理IP是否可用
try:
# 设置超时时间为3秒
requests.get('https://www.eastmoney.com/', proxies={'http': proxy}, timeout=3)
return True
except:
return False
使用的Ip可以访问“'https://www.eastmoney.com/%E2%80%9D
请问这个错误该怎么解决?