用requests下载网页时,抛出了ConnectionError异常:
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None))
是什么原因,怎么处理?
# coding=utf-8
import requests, bs4, os, time
# Browser-like request headers for the search-results request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'Accept': 'text/html, application/xhtml+xml, application/xml;q = 0.9, image/webp, image/apng, */*; q = 0.8, application/signed-exchange;v = b3;q = 0.9',
    # 'br' is deliberately NOT advertised: requests can only decode Brotli
    # when an optional brotli package is installed; without it a
    # Brotli-compressed body would reach res.text undecoded.
    'Accept-Encoding': 'gzip, deflate',
}

# Fetch the search-results page. The timeout keeps an unresponsive server
# from blocking the script forever (requests has no default timeout).
res = requests.get(
    'https://www.baidu.com/s?wd=%E6%89%BE%E5%B7%A5%E4%BD%9C',
    headers=headers,
    timeout=10,
)

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser" warning).
soup_job = bs4.BeautifulSoup(res.text, 'html.parser')

# Every <a> element that carries an href attribute.
linkList = soup_job.select('a[href]')

# Keep only absolute http(s) links. A plain substring test ('http' in url)
# would also accept relative or javascript: hrefs that merely contain
# "http" somewhere, and requests.get() cannot fetch those.
urlList = [
    href
    for href in (link.get('href') for link in linkList)
    if href.startswith(('http://', 'https://'))
]
# Download every collected URL into F:\sites\html<N>.txt, where N counts the
# pages saved so far. Links that cannot be fetched are reported and skipped
# instead of aborting the whole run.
num = 0

# Create the output directory up front so open() below cannot fail just
# because the folder is missing.
os.makedirs('F:\\sites', exist_ok=True)

for url in urlList:
    try:
        # Send a browser-like User-Agent and set a timeout.
        # NOTE(review): a reset connection (WinError 10054) often means the
        # remote host rejected a non-browser client or throttled rapid
        # requests -- this is a likely cause, not a confirmed one.
        linkRes = requests.get(
            url,
            headers={'User-Agent': headers['User-Agent']},
            timeout=10,
        )
        # raise_for_status() returns None on success and raises HTTPError on
        # 4xx/5xx responses; comparing its return value with 404 never matches.
        linkRes.raise_for_status()
    except requests.exceptions.RequestException:
        # Base class of ConnectionError, Timeout, HTTPError, invalid-URL
        # errors, etc.: any request-level failure skips just this link.
        print('Bad link!')
        continue

    # 'with' guarantees the file is closed even if a write fails.
    with open('F:\\sites\\' + 'html' + str(num) + '.txt', 'wb') as linkFile:
        for chunk in linkRes.iter_content(100000):
            linkFile.write(chunk)

    # Pause between downloads to avoid hammering the remote servers.
    time.sleep(2)
    num += 1