我的代码: import urllib.request import ssl import re import os from collections import deque def writeFileBytes(htmlBytes, toPath): with open(toPath, "wb") as f: f.write(htmlBytes) def writeFileStr(htmlBytes, toPath): with open(toPath, "w") as f: f.write(str(htmlBytes)) def getHtmlBytes(url): headers = { "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4421.5 Safari/537.36", } req = urllib.request.Request(url, headers=headers) context = ssl._create_unverified_context() response = urllib.request.urlopen(req, context=context) return response.read() def qqCeawler(url, toPath): htmlBytes = getHtmlBytes(url) # writeFileBytes(htmlBytes, r"F:\pythonproject\student\second\019-爬虫\0-爬虫练习\爬取网络中的qq号\file1.html") # writeFileStr(htmlBytes, r"F:\pythonproject\student\second\019-爬虫\0-爬虫练习\爬取网络中的qq号\file2.txt") htmlStr = str(htmlBytes) pat = r'(((http|ftp|https)://)(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,4})*(/[a-zA-Z0-9\&%_\./-~-]*)?)' re_url = re.compile(pat) urlsList = re_url.findall(htmlStr) urlsList = list(set(urlsList)) pat = r"[1-9]\d{4,9}" re_qq = re.compile(pat) qqsList = re_qq.findall(htmlStr) qqsList = list(set(qqsList)) f = open(toPath, "a") for qqStr in qqsList: f.write(qqStr + "\n") f.close() return urlsList def center(url, toPath): queue = deque() queue.append(url) while len(queue) != 0: targetUrl = queue.popleft() urlList = qqCeawler(targetUrl, toPath) for item in urlList: tempUrl = item[0] queue.append(tempUrl) url = "https://www.douban.com/group/topic/28954920/" toPath = r"F:\pythonproject\student\second\019-爬虫\0-爬虫练习\爬取网络中的qq号\qqFile.txt" center(url, toPath)
以下是报错:怎样解决,求解
Traceback (most recent call last):
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1350, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1255, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1301, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1250, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1010, in _send_output
self.send(msg)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 950, in send
self.connect()
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 921, in connect
self.sock = self._create_connection(
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\socket.py", line 808, in create_connection
raise err
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\socket.py", line 796, in create_connection
sock.connect(sa)
TimeoutError: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "F:/pythonproject/student/second/019-爬虫/0-爬虫练习/爬取网络中的qq号/爬取网络中的QQ号.py", line 51, in <module>
center(url, toPath)
File "F:/pythonproject/student/second/019-爬虫/0-爬虫练习/爬取网络中的qq号/爬取网络中的QQ号.py", line 45, in center
urlList = qqCeawler(targetUrl, toPath)
File "F:/pythonproject/student/second/019-爬虫/0-爬虫练习/爬取网络中的qq号/爬取网络中的QQ号.py", line 21, in qqCeawler
htmlBytes = getHtmlBytes(url)
File "F:/pythonproject/student/second/019-爬虫/0-爬虫练习/爬取网络中的qq号/爬取网络中的QQ号.py", line 18, in getHtmlBytes
response = urllib.request.urlopen(req, context=context)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 525, in open
response = self._open(req, data)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1379, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1353, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。