vanfanvan 2021-03-13 15:50 采纳率: 0%
浏览 1159

爬虫时遇到问题:urlopen error [WinError 10060]

我的代码:
import urllib.request
import ssl
import re
import os
from collections import deque
def writeFileBytes(htmlBytes, toPath):
    with open(toPath, "wb") as f:
        f.write(htmlBytes)
def writeFileStr(htmlBytes, toPath):
    with open(toPath, "w") as f:
        f.write(str(htmlBytes))
def getHtmlBytes(url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4421.5 Safari/537.36",
    }
    req = urllib.request.Request(url, headers=headers)
    context = ssl._create_unverified_context()
    response = urllib.request.urlopen(req, context=context)
    return response.read()
def qqCeawler(url, toPath):
    htmlBytes = getHtmlBytes(url)
    # writeFileBytes(htmlBytes, r"F:\pythonproject\student\second\019-爬虫\0-爬虫练习\爬取网络中的qq号\file1.html")
    # writeFileStr(htmlBytes, r"F:\pythonproject\student\second\019-爬虫\0-爬虫练习\爬取网络中的qq号\file2.txt")
    htmlStr = str(htmlBytes)
    pat = r'(((http|ftp|https)://)(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,4})*(/[a-zA-Z0-9\&%_\./-~-]*)?)'
    re_url = re.compile(pat)
    urlsList = re_url.findall(htmlStr)
    urlsList = list(set(urlsList))
    pat = r"[1-9]\d{4,9}"
    re_qq = re.compile(pat)
    qqsList = re_qq.findall(htmlStr)
    qqsList = list(set(qqsList))
    f = open(toPath, "a")
    for qqStr in qqsList:
        f.write(qqStr + "\n")
    f.close()
    return urlsList
def center(url, toPath):
    queue = deque()
    queue.append(url)
    while len(queue) != 0:
        targetUrl = queue.popleft()
        urlList = qqCeawler(targetUrl, toPath)
        for item in urlList:
            tempUrl = item[0]
            queue.append(tempUrl)
url = "https://www.douban.com/group/topic/28954920/"
toPath = r"F:\pythonproject\student\second\019-爬虫\0-爬虫练习\爬取网络中的qq号\qqFile.txt"
center(url, toPath)

以下是报错:怎样解决,求解

Traceback (most recent call last):
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1350, in do_open
    h.request(req.get_method(), req.selector, req.data, headers,
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1255, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1301, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1250, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1010, in _send_output
    self.send(msg)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 950, in send
    self.connect()
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 921, in connect
    self.sock = self._create_connection(
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\socket.py", line 808, in create_connection
    raise err
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\socket.py", line 796, in create_connection
    sock.connect(sa)
TimeoutError: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "F:/pythonproject/student/second/019-爬虫/0-爬虫练习/爬取网络中的qq号/爬取网络中的QQ号.py", line 51, in <module>
    center(url, toPath)
  File "F:/pythonproject/student/second/019-爬虫/0-爬虫练习/爬取网络中的qq号/爬取网络中的QQ号.py", line 45, in center
    urlList = qqCeawler(targetUrl, toPath)
  File "F:/pythonproject/student/second/019-爬虫/0-爬虫练习/爬取网络中的qq号/爬取网络中的QQ号.py", line 21, in qqCeawler
    htmlBytes = getHtmlBytes(url)
  File "F:/pythonproject/student/second/019-爬虫/0-爬虫练习/爬取网络中的qq号/爬取网络中的QQ号.py", line 18, in getHtmlBytes
    response = urllib.request.urlopen(req, context=context)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 525, in open
    response = self._open(req, data)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 542, in _open
    result = self._call_chain(self.handle_open, protocol, protocol +
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 502, in _call_chain
    result = func(*args)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1379, in http_open
    return self.do_open(http.client.HTTPConnection, req)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1353, in do_open
    raise URLError(err)
urllib.error.URLError: <urlopen error [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。

  • 写回答

2条回答 默认 最新

  • CSDN-Ada助手 CSDN-AI 官方账号 2022-09-07 17:27
    关注
    不知道你这个问题是否已经解决, 如果还没有解决的话:

    如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 以帮助更多的人 ^-^
    评论

报告相同问题?

悬赏问题

  • ¥100 速求!商品购买力最优化问题(用遗传算法求解,给出python代码)
  • ¥15 虚拟机检测,可以是封装好的DLL,可付费
  • ¥15 kafka无法正常启动(只启动了一瞬间会然后挂了)
  • ¥30 使用matlab将观测点聚合成多条目标轨迹
  • ¥15 Workbench中材料库无法更新,如何解决?
  • ¥20 如何推断此服务器配置
  • ¥15 关于github的项目怎么在pycharm上面运行
  • ¥15 内存地址视频流转RTMP
  • ¥100 有偿,谁有移远的EC200S固件和最新的Qflsh工具。
  • ¥15 有没有整苹果智能分拣线上图像数据