poorzhu 2020-07-29 15:04 采纳率: 50%
浏览 1669

python爬虫报错,urllib.error.URLError: <urlopen error [Errno 0] Error>

在学习爬虫的过程中,我尝试爬取电影天堂的列表页及详情页链接,但代码执行后一直报下面这个错误,请各位大神帮忙解决。

代码:

#!/usr/bin/python
# -*- encoding: utf-8 -*-


from urllib import request
import re
import time
import random
from useragent import ua_list



class FilmSky(object):
    """Two-level crawler for the dytt8.net movie listing.

    Level one is the paginated listing page, from which each film's name and
    detail-page link are extracted. Level two is the film's detail page, from
    which the download link is extracted.
    """

    # Site root that the hrefs found on the listing page are appended to.
    _BASE_URL = 'http://www.dytt8.net'

    # Listing page: group 1 is the detail-page href, group 2 the film name.
    _LIST_PATTERN = re.compile(
        r'<table width="100%".*?>.*?height="26">.*?'
        r'<a href="(.*?)" class="ulink">(.*?)</a>',
        re.S,
    )

    # Detail page: group 1 is the text of the first anchor after the table tag.
    # Both "BORDER-BOTTOM" (the CSS spelling) and the "BORDER_BOTTOM" spelling
    # used previously are accepted.
    # NOTE(review): confirm against the live detail-page markup.
    _DOWNLOAD_PATTERN = re.compile(
        r'<table style="BORDER[-_]BOTTOM: .*?".*?>.*?>(.*?)</a>',
        re.S,
    )

    def __init__(self):
        # URL template of the listing pages; "{}" receives the page number.
        self.url = 'http://www.dytt8.net/html/gndy/dyzz/list_23_{}.html'

    def get_page(self, url):
        """Fetch *url* with a random User-Agent and return the body as text.

        The body is decoded as UTF-8 first. If that fails it is decoded as
        GB18030 with undecodable bytes dropped, so a page in a legacy Chinese
        encoding no longer aborts the crawl with ``UnicodeDecodeError``.

        Raises:
            urllib.error.URLError: if the request itself fails.
        """
        req = request.Request(
            url=url,
            headers={'User-Agent': random.choice(ua_list)},
        )
        # The context manager closes the connection even if read() raises.
        with request.urlopen(req) as res:
            raw = res.read()
        try:
            return raw.decode('utf-8')
        except UnicodeDecodeError:
            # NOTE(review): the site presumably serves GB2312/GBK; GB18030 is
            # a superset of both -- confirm against the response headers.
            return raw.decode('gb18030', 'ignore')

    def parse_page(self, html):
        """Extract every film on a listing page together with its download link.

        Args:
            html: text of a level-one (listing) page.

        Returns:
            A list with one dict per film, keyed by '名称:' (film name) and
            '链接:' (download link, or None when the detail page has none).
            The list is empty when the listing pattern matches nothing.
        """
        films = []
        # Each match is a (href, name) tuple; unpack the tuple of the current
        # match rather than indexing into the whole result list.
        for film_href, film_name in self._LIST_PATTERN.findall(html):
            # Strip leading slashes so the joined URL has exactly one.
            film_link = self._BASE_URL + '/' + film_href.lstrip('/')
            # Follow the detail link to obtain the download link.
            download_link = self.parse_two_html(film_link)
            films.append({
                '名称:': film_name,
                '链接:': download_link,
            })
        return films

    def parse_two_html(self, film_link):
        """Fetch a film's detail page and return its download link.

        Args:
            film_link: absolute URL of the level-two (detail) page.

        Returns:
            The first captured download link, or None when the page contains
            no match (previously this raised ``IndexError``).
        """
        two_html = self.get_page(film_link)
        found = self._DOWNLOAD_PATTERN.search(two_html)
        if found is None:
            return None
        return found.group(1)

    def main(self, first_page=1, last_page=1):
        """Crawl listing pages *first_page*..*last_page* (inclusive).

        With the defaults only page 1 is crawled, as before. After each page
        the crawler sleeps 1-3 seconds and prints a progress line.

        Returns:
            The film dicts collected from all crawled pages.
        """
        collected = []
        for page in range(first_page, last_page + 1):
            url = self.url.format(page)
            html = self.get_page(url)
            collected.extend(self.parse_page(html))
            # Random pause between listing pages.
            time.sleep(random.randint(1, 3))
            print("第%d页面完成爬虫作业" % page)
        return collected


if __name__ == '__main__':
    # Script entry point: run the crawler once and report the elapsed time.
    import ssl

    # NOTE: this replaces the default HTTPS context factory with the
    # unverified one, which turns off certificate verification for every
    # HTTPS request urllib makes in this process.
    ssl._create_default_https_context = ssl._create_unverified_context

    started_at = time.time()
    FilmSky().main()
    elapsed = time.time() - started_at
    print("执行时间为:%.2f" % elapsed)

遇到的报错信息:

/Users/apple/PycharmProjects/spider/venv/bin/python /Users/apple/PycharmProjects/spider/day02/06_film_sky_two_urls.py
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1319, in do_open
    h.request(req.get_method(), req.selector, req.data, headers,
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1230, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1276, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1225, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1004, in _send_output
    self.send(msg)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 944, in send
    self.connect()
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1399, in connect
    self.sock = self._context.wrap_socket(self.sock,
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/ssl.py", line 500, in wrap_socket
    return self.sslsocket_class._create(
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/ssl.py", line 1040, in _create
    self.do_handshake()
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/ssl.py", line 1309, in do_handshake
    self._sslobj.do_handshake()
OSError: [Errno 0] Error

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/apple/PycharmProjects/spider/day02/06_film_sky_two_urls.py", line 78, in <module>
    spider.main()
  File "/Users/apple/PycharmProjects/spider/day02/06_film_sky_two_urls.py", line 67, in main
    html = self.get_page(url)
  File "/Users/apple/PycharmProjects/spider/day02/06_film_sky_two_urls.py", line 35, in get_page
    res = request.urlopen(req)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 525, in open
    response = self._open(req, data)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 542, in _open
    result = self._call_chain(self.handle_open, protocol, protocol +
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1362, in https_open
    return self.do_open(http.client.HTTPSConnection, req,
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1322, in do_open
    raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 0] Error>

Process finished with exit code 1
  • 写回答

1条回答 默认 最新

  • threenewbee 2020-07-29 17:02
    关注

    res = request.urlopen(req)
    在这一行的上面加一行:
    print(req.url)
    看看你的地址是否合法,是不是https才行,参数传入了没有
    如果还不行,在浏览器打开看看行不行,是不是网络/代理的问题
    还不行,user-agent设置为和浏览器一样的,看看是不是反爬虫禁止了

    评论

报告相同问题?

悬赏问题

  • ¥60 二次元手游日常任务自动化代肝(相关搜索:自动化)
  • ¥15 mysql将查询的结果作为动态列名怎么实现
  • ¥50 python自动地图截图脚本
  • ¥15 悬赏一本书(内含Matlab代码)的书名、作者
  • ¥20 瑞萨RA4M1芯片刷写为arduino r4 minima
  • ¥15 前端vue跟后端java服务部署在线上阿里云服务器
  • ¥15 fastreport怎么判断当前页数
  • ¥15 Kylin-Desktop-V10-GFB-Release-JICAI_02- 2207-Build14-ARM64.iso有没有这个版本的系统啊
  • ¥15 能不能通过蓝牙将传感器数据传送到手机上
  • ¥20 100元python和数据科学实验项目