Python requests中的headers使用报错

运行时报错

# _*_coding:utf-8_*_
# ! python3
"""
# Using pyCharm 2021.3.2 Community and python 3.8/3.9
"""

import requests
import os
import bs4
import random

# Pool of browser User-Agent strings; the request helpers below pick one at
# random for each download.
# NOTE: braces around bare strings (no "key: value" pairs) create a *set*,
# not a dict or a list. random.choice() indexes its argument, so calling it
# on this set raises "TypeError: 'set' object is not subscriptable".
headers = {
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
    "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 "
}


def get_page(url):
    """Download ``url`` and return the ``requests`` response.

    HTTP error statuses are printed rather than raised, so the response is
    returned whether or not the request succeeded.
    """
    # Download the page
    print('Downloading page %s' % url)
    # NOTE: ``headers`` is a set in this script, so random.choice() fails
    # here with TypeError before any request is sent. Even with a list, the
    # ``headers=`` argument must be a dict such as {'User-Agent': value},
    # not a bare string.
    res = requests.get(url, headers=random.choice(headers))
    try:
        res.raise_for_status()
    except Exception as exc:
        # Report the failure but still hand the response back to the caller.
        print('There was a problem : %s' % exc)
    return res


def search(soup):
    """Select the comic image elements from a parsed page.

    Returns whatever ``soup.select`` yields for ``mip-img`` elements nested
    in a ``div`` whose class attribute is exactly ``erPag``.
    """
    return soup.select('div[class="erPag"] mip-img')


def download_image(comicUrl):
    """Download the image at ``comicUrl`` and return the ``requests`` response.

    HTTP error statuses are printed rather than raised, so the response is
    returned whether or not the request succeeded.
    """
    # Download the image
    print('Downloading image %s...' % comicUrl)
    # NOTE: ``headers`` is a set in this script, so random.choice() fails
    # here with TypeError. ``headers=`` must also be a dict such as
    # {'User-Agent': value}, not a bare string.
    res = requests.get(comicUrl, headers=random.choice(headers))
    try:
        res.raise_for_status()
    except Exception as exc:
        # Report the failure but still hand the response back to the caller.
        print('There was a problem : %s' % exc)
    return res


def save_image_file(res, folder_name, image_name):
    """Write the body of ``res`` to a file inside ``folder_name``.

    Args:
        res: Response-like object exposing ``iter_content(chunk_size)``.
        folder_name: Directory the file is written to.
        image_name: Path or URL of the image; only its last component
            (``os.path.basename``) is used as the file name.
    """
    print('Saving image file %s...\n' % image_name)
    target = os.path.join(folder_name, os.path.basename(image_name))
    # The context manager closes the file even when reading a chunk or
    # writing it fails part-way through.
    with open(target, 'wb') as image_file:
        for chunk in res.iter_content(100000):
            image_file.write(chunk)


def get_prev(soup):
    """Return the ``href`` of the third ``mip-link`` under ``#action ul li``.

    The result is ``None`` when that link carries no ``href``; an
    ``IndexError`` propagates when fewer than three links match.
    """
    nav_links = soup.select('#action ul li mip-link')
    return nav_links[2].get("href")


def main(url='https://m.100fanwo.com/manhua/dongjingzhongre/2782056.html',
         folder_name='TKGC3'):
    """Walk the comic page by page and save each page's image.

    Starting at ``url``, every page is fetched, its comic image is stored in
    ``folder_name`` and the link returned by ``get_prev`` is followed. The
    walk ends at the first link ending in ``/`` or when no usable link is
    left.

    Args:
        url: Page to start from.
        folder_name: Directory for the images; created when missing.
    """
    # Imported locally so the file's import block does not have to change.
    from urllib.parse import urljoin

    os.makedirs(folder_name, exist_ok=True)
    visited = set()
    while url and not url.endswith('/'):
        if url in visited:
            # A link leading back to a page already handled would otherwise
            # keep the loop running forever.
            print('Already visited %s, stopping' % url)
            break
        visited.add(url)

        page = get_page(url)
        soup = bs4.BeautifulSoup(page.text, features="html.parser")
        comicElem = search(soup)
        # Covers both "no element found" and "element without a src".
        comicUrl = comicElem[0].get('src') if comicElem else None
        if not comicUrl:
            print("Could not find comic image")
        else:
            # Resolve relative or protocol-relative addresses against the
            # page URL; absolute addresses pass through unchanged.
            comicUrl = urljoin(url, comicUrl)
            image = download_image(comicUrl)
            save_image_file(image, folder_name, comicUrl)

        try:
            next_url = get_prev(soup)
        except IndexError:
            # get_prev indexes the third navigation link, which is absent on
            # pages with fewer links (for example an error page).
            print('Could not find the next page link on %s' % url)
            break
        url = urljoin(url, next_url) if next_url else None
    print('Done')


# Start the download only when the file is run as a script, not on import.
if __name__ == "__main__":
    main()

报错内容：TypeError: 'set' object is not subscriptable

写回答
好问题 0 提建议
关注问题
分享
邀请回答
编辑收藏删除
收藏举报

1条回答默认最新

陈年椰子 2022-03-26 17:19

关注


# _*_coding:utf-8_*_
# ! python3
"""
Written with PyCharm 2021.3.2 Community and Python 3.8/3.9.
Tokyo Ghoul full comic downloader.
main.py downloads all comics from
https://www.m.100fanwo.com/manhua/dongjingzhongre/ (as of 2022).
The first page of the comic is
https://m.100fanwo.com/manhua/dongjingzhongre/2782054.html
"""

import requests
import os
import bs4
import random

# Pool of browser User-Agent strings. Despite the name, this is not a headers
# mapping: callers pick one entry with random.choice() and wrap it in
# {'User-Agent': ...} before passing it to requests.
headers = [
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
    # The next two literals have no comma between them on purpose: adjacent
    # string literals are joined into one User-Agent value.
    "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 "
    "Chrome/16.0.912.77 Safari/535.7",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ",
]


def get_page(url, timeout=30):
    """Download ``url`` and return the ``requests`` response.

    A User-Agent is picked at random from the module-level ``headers`` list
    for every call. HTTP error statuses are printed rather than raised, so
    the response is returned either way.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            connection would block the script indefinitely.

    Returns:
        The ``requests.Response`` for ``url``.
    """
    print('Downloading page %s' % url)
    user_agent = random.choice(headers)
    res = requests.get(url, headers={'User-Agent': user_agent}, timeout=timeout)
    try:
        res.raise_for_status()
    except requests.exceptions.HTTPError as exc:
        # raise_for_status() signals bad statuses with HTTPError; report it
        # and still hand the response back, as callers expect.
        print('There was a problem : %s' % exc)
    return res


def search(soup):
    """Look up the comic image elements in a parsed page.

    The result is whatever ``soup.select`` returns for ``mip-img`` elements
    nested in a ``div`` whose class attribute is exactly ``erPag``.
    """
    image_selector = 'div[class="erPag"] mip-img'
    return soup.select(image_selector)


def download_image(comicUrl, timeout=30):
    """Download the image at ``comicUrl`` and return the ``requests`` response.

    A User-Agent is picked at random from the module-level ``headers`` list
    for every call. HTTP error statuses are printed rather than raised, so
    the response is returned either way.

    Args:
        comicUrl: Address of the image to fetch.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            connection would block the script indefinitely.

    Returns:
        The ``requests.Response`` for ``comicUrl``.
    """
    print('Downloading image %s...' % comicUrl)
    user_agent = random.choice(headers)
    res = requests.get(comicUrl, headers={'User-Agent': user_agent}, timeout=timeout)
    try:
        res.raise_for_status()
    except requests.exceptions.HTTPError as exc:
        # raise_for_status() signals bad statuses with HTTPError; report it
        # and still hand the response back, as callers expect.
        print('There was a problem : %s' % exc)
    return res


def save_image_file(res, folder_name, image_name):
    """Write the body of ``res`` to a file inside ``folder_name``.

    Args:
        res: Response-like object exposing ``iter_content(chunk_size)``.
        folder_name: Directory the file is written to.
        image_name: Path or URL of the image; only its last component
            (``os.path.basename``) is used as the file name.
    """
    print('Saving image file %s...\n' % image_name)
    destination = os.path.join(folder_name, os.path.basename(image_name))
    # ``with`` guarantees the handle is closed even if a chunk cannot be
    # read or written.
    with open(destination, 'wb') as image_file:
        for chunk in res.iter_content(100000):
            image_file.write(chunk)


def get_prev(soup):
    """Return the ``href`` of the third ``mip-link`` under ``#action ul li``.

    ``None`` is returned when that link has no ``href``; an ``IndexError``
    propagates when fewer than three links match the selector.
    """
    third_link = soup.select('#action ul li mip-link')[2]
    return third_link.get("href")


def main(url='https://m.100fanwo.com/manhua/dongjingzhongre/2782056.html',
         folder_name='TKGC3'):
    """Walk the comic page by page and save each page's image.

    Starting at ``url``, every page is fetched, its comic image is stored in
    ``folder_name`` and the link returned by ``get_prev`` is followed. The
    walk ends at the first link ending in ``/`` or when no usable link is
    left.

    Args:
        url: Page to start from.
        folder_name: Directory for the images; created when missing.
    """
    # Imported locally so the file's import block does not have to change.
    from urllib.parse import urljoin

    os.makedirs(folder_name, exist_ok=True)
    visited = set()
    while url and not url.endswith('/'):
        if url in visited:
            # A link leading back to a page already handled would otherwise
            # keep the loop running forever.
            print('Already visited %s, stopping' % url)
            break
        visited.add(url)

        page = get_page(url)
        soup = bs4.BeautifulSoup(page.text, features="html.parser")
        comicElem = search(soup)
        # Covers both "no element found" and "element without a src".
        comicUrl = comicElem[0].get('src') if comicElem else None
        if not comicUrl:
            print("Could not find comic image")
        else:
            # Resolve relative or protocol-relative addresses against the
            # page URL; absolute addresses pass through unchanged.
            comicUrl = urljoin(url, comicUrl)
            image = download_image(comicUrl)
            save_image_file(image, folder_name, comicUrl)

        try:
            next_url = get_prev(soup)
        except IndexError:
            # get_prev indexes the third navigation link, which is absent on
            # pages with fewer links (for example an error page).
            print('Could not find the next page link on %s' % url)
            break
        url = urljoin(url, next_url) if next_url else None
    print('Done')


# Start the download only when the file is run as a script, not on import.
if __name__ == "__main__":
    main()

本回答被题主选为最佳回答 , 对您是否有帮助呢?

编辑记录

报告相同问题？

关注问题

python requests.get报错_解决Python requests 报错方法集锦
2021-01-14 11:31

海恩·欧鲁纳的博客 python版本和ssl版本都会...1 Python2.6x use requests一台老Centos机器上跑着古老的应用，加了一个新模块之后报错报错 InsecurePlatformWarning: A true SSLContext object is not available./usr/lib/python2.6/...
Python requests库常见报错汇总
2025-06-13 17:41

废材的自我修养的博客 requests库是Python中最流行的HTTP客户端库之一，但在使用过程中可能会遇到各种错误。
python requests 报错整理
2023-03-10 18:14

爬吧爬吧的博客 python requests常见报错整理，包含各类疑难杂症
python requests.get报错_python 第三方库requests连接url报错
2021-01-14 11:31

weixin_39860952的博客使用的语句是page = requests.get( url , headers = self.header, timeout = 10 , verify = flag )各变量的值分别为url = 'http://www.sbacn.org'flag = Falseself.header = {'User-Agent' : 'Mozilla/5.0 ...
python 使用requests库，以及请求报错问题解决
2022-10-14 14:12

回忆哆啦没有A梦的博客 python 使用requests库，以及请求报错问题解决。（响应状态418，请求异常，SSL错误，证书认证失败问题：exceptions.SSLError: HTTPSConnectionPool(host=‘httpbin.org’, port=443): Max retries exceeded with url...
pythonrequests方法_解决Python requests报错方法总结
2020-12-04 20:21

weixin_39589644的博客这篇文章主要介绍了解决Python requests 报错方法集锦的相关资料,需要的朋友可以参考下python版本和ssl版本都会导致 requests在请求https网站时候会出一些错误，最好使用新版本。1 Python2.6x use requests一台老...
python requests header 参数_解决python3 requests headers参数不能有中文的问题
2020-12-11 14:34

weixin_39686634的博客 1 需求，headers 参数需要拼接中文参数param 解决如下url = 'https://....search?keyword=' + param + '&...pageSize=10'headers = {"Accept": "application/json, text/javascript, */*; q=0.01","Acce...
Python requests库使用详解
2022-12-25 16:49

永远是少年啊的博客今天继续给大家介绍Python 爬虫相关知识，本文主要内容是Python requests库使用详解。一、Python requests库简介二、requests库常用方法三、response对象使用
python requests最全使用指南
2023-12-18 14:21

大模型教程最新的博客 Requests 是一个 Python 的一个第三方库，通过发送 HTTP 请求获取响应数据，一般应用于编写网络爬虫和接口测试等。相比 urllib 库，它语法简单，更容易上手。Requests: 让 HTTP 服务人类在使用 requests 模拟发送...
用 Python 的 requests 库爬网页，加个 headers 就不被封？实测有效
2025-08-08 23:11

大力出奇迹985的博客本文通过对 “使用 Python 的 requests 库爬网页时，添加 headers 是否能避免被封” 这一问题的探讨和实测，得出以下结论：添加 headers 能够有效提高爬虫请求的成功率，减少被网站封禁的概率，是爬虫开发中非常重要...
没有解决我的问题, 去提问

问题事件

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
已结题（查看结题原因） 4月1日
关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
已采纳回答 3月26日
关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
创建了问题 3月26日

Python requests中的headers使用报错

1条回答 默认 最新

问题事件

1条回答默认最新