Python requests中的headers使用报错

运行下面的代码时报错：

# _*_coding:utf-8_*_
# ! python3
"""
# Using pyCharm 2021.3.2 Community and python 3.8/3.9
"""

import requests
import os
import bs4
import random

# Pool of User-Agent strings; get_page() and download_image() pick one at
# random for each request.
# NOTE(review): the braces hold bare values with no "key: value" pairs, so this
# is a set literal, not a dict or a list. random.choice() needs an indexable
# sequence, which a set is not -- this is what produces the reported
# "TypeError: 'set' object is not subscriptable".
headers = {
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
    "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 "
}


def get_page(url):
    """Request ``url`` and return the response object.

    An HTTP error status is printed to stdout but not re-raised, so the
    response is returned to the caller either way.
    """
    # Download the page
    print('Downloading page %s' % url)
    # NOTE(review): two problems on the next line. The module-level ``headers``
    # is a set, so random.choice() raises TypeError before any request is made.
    # Also, the chosen value is a bare string, while the ``headers=`` argument
    # of requests.get() takes a dict such as {'User-Agent': <string>}.
    res = requests.get(url, headers=random.choice(headers))
    try:
        res.raise_for_status()
    except Exception as exc:
        # Report the failure and carry on; ``res`` is still returned below.
        print('There was a problem : %s' % exc)
    return res


def search(soup):
    """Return every ``mip-img`` element nested under ``div[class="erPag"]``.

    ``soup`` only needs a ``select(css_selector)`` method; whatever that call
    returns is handed back unchanged.
    """
    return soup.select('div[class="erPag"] mip-img')


def download_image(comicUrl):
    """Request the image at ``comicUrl`` and return the response object.

    An HTTP error status is printed to stdout but not re-raised, so the
    response is returned to the caller either way.
    """
    # Download the image
    print('Downloading image %s...' % comicUrl)
    # NOTE(review): same two problems as in get_page(). random.choice() cannot
    # pick from the ``headers`` set (TypeError), and requests.get() takes a
    # dict for ``headers=``, not a bare User-Agent string.
    res = requests.get(comicUrl, headers=random.choice(headers))
    try:
        res.raise_for_status()
    except Exception as exc:
        # Report the failure and carry on; ``res`` is still returned below.
        print('There was a problem : %s' % exc)
    return res


def save_image_file(res, folder_name, image_name):
    """Write the body of ``res`` to a file inside ``folder_name``.

    Args:
        res: response-like object exposing ``iter_content(chunk_size)`` that
            yields ``bytes`` chunks.
        folder_name: existing directory that receives the file.
        image_name: name or URL of the image; only its last path component
            (``os.path.basename``) is used as the file name.
    """
    print('Saving image file %s...\n' % image_name)
    target = os.path.join(folder_name, os.path.basename(image_name))
    # The context manager closes the file even if streaming or writing fails
    # part-way through, so the handle is never leaked.
    with open(target, 'wb') as image_file:
        for chunk in res.iter_content(100000):
            image_file.write(chunk)


def get_prev(soup):
    """Return the ``href`` of the third ``#action ul li mip-link`` element.

    The result is ``None`` when that element has no ``href``. An
    ``IndexError`` propagates when fewer than three elements match.
    """
    links = soup.select('#action ul li mip-link')
    return links[2].get("href")


def main():
    """Walk the comic page by page and save each page's image into ``TKGC3``.

    Starts from a hard-coded URL and keeps following the link returned by
    get_prev() until that link is missing or ends with '/'.
    """
    url = 'https://m.100fanwo.com/manhua/dongjingzhongre/2782056.html'  # starting url
    folder_name = 'TKGC3'
    os.makedirs(folder_name, exist_ok=True)
    # get_prev() returns None when the link has no href; stop cleanly in that
    # case instead of failing on None.endswith().
    while url and not url.endswith('/'):
        page = get_page(url)
        soup = bs4.BeautifulSoup(page.text, features="html.parser")
        comic_elems = search(soup)
        # An element without a ``src`` attribute is treated like a missing
        # image rather than being passed on as None to download_image().
        comic_url = comic_elems[0].get('src') if comic_elems else None
        if not comic_url:
            print("Could not find comic image")
        else:
            image = download_image(comic_url)
            save_image_file(image, folder_name, comic_url)
        url = get_prev(soup)
    print('Done')


if __name__ == "__main__":
    main()

报错内容：TypeError: 'set' object is not subscriptable

写回答
好问题 0 提建议
追加酬金
关注问题
分享
邀请回答
编辑收藏删除
收藏举报

1条回答默认最新

陈年椰子 2022-03-26 17:19

关注


# _*_coding:utf-8_*_
# ! python3
"""
# Using pyCharm 2021.3.2 Community and python 3.8/3.9
# Tokyo Ghoul full comix downloader
# main.py download all comix from
https://www.m.100fanwo.com/manhua/dongjingzhongre/ on 2022
# the first page of the ComiX is
https://m.100fanwo.com/manhua/dongjingzhongre/2782054.html
"""

import requests
import os
import bs4
import random

# Pool of User-Agent strings; get_page() and download_image() pick one at
# random for each request and send it as the 'User-Agent' header.
# This is a list, so random.choice() can index into it.
headers = [
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
    # The next two literals are adjacent with no comma between them, so Python
    # joins them into a single User-Agent string.
    "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 "
    "Chrome/16.0.912.77 Safari/535.7",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ",
]


def get_page(url, timeout=30):
    """Request ``url`` with a random User-Agent and return the response.

    Args:
        url: address of the page to fetch.
        timeout: seconds passed to ``requests.get`` as its ``timeout``.
            Without one, a stalled server can block the script indefinitely.

    Returns:
        The ``requests`` response. An HTTP error status is printed to stdout
        but not re-raised, so the response is returned either way.

    Raises:
        Exceptions raised by ``requests.get`` itself (connection failure,
        timeout, ...) propagate to the caller.
    """
    print('Downloading page %s' % url)
    res = requests.get(
        url,
        headers={'User-Agent': random.choice(headers)},
        timeout=timeout,
    )
    try:
        res.raise_for_status()
    except requests.HTTPError as exc:
        # Best effort: report the bad status and let the caller decide.
        print('There was a problem : %s' % exc)
    return res


def search(soup):
    """Return every ``mip-img`` element nested under ``div[class="erPag"]``.

    ``soup`` only needs a ``select(css_selector)`` method; whatever that call
    returns is handed back unchanged.
    """
    return soup.select('div[class="erPag"] mip-img')


def download_image(comicUrl, timeout=30):
    """Request the image at ``comicUrl`` with a random User-Agent.

    Args:
        comicUrl: address of the image to fetch.
        timeout: seconds passed to ``requests.get`` as its ``timeout``.
            Without one, a stalled server can block the script indefinitely.

    Returns:
        The ``requests`` response. An HTTP error status is printed to stdout
        but not re-raised, so the response is returned either way.

    Raises:
        Exceptions raised by ``requests.get`` itself (connection failure,
        timeout, ...) propagate to the caller.
    """
    print('Downloading image %s...' % comicUrl)
    res = requests.get(
        comicUrl,
        headers={'User-Agent': random.choice(headers)},
        timeout=timeout,
    )
    try:
        res.raise_for_status()
    except requests.HTTPError as exc:
        # Best effort: report the bad status and let the caller decide.
        print('There was a problem : %s' % exc)
    return res


def save_image_file(res, folder_name, image_name):
    """Write the body of ``res`` to a file inside ``folder_name``.

    Args:
        res: response-like object exposing ``iter_content(chunk_size)`` that
            yields ``bytes`` chunks.
        folder_name: existing directory that receives the file.
        image_name: name or URL of the image; only its last path component
            (``os.path.basename``) is used as the file name.
    """
    print('Saving image file %s...\n' % image_name)
    target = os.path.join(folder_name, os.path.basename(image_name))
    # The context manager closes the file even if streaming or writing fails
    # part-way through, so the handle is never leaked.
    with open(target, 'wb') as image_file:
        for chunk in res.iter_content(100000):
            image_file.write(chunk)


def get_prev(soup):
    """Return the ``href`` of the third ``#action ul li mip-link`` element.

    The result is ``None`` when that element has no ``href``. An
    ``IndexError`` propagates when fewer than three elements match.
    """
    links = soup.select('#action ul li mip-link')
    return links[2].get("href")


def main():
    """Walk the comic page by page and save each page's image into ``TKGC3``.

    Starts from a hard-coded URL and keeps following the link returned by
    get_prev() until that link is missing or ends with '/'.
    """
    url = 'https://m.100fanwo.com/manhua/dongjingzhongre/2782056.html'  # starting url
    folder_name = 'TKGC3'
    os.makedirs(folder_name, exist_ok=True)
    # get_prev() returns None when the link has no href; stop cleanly in that
    # case instead of failing on None.endswith().
    while url and not url.endswith('/'):
        page = get_page(url)
        soup = bs4.BeautifulSoup(page.text, features="html.parser")
        comic_elems = search(soup)
        # An element without a ``src`` attribute is treated like a missing
        # image rather than being passed on as None to download_image().
        comic_url = comic_elems[0].get('src') if comic_elems else None
        if not comic_url:
            print("Could not find comic image")
        else:
            image = download_image(comic_url)
            save_image_file(image, folder_name, comic_url)
        url = get_prev(soup)
    print('Done')


if __name__ == "__main__":
    main()

本回答被题主选为最佳回答，对您是否有帮助呢？

编辑记录

报告相同问题？

关注问题

Python requests中的headers使用报错 python
2022-03-26 17:07

回答 1 已采纳 # _*_coding:utf-8_*_ # ! python3 """ # Using pyCharm 2021.3.2 Community and python 3.8/3.9 # Tokyo
python requests输出报错 python
2023-02-01 21:16

回答 2 已采纳可能是因為網站反爬，需要添加請求頭信息以模擬瀏覽器請求。你也可以把請求頭中的User-Agent改為一個常見的瀏覽器的版本，這樣就可以避免被網站反爬了。此错误也可能是由于代码中的链接是无效的，或者请求
Python爬虫配合VPN爬取出现报错 python 爬虫
2021-12-22 17:33

回答 1 已采纳你这个是VPN代理问题，你可以将VPN设置成部分代理，不要全部代理你的网络。
使用python将请求的requests headers参数格式化方法
2020-09-19 18:43

今天小编就为大家分享一篇使用python将请求的requests headers参数格式化方法，具有很好的参考价值，希望对大家有所帮助。一起跟随小编过来看看吧
python爬虫基础小题目报错 python
2022-04-12 13:59

回答 3 已采纳题主，你代码里链接写错了，是点号你写成了逗号 link="http://www.santostang.com/"
python爬虫检索超出范围报错 python 爬虫
2022-09-08 16:31

回答 3 已采纳你输出con，输出的con是空列表，说明你节点的定位是有问题的，直接去开发者面板复制定位路径
python requests无结果，如何解决？ python 有问必答
2021-12-15 15:26

回答 1 已采纳加上cookie headers = { 'content-type': 'text/html; charset=GBK', 'User-Agent': 'Mozilla/5.0 (W
解决python3 requests headers参数不能有中文的问题
2020-09-18 18:49

今天小编就为大家分享一篇解决python3 requests headers参数不能有中文的问题，具有很好的参考价值，希望对大家有所帮助。一起跟随小编过来看看吧
使用python requests session模拟登录后无法取得网页的问题 http python 有问必答
2021-08-16 09:06

回答 1 已采纳 import requests url_1 = 'http://www.daweilai211.com/Account/Login' headers = { 'user-agent': '
python中requests库中文乱码问题 html python 爬虫
2022-08-22 09:50

回答 5 已采纳设置一下编码集就行了： import requests url = 'https://www.baidu.com/s?%27' headers = { 'User-Agent': 'Mozilla
python类调用报错 python 爬虫
2022-04-13 17:31

回答 2 已采纳 get_tar_urls 写错了，应该是get_target_urls望采纳
python requests.get报错_解决Python requests 报错方法集锦
2021-01-14 11:31

海恩·欧鲁纳的博客 python版本和ssl版本都会...1 Python2.6x use requests一台老Centos机器上跑着古老的应用，加了一个新模块之后报错报错 InsecurePlatformWarning: A true SSLContext object is not available./usr/lib/python2.6/...
用python requests.post（）实现翻页，表单上传后返回数据缺失 python 有问必答
2021-11-27 19:50

回答 1 已采纳 1.需要传cookies参数,2.data数据的即字典的值都要写成字符串。获取json后再从中用bs4解析出数据。参考如下代码： import requests cookies = { 'P
python 使用requests库，以及请求报错问题解决
2022-10-14 14:12

回忆哆啦没有A梦的博客 python 使用requests库，以及请求报错问题解决。（响应状态418，请求异常，SSL错误，证书认证失败问题：exceptions.SSLError: HTTPSConnectionPool(host=‘httpbin.org’, port=443): Max retries exceeded with url...
python requests 报错整理
2023-03-10 18:14

爬吧爬吧的博客 python requests常见报错整理，包含各类疑难杂症
没有解决我的问题, 去提问

问题事件

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
已结题（查看结题原因） 4月1日
关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
已采纳回答 3月26日
关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
创建了问题 3月26日

悬赏问题

¥15 一个服务器已经有一个系统了如果用usb再装一个系统，原来的系统会被覆盖掉吗
¥15 使用esm_msa1_t12_100M_UR50S蛋白质语言模型进行零样本预测时，终端显示出了sequence handled的进度条，但是并不出结果就自动终止回到命令提示行了是怎么回事：
¥15 前置放大电路与功率放大电路相连放大倍数出现问题
¥30 关于<main>标签页面跳转的问题
¥80 部署运行web自动化项目
¥15 腾讯云如何建立同一个项目中物模型之间的联系
¥30 VMware 云桌面水印如何添加
¥15 用ns3仿真出5G核心网网元
¥15 matlab答疑关于海上风电的爬坡事件检测
¥88 python部署量化回测异常问题

Python requests中的headers使用报错

1条回答 默认 最新

问题事件

悬赏问题

1条回答默认最新