yueying639 2022-03-26 17:07 采纳率: 60%
浏览 46
已结题

Python requests中的headers使用报错

运行时报错

# _*_coding:utf-8_*_
# ! python3
"""
# Using pyCharm 2021.3.2 Community and python 3.8/3.9
"""

import requests
import os
import bs4
import random

# Browser User-Agent strings to choose from.
# NOTE(review): braces holding bare strings (no "key: value" pairs) create a
# set, not a list or a dict.  random.choice() indexes its argument, which a
# set does not support -- this matches the reported
# "TypeError: 'set' object is not subscriptable".
headers = {
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
    "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 "
}


def get_page(url):
    """Request *url* and return the response object.

    An HTTP error status is printed rather than raised, so the response is
    returned in either case.
    """
    # Download the page
    print('Downloading page %s' % url)
    # NOTE(review): random.choice() is called on the module-level ``headers``
    # set here, and the picked string is passed directly as ``headers=``;
    # requests expects a mapping such as {'User-Agent': ...} for that argument.
    res = requests.get(url, headers=random.choice(headers))
    try:
        res.raise_for_status()
    except Exception as exc:
        print('There was a problem : %s' % exc)
    return res


def search(soup):
    """Return the ``mip-img`` elements found inside ``div[class="erPag"]``.

    *soup* is any object exposing a CSS ``select`` method; the result of that
    call is returned unchanged.
    """
    image_selector = 'div[class="erPag"] mip-img'
    return soup.select(image_selector)


def download_image(comicUrl):
    """Request the image at *comicUrl* and return the response object.

    An HTTP error status is printed rather than raised, so the response is
    returned in either case.
    """
    # Download the image
    print('Downloading image %s...' % comicUrl)
    # NOTE(review): random.choice() is called on the module-level ``headers``
    # set here, and the picked string is passed directly as ``headers=``;
    # requests expects a mapping such as {'User-Agent': ...} for that argument.
    res = requests.get(comicUrl, headers=random.choice(headers))
    try:
        res.raise_for_status()
    except Exception as exc:
        print('There was a problem : %s' % exc)
    return res


def save_image_file(res, folder_name, image_name):
    """Write the body of *res* to a file inside *folder_name*.

    Args:
        res: response-like object exposing ``iter_content(chunk_size)``.
        folder_name: existing directory that receives the file.
        image_name: name or URL of the image; only its last path component
            is used as the file name.
    """
    print('Saving image file %s...\n' % image_name)
    # Save image to folder
    target_path = os.path.join(folder_name, os.path.basename(image_name))
    # ``with`` guarantees the file is closed even when reading a chunk or
    # writing it raises part-way through.
    with open(target_path, 'wb') as image_file:
        for chunk in res.iter_content(100000):
            image_file.write(chunk)


def get_prev(soup):
    """Return the ``href`` of the third ``#action ul li mip-link`` element.

    Raises IndexError when fewer than three such elements are selected.
    """
    nav_links = soup.select('#action ul li mip-link')
    third_link = nav_links[2]
    return third_link.get("href")


def main():
    """Crawl the comic page by page and save every page image into ``TKGC3``.

    Starts at a fixed page URL and keeps following the link returned by
    ``get_prev`` until that link ends with '/', is missing, or cannot be
    found on the page.
    """
    url = 'https://m.100fanwo.com/manhua/dongjingzhongre/2782056.html'  # starting url
    folder_name = 'TKGC3'
    os.makedirs(folder_name, exist_ok=True)
    # get_prev() returns None when the link has no href; stop instead of
    # failing on None.endswith().
    while url and not url.endswith('/'):
        res = get_page(url)
        soup = bs4.BeautifulSoup(res.text, features="html.parser")
        comicElem = search(soup)
        # An element without a ``src`` attribute is treated like a missing image.
        comicUrl = comicElem[0].get('src') if comicElem else None
        if not comicUrl:
            print("Could not find comic image")
        else:
            image_res = download_image(comicUrl)
            save_image_file(image_res, folder_name, comicUrl)
        try:
            url = get_prev(soup)
        except IndexError:
            # Fewer than three navigation links on the page: nothing to follow.
            print('Could not find the navigation link')
            break
    print('Done')


# Run the crawl only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()


报错内容:TypeError: 'set' object is not subscriptable

  • 写回答

1条回答 默认 最新

  • 陈年椰子 2022-03-26 17:19
    关注
    
    # _*_coding:utf-8_*_
    # ! python3
    """
    # Using pyCharm 2021.3.2 Community and python 3.8/3.9
    # Tokyo Ghoul full comic downloader
    # main.py downloads all comics from
    https://www.m.100fanwo.com/manhua/dongjingzhongre/ on 2022
    # the first page of the comic is
    https://m.100fanwo.com/manhua/dongjingzhongre/2782054.html
    """
    
    import requests
    import os
    import bs4
    import random
    
    # Pool of browser User-Agent strings; a list so that random.choice() can
    # index into it.
    headers = [
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11",
        "Opera/9.25 (Windows NT 5.1; U; en)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12",
        "Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9",
        # One entry split over two literals (implicit concatenation).
        (
            "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) "
            "Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7"
        ),
        "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ",
    ]
    
    
    def get_page(url, timeout=30):
        """Download *url* with a randomly chosen User-Agent and return the response.

        Args:
            url: address of the page to fetch.
            timeout: seconds passed to ``requests.get``; without it requests
                waits indefinitely for an unresponsive server.

        Returns:
            The ``requests`` response. An HTTP error status is printed, not
            raised, so the response is returned either way.
        """
        # Download the page
        print('Downloading page %s' % url)
        user_agent = random.choice(headers)
        res = requests.get(url, headers={'User-Agent': user_agent}, timeout=timeout)
        try:
            res.raise_for_status()
        except requests.exceptions.HTTPError as exc:
            # raise_for_status() signals bad statuses with HTTPError only.
            print('There was a problem : %s' % exc)
        return res
    
    
    def search(soup):
        """Select the comic image elements from a parsed page.

        Runs the CSS selector ``div[class="erPag"] mip-img`` on *soup* and
        returns whatever ``soup.select`` yields.
        """
        matches = soup.select('div[class="erPag"] mip-img')
        return matches
    
    
    def download_image(comicUrl, timeout=30):
        """Download the image at *comicUrl* with a randomly chosen User-Agent.

        Args:
            comicUrl: address of the image to fetch.
            timeout: seconds passed to ``requests.get``; without it requests
                waits indefinitely for an unresponsive server.

        Returns:
            The ``requests`` response. An HTTP error status is printed, not
            raised, so the response is returned either way.
        """
        # Download the image
        print('Downloading image %s...' % comicUrl)
        user_agent = random.choice(headers)
        res = requests.get(comicUrl, headers={'User-Agent': user_agent}, timeout=timeout)
        try:
            res.raise_for_status()
        except requests.exceptions.HTTPError as exc:
            # raise_for_status() signals bad statuses with HTTPError only.
            print('There was a problem : %s' % exc)
        return res
    
    
    def save_image_file(res, folder_name, image_name):
        """Stream the body of *res* into a file inside *folder_name*.

        Args:
            res: response-like object exposing ``iter_content(chunk_size)``.
            folder_name: existing directory that receives the file.
            image_name: name or URL of the image; only its last path
                component is used as the file name.
        """
        print('Saving image file %s...\n' % image_name)
        # Save image to folder
        destination = os.path.join(folder_name, os.path.basename(image_name))
        # The context manager closes the file even if a chunk read or write
        # raises part-way through the download.
        with open(destination, 'wb') as imageFile:
            for chunk in res.iter_content(100000):
                imageFile.write(chunk)
    
    
    def get_prev(soup):
        """Return the ``href`` attribute of the third navigation ``mip-link``.

        The links are selected with ``#action ul li mip-link``; an IndexError
        propagates when fewer than three are present.
        """
        # Get the button's url.
        return soup.select('#action ul li mip-link')[2].get("href")
    
    
    def main():
        """Crawl the comic page by page and save every page image into ``TKGC3``.

        Starts at a fixed page URL and keeps following the link returned by
        ``get_prev`` until that link ends with '/', is missing, or cannot be
        found on the page.
        """
        url = 'https://m.100fanwo.com/manhua/dongjingzhongre/2782056.html'  # starting url
        folder_name = 'TKGC3'
        os.makedirs(folder_name, exist_ok=True)
        # get_prev() returns None when the link has no href; stop instead of
        # failing on None.endswith().
        while url and not url.endswith('/'):
            res = get_page(url)
            soup = bs4.BeautifulSoup(res.text, features="html.parser")
            comicElem = search(soup)
            # An element without a ``src`` attribute is treated like a missing image.
            comicUrl = comicElem[0].get('src') if comicElem else None
            if not comicUrl:
                print("Could not find comic image")
            else:
                image_res = download_image(comicUrl)
                save_image_file(image_res, folder_name, comicUrl)
            try:
                url = get_prev(soup)
            except IndexError:
                # Fewer than three navigation links on the page: nothing to follow.
                print('Could not find the navigation link')
                break
        print('Done')
    
    
    # Run the crawl only when the file is executed as a script, not on import.
    if __name__ == "__main__":
        main()
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论 编辑记录

报告相同问题?

问题事件

  • 已结题 (查看结题原因) 4月1日
  • 已采纳回答 3月26日
  • 创建了问题 3月26日

悬赏问题

  • ¥15 一个服务器已经有一个系统了如果用usb再装一个系统,原来的系统会被覆盖掉吗
  • ¥15 使用esm_msa1_t12_100M_UR50S蛋白质语言模型进行零样本预测时,终端显示出了sequence handled的进度条,但是并不出结果就自动终止回到命令提示行了是怎么回事:
  • ¥15 前置放大电路与功率放大电路相连放大倍数出现问题
  • ¥30 关于<main>标签页面跳转的问题
  • ¥80 部署运行web自动化项目
  • ¥15 腾讯云如何建立同一个项目中物模型之间的联系
  • ¥30 VMware 云桌面水印如何添加
  • ¥15 用ns3仿真出5G核心网网元
  • ¥15 matlab答疑 关于海上风电的爬坡事件检测
  • ¥88 python部署量化回测异常问题