运行时报错
# _*_coding:utf-8_*_
# ! python3
"""
# Using pyCharm 2021.3.2 Community and python 3.8/3.9
"""
import requests
import os
import bs4
import random
headers = {
'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
'Opera/9.25 (Windows NT 5.1; U; en)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
"Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 "
}
def get_page(url):
# Download the page
print('Downloading page %s' % url)
res = requests.get(url, headers=random.choice(headers))
try:
res.raise_for_status()
except Exception as exc:
print('There was a problem : %s' % exc)
return res
def search(soup):
# Find the URL of the comic image.
comicElem = soup.select('div[class="erPag"] mip-img')
return comicElem
def download_image(comicUrl):
# Download the image
print('Downloading image %s...' % comicUrl)
res = requests.get(comicUrl, headers=random.choice(headers))
try:
res.raise_for_status()
except Exception as exc:
print('There was a problem : %s' % exc)
return res
def save_image_file(res, folder_name, image_name):
print('Saving image file %s...\n' % image_name)
# Save image to folder
imageFile = open(os.path.join(folder_name, os.path.basename(image_name)), 'wb')
for chunk in res.iter_content(100000):
imageFile.write(chunk)
imageFile.close()
def get_prev(soup):
# Get the button's url.
prevLinc = soup.select('#action ul li mip-link')[2]
url = prevLinc.get("href")
return url
def main():
url = 'https://m.100fanwo.com/manhua/dongjingzhongre/2782056.html' # starting url
folder_name = 'TKGC3'
os.makedirs(folder_name, exist_ok=True)
while not url.endswith('/'):
res = get_page(url)
soup = bs4.BeautifulSoup(res.text, features="html.parser")
comicElem = search(soup)
if not comicElem:
print("Could not find comic image")
else:
comicUrl = comicElem[0].get('src')
res = download_image(comicUrl)
image_name = comicUrl
save_image_file(res, folder_name, image_name)
url = get_prev(soup)
print('Done')
if __name__ == "__main__":
main()
报错内容:TypeError: 'set' object is not subscriptable