# 最近刚接触 Python 一个星期,写了一个爬虫小程序,用于输入小说目录页 URL 从而下载小说。
# 这个程序在下载十几章之后就会停止,然后显示 IndexError: list index out of range。
# 麻烦各位帮我看下,URL 可以用 http://www.xbiquge.la/10/10489/
import time

import requests
from bs4 import BeautifulSoup
mainl = "http://www.xbiquge.la/"  # site root; chapter hrefs on the TOC page are site-relative

print("请输入小说目录界面URL(温馨提示:粘贴快捷键ctrl+v)")  # prompt for the table-of-contents URL
target = input()
print("请输入小说书名:")
name = input()  # used to name the saved table-of-contents file

# Fetch and parse the table-of-contents page.
req = requests.get(url=target)
req.encoding = "utf-8"  # the site serves UTF-8; set explicitly to avoid mojibake
html1 = req.text
bfl = BeautifulSoup(html1, "html.parser")
one_list = bfl.find_all("div", id="list")
if not one_list:
    # No chapter-list container: wrong URL, or the site rejected the request.
    # (The original would crash here with IndexError on one_list[0].)
    raise SystemExit("未找到目录列表,请检查输入的URL是否正确")
twon_list = BeautifulSoup(str(one_list[0]), "html.parser")
t_list = twon_list.find_all("a")  # one <a> per chapter
two_list = one_list[0].text.replace('\xa0' * 8, '\n')
print(two_list)

# Save the table of contents. Bug fix: the original open() had no encoding=
# argument, so it used the locale default and could raise UnicodeEncodeError
# on Windows when writing Chinese text; `with` guarantees the file is closed.
with open("%s目录.txt" % (name), "a", encoding="utf-8") as main_list:
    main_list.write(two_list)
print("该书目录已生成至小说根目录,名称为%s目录;请再次按下回车键" % (name))
input()
# Download every chapter into a.txt.
#
# Bug fix for the reported crash: after a dozen or so rapid requests the site
# throttles the client and returns an error page that has no <div id="content">,
# so find_all(...) comes back empty and texts[0] raised
# "IndexError: list index out of range". We now retry each chapter a few times
# with a short pause, and skip it (with a notice) if it keeps failing instead
# of crashing the whole download.
with open('a.txt', 'a', encoding="utf-8") as f:  # open once, not once per chapter
    for each in t_list:
        href = each.get("href")
        if not href:
            # Anchors without an href (e.g. in-page navigation) are not chapters;
            # the original would have crashed on `mainl + None`.
            continue
        final_list = mainl + href
        texts = []
        for attempt in range(5):
            req2 = requests.get(url=final_list)
            req2.encoding = "utf-8"  # avoid mojibake in the chapter text
            bf = BeautifulSoup(req2.text, "html.parser")
            texts = bf.find_all('div', id="content")  # chapter body container
            if texts:
                break
            time.sleep(1)  # back off before retrying a throttled request
        if not texts:
            print("章节下载失败,已跳过: %s" % final_list)
            continue
        txtm = texts[0].text.replace('\xa0' * 8, '\n')
        f.write(txtm)
print("该小说内容已完全下载至程序根目录,谢谢使用")
input()