import sys
import requests
from bs4 import BeautifulSoup
class downloader():
def __init__(self):
self.server='https://www.biqukan.com'
self.book='https://www.biqukan.com/18_18805'
self.names=[]
self.urls=[]
def get_url(self):
re=requests.get(self.book)
bs=BeautifulSoup(re.text,features='html5lib')
div = bs.find_all('div', class_='listmain')
bs_child = BeautifulSoup(str(div[0]), features="html5lib")
div_a = bs_child.find_all('a')
div_a = div_a[12:]
for a in div_a:
self.names.append(a.string)
self.urls.append(self.server + a.get('href'))
# for i in range(len(self.names)):
# print(self.names[i], ' ', self.urls[i])
def get_content(self,url):
re = requests.get(url)
bs=BeautifulSoup(re.text,features='html5lib')
content=bs.find_all('div',id='content')
text=content[0].text
# print(text)
return text
def save(self,path,name,text):
with open(path,'a',encoding='utf-8') as f:
f.write(name+'\n')
f.writelines(text)
f.write('\n\n')
if __name__=='__main__':
dl=downloader()
dl.get_url()
print('《地球唯一修士》开始下载:')
for i in range(len(dl.urls)):
t=dl.get_content(dl.urls[i])
dl.save('novel.txt', dl.names[i],t)
sys.stdout.write("已下载:{:.3f}".format(i/len(dl.names))+'\r')
sys.stdout.flush()
print("《地球唯一修士》下载完毕!")
Traceback (most recent call last):
File "test.py", line 47, in
t=dl.get_content(dl.urls[i])
File "test.py", line 31, in get_content
text=content[0].text
IndexError: list index out of range