用 Python 爬小说时遇到两个问题:一是爬下来的正文无法换行,所有内容都挤在一起;二是运行到最后会报错 IndexError: list index out of range。有大佬可以帮忙看看吗?代码和报错信息如下:
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
path = r'D:\pythonProject2\ '
passage = 0
url = "https://m.dijiubook.net/70_70151/25290272.html"
endurl = "https://m.dijiubook.net/70_70151/25290272.html"
head = {}
headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0'}
while True:
r = requests.get(url, headers=head)
r.encoding = r.apparent_encoding
soup = BeautifulSoup(r.text, "html.parser")
#标题
title = soup.select("#nr_title")[0].get_text()
#正文
text = soup.select("#neirong")[0].get_text()
with open(path + "《传说之异世双尊》by 冰糖莲子羹.txt", 'a', encoding='utf-8') as f:
f.write("\r\n")
for i in title:
f.write(i)
f.write('\r\n')
for x in text:
f.write(x)
f.write('\r\n')
passage+=0
#if(url==endurl):
#break
nexturl = "https://m.dijiubook.net" + soup.select("#pb_next")[0]['href']
url = nexturl
print(title+text)
os.system("pause")
Traceback (most recent call last):
File "D:\pythonProject2\小时.py", line 30, in <module>
nexturl = "https://m.dijiubook.net" + soup.select("#pb_next")[0]['href']
IndexError: list index out of range