# -*- coding: utf-8 -*-
# @Time : 2021/3/8 6:49
# @File :05. bs4实例——三国演义.py
# @Software : PyCharm
import requests
from bs4 import BeautifulSoup
if __name__ == '__main__':
    # Scrape every chapter of "Romance of the Three Kingdoms" from
    # shicimingju.com and write "<title>:<content>" for each chapter,
    # one after another, into ./三国演义.txt.
    from urllib.parse import urljoin

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 5.1; rv:52.0) Gecko/20100101 Firefox/52.0'
    }
    site_root = 'https://www.shicimingju.com/'
    url = 'https://www.shicimingju.com/book/sanguoyanyi.html'
    # Seconds to wait per request; without a timeout a stalled
    # connection would hang the script forever.
    request_timeout = 10

    # One session reuses the underlying connection for all chapter requests.
    with requests.Session() as session:
        session.headers.update(headers)

        # Fetch the table-of-contents page; fail loudly on an HTTP error
        # instead of parsing an error page.
        index_response = session.get(url, timeout=request_timeout)
        index_response.raise_for_status()

        # Step 1: load the page source into a BeautifulSoup object.
        # Raw bytes (.content) are passed rather than .text so that
        # BeautifulSoup detects the encoding from the document itself;
        # requests falls back to ISO-8859-1 when the server declares no
        # charset, which garbles Chinese text.
        soup = BeautifulSoup(index_response.content, 'lxml')

        # Step 2: each <li> holds one chapter's title and detail-page link.
        li_list = soup.select('.book-mulu > ul > li')

        # The context manager guarantees the file is flushed and closed,
        # even if a later request raises.
        with open('./三国演义.txt', 'w', encoding='utf-8') as fp:
            for li in li_list:
                anchor = li.a
                # Skip list items that carry no usable chapter link.
                if anchor is None or not anchor.get('href'):
                    continue

                # get_text() always returns a str, whereas .string is None
                # when the <a> tag has more than one child node.
                title = anchor.get_text()
                # urljoin avoids the '//' that plain concatenation yields
                # when the href already starts with '/'.
                detail_url = urljoin(site_root, anchor['href'])

                # Request the detail page and parse out the chapter body.
                try:
                    detail_response = session.get(
                        detail_url, timeout=request_timeout
                    )
                    detail_response.raise_for_status()
                except requests.RequestException as exc:
                    print(title, '爬取失败', exc)
                    continue

                detail_soup = BeautifulSoup(detail_response.content, 'lxml')
                div_tag = detail_soup.find('div', class_='chapter_content')
                if div_tag is None:
                    # The expected container was not found on this page.
                    print(title, '爬取失败')
                    continue

                content = div_tag.text

                # '\n' (not the literal text '/n') terminates each chapter.
                fp.write(title + ':' + content + '\n')
                print(title, '爬取成功')
# Question: where exactly is this going wrong?