import requests
import re # 内库 ,标准库
# Convention (PEP 8): a top-level class definition is separated from the preceding code by two blank lines.
class NovelSpider:
    """Small novel scraper.

    Downloads a novel's index page and extracts the chapter-list markup from
    it. Downloading the individual chapters is not implemented yet.
    """

    # Matches every chapter-list container on the index page. re.S lets "."
    # match newlines so a container spanning several lines is still captured.
    _CHAPTER_DIV_RE = re.compile(
        r'<DIV class="clearfix dirconone">.*?</DIV>', re.S
    )

    def __init__(self):
        # A single session object acts as the downloader for every request.
        self.session = requests.Session()

    def get_novel(self, url: str, encoding: str = 'gbk') -> list:
        """Fetch the novel index page at *url* and extract its chapter markup.

        Args:
            url: Address of the novel's index (table-of-contents) page. It is
                a parameter so that any novel can be processed.
            encoding: Text encoding used to decode the page. Defaults to
                ``'gbk'``, the value that was previously hard-coded.

        Returns:
            The list produced by :meth:`get_chapter_infos` for the index page.
        """
        # Download the HTML of the novel's index page.
        index_html = self.download(url, encoding=encoding)
        # Extract the chapter information from the index page.
        novel_chapter_infos = self.get_chapter_infos(index_html)
        # TODO: download the content of each chapter (not implemented yet).
        return novel_chapter_infos

    def download(self, url: str, encoding: str, timeout: float = 10) -> str:
        """Download *url* and return the response body decoded with *encoding*.

        Args:
            url: Address to fetch.
            encoding: Encoding assigned to the response before its text is read.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the spider forever.

        Returns:
            The decoded response text.

        Raises:
            requests.RequestException: On connection problems, a timeout, or
                an HTTP error status (4xx/5xx).
        """
        response = self.session.get(url, timeout=timeout)
        # Fail loudly on HTTP errors instead of parsing an error page as if
        # it were the novel index.
        response.raise_for_status()
        response.encoding = encoding
        return response.text

    def get_chapter_infos(self, index_html: str) -> list:
        """Extract the chapter-list ``<DIV>`` blocks from the index page HTML.

        Args:
            index_html: HTML source of the novel's index page.

        Returns:
            A list with the full text of every
            ``<DIV class="clearfix dirconone">...</DIV>`` block found, in
            document order; empty when the page contains none.
        """
        div = self._CHAPTER_DIV_RE.findall(index_html)
        # Kept from the original implementation: show what was matched.
        print(div)
        return div
# Script entry point. The guard must compare against '__main__' (double
# underscores on both sides); with '_main_' the condition is never true and
# the script exits without doing anything.
if __name__ == '__main__':
    novel_url = 'https://wwwquanshu.92kaifa.com/book/5/718/'
    spider = NovelSpider()  # instantiate the spider
    spider.get_novel(novel_url)
# Console output from running the script:
#   "D:\py paper\Scripts\python.exe" "D:/python/py paper/NovelSpider.py"
#   Process finished with exit code 0