做一个爬取小说的网站
网站要先注册,登录
网站要有搜索引擎,输入小说名就可以下载整本小说,并按章节顺序保存为txt文件
经过前面几步,可以将文本小说转化成语音小说,并保存到本地的音频文件中(如:点击一个章节,这章就可转成语音)
转成语音后,可以在网站上播放,并支持暂停
美化网站,以上都要在网站上进行,使用python,提供源代码,要求可以运行成功
(没有诚意就不要回答了)
爬虫的有声小说制作(用python)
- 写回答
- 好问题 0 提建议
- 关注问题
- 邀请回答
-
14条回答 默认 最新
# Answer posted by threenewbee (2023-09-07 15:21).
# Scraper for audiobook detail pages and audio links on m.tingshubao.com.
import re
from typing import Any, Dict, Union
from urllib.parse import urljoin

# Captures the quoted argument of the FonHen_JieMa('...') call found in the
# player page source (presumably an inline JavaScript decoder call).
_JIEMA_CALL_RE = re.compile(r"FonHen_JieMa\('(.*?)'")


class Tingshu_bao_spider:
    """Fetch audiobook metadata and decoded audio URLs from tingshubao pages.

    Every public method returns ``False`` on failure instead of raising, so
    callers can use a plain truthiness check on the result.
    """

    # Browser-like User-Agent sent with every request.
    USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"
    )
    # Per-request timeout in seconds.
    REQUEST_TIMEOUT = 6
    # Host against which decoded audio paths are resolved.
    AUDIO_BASE_URL = "https://t3344t.tingchina.com/"
    # Placeholder stored when a metadata field cannot be found on the page.
    UNKNOWN = "未知"

    def do_get_request(self, url: str) -> Union[str, bool]:
        """Download *url* and return its decoded text.

        :param url: page address; it is also sent as the ``Referer`` header.
        :return: the response body as text when the status code is 200,
            otherwise ``False`` (also on any ``requests`` error).
        """
        # Imported lazily so the pure parsing/decoding code in this class can
        # be imported and tested without the scraping dependencies installed.
        import requests
        import urllib3

        urllib3.disable_warnings()  # silence urllib3 warnings

        headers = {"User-Agent": self.USER_AGENT, "Referer": url}
        try:
            response = requests.get(
                url, headers=headers, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException:
            # Network failure, timeout, invalid URL, ... -> report "no page".
            return False

        if response.status_code != 200:
            return False
        # Let requests guess the charset from the body before decoding.
        response.encoding = response.apparent_encoding
        return response.text

    def get_novel_detail(self, sound_link: str) -> Union[Dict[str, Any], bool]:
        """Scrape the detail page of one audiobook.

        :param sound_link: URL of the book detail page.
        :return: ``False`` when the page cannot be fetched or parsed,
            otherwise a dict with the keys ``novel_name``, ``novel_cover``,
            ``novel_type``, ``novel_status``, ``novel_update_time``,
            ``novel_author``, ``novel_anchor``, ``novel_introduce`` and
            ``play_list`` (a list of ``{"play_name", "play_link"}`` dicts).
            Missing metadata is filled with ``"未知"``; missing play-list
            fields are filled with ``"NULL"``.
        """
        html = self.do_get_request(sound_link)
        if not html:
            return False

        from lxml import etree  # lazy import, see do_get_request

        tree = etree.HTML(html)
        if tree is None:
            # NOTE(review): lxml may yield None for documents without any
            # element content; treat that like a failed download.
            return False

        unknown = self.UNKNOWN
        detail: Dict[str, Any] = {}

        titles = tree.xpath(
            '//div[@class="book-cell"]/h1[@class="book-title"]/text()'
        )
        # The heading carries a trailing "有声小说简介:" label; keep only
        # the part before it.
        detail['novel_name'] = (
            titles[0].split("有声小说简介:")[0] if titles else unknown
        )

        covers = tree.xpath(
            '//div[@class="book"]/img[@class="book-cover"]/@src'
        )
        detail['novel_cover'] = (
            urljoin(sound_link, covers[0]) if covers else unknown
        )

        # Text nodes of the info box: index 1 is used as the type, index 3 as
        # the status and the last node as the update time. Each index is
        # guarded so a shorter list degrades to "未知" instead of raising.
        info = tree.xpath('//div[@class="book-rand-a"]//text()')
        detail['novel_type'] = info[1] if len(info) > 1 else unknown
        detail['novel_status'] = info[3] if len(info) > 3 else unknown
        detail['novel_update_time'] = info[-1] if info else unknown

        # Author is the first link in the description, narrator the last.
        people = tree.xpath('//div[@class="book-des"]/p/a/text()')
        detail['novel_author'] = people[0] if people else unknown
        detail['novel_anchor'] = people[-1] if people else unknown

        intro = tree.xpath('//div[@class="book-des"]/text()')
        detail['novel_introduce'] = intro[0] if intro else unknown

        play_list = []
        for entry in tree.xpath('//div[@id="playlist"]/ul/li'):
            names = entry.xpath("./a/@title")
            links = entry.xpath("./a/@href")
            play_list.append({
                "play_name": names[0] if names else "NULL",
                # Guard on the link list itself (the original checked the
                # title list here, which could raise or drop valid links).
                "play_link": (
                    urljoin(sound_link, links[0]) if links else "NULL"
                ),
            })
        detail['play_list'] = play_list
        return detail

    @staticmethod
    def _decode_jiema(encoded: str) -> str:
        """Decode a ``*``-separated list of character codes into text.

        Empty tokens are skipped. A token containing ``-`` is mapped through
        ``((~n) & 0xFFFF) + 1`` where ``n`` is the token with the dashes
        removed (i.e. ``65536 - n`` for ``n`` in 1..65535); any other token
        is used as the code point directly.

        :raises ValueError: if a token is not an integer or the resulting
            code point is outside the valid range for ``chr``.
        """
        chars = []
        for token in encoded.split("*"):
            if not token:
                continue
            if "-" in token:
                magnitude = int(token.replace("-", ""))
                code_point = ((~magnitude) & 0xFFFF) + 1
            else:
                code_point = int(token)
            chars.append(chr(code_point))
        return "".join(chars)

    def get_audio_play_link(self, detail_intro_link: str) -> Union[str, bool]:
        """Resolve the audio URL for one chapter's player page.

        :param detail_intro_link: URL of the chapter player page.
        :return: the absolute audio URL, or ``False`` when the page cannot
            be fetched, contains no ``FonHen_JieMa('...')`` payload, or the
            payload cannot be decoded.
        """
        html = self.do_get_request(detail_intro_link)
        if not html:
            return False

        found = _JIEMA_CALL_RE.search(html)
        if found is None:
            return False

        try:
            decoded = self._decode_jiema(found.group(1))
        except (ValueError, OverflowError):
            # Malformed payload: stay consistent with the other failure paths.
            return False

        # Keep the part before the first '&', drop everything up to and
        # including its first '/', and use the remainder as the path.
        audio_path = "/" + decoded.split('&')[0].split('/', 1)[-1]
        return urljoin(self.AUDIO_BASE_URL, audio_path)


if __name__ == '__main__':
    spider = Tingshu_bao_spider()
    aim_url = 'http://m.tingshubao.com/book/2267.html'
    print(spider.get_novel_detail(aim_url))
    print(spider.get_audio_play_link('http://m.tingshubao.com/video/?2267-0-0.html'))