qq_67228075 2022-03-25 15:11 采纳率: 33.3%
浏览 26
已结题

能看看我这组代码该怎么改吗?

import requests
from lxml import etree
import asyncio
import aiohttp
import aiofiles
import os
def get_url(url):
resp=requests.get(url)
resp.encoding="utf-8"
resp_text=resp.text
result=[]
tree=etree.HTML(resp_text)
trss=tree.xpath('//div[@class="mulu"]/center/table')
for tab in trss:
chapter={}
trs=tab.xpath('./tr')
title=trs[0].xpath(".//text()")
titles="".join(title).strip()
hrefs_list=[]
for tr in trs[1:]:
href=tr.xpath('./td/a/@href')
hrefs_list.extend(href)
chapter['titles']=titles
chapter['hrefs_list']=hrefs_list
result.append(chapter)
async def download_chapter(name,hrefs):
if not os.path.exists(name):
os.makedirs(name)
tasks=[]
for href in hrefs:
t=asyncio.create_task(download_one(name,hrefs))
tasks.append(t)
await asyncio.wait(tasks)
async def download_one(name,href):
async with aiohttp.ClientSession()as session:
async with session.get(href)as resp:
page_source =await resp.text(encoding='utf-8')
tree=etree.HTML(page_source)
title_name = tree.xpath('/html/body/div[3]/h1/text()')[0].strip()
content="\n".join(tree.xpath('/html/body/div[3]/div[2]/p//text()'))
async with aiofiles.open(f"{name}/{title_name}.txt",mode="w",encoding="utf-8")as f:
await f.write(content)
print(title_name,"下载完成")

def main():
url ="https://www.mingchaonaxieshier.com/%22
chapters=get_url(url)
for chapter in chapters:
titles=chapter['titles']
hrefs_list=chapter['hrefs_list']
asyncio.run(download_chapter(titles,hrefs_list))

if name == 'main':
main()

  • 写回答

3条回答 默认 最新

  • 关注
    import asyncio
    import os
    
    import aiofiles
    import aiohttp
    import requests
    from lxml import etree
    
    
    def get_url(url):
        resp = requests.get(url)
        resp.encoding = "utf-8"
        resp_text = resp.text
        result = []
        tree = etree.HTML(resp_text)
        trss = tree.xpath('//div[@class="mulu"]/center/table')
        for tab in trss:
            chapter = {}
            trs = tab.xpath('./tr')
            title = trs[0].xpath(".//text()")
            titles = "".join(title).strip()
        hrefs_list = []
        for tr in trs[1:]:
            href = tr.xpath('./td/a/@href')
            hrefs_list.extend(href)
            chapter['titles'] = titles
            chapter['hrefs_list'] = hrefs_list
            result.append(chapter)
        return  result
    
    
    async def download_chapter(name,hrefs):
        if not os.path.exists(name):
            os.makedirs(name)
        tasks = []
        for href in hrefs:
            t = asyncio.create_task(download_one(name,href))
            tasks.append(t)
        await asyncio.wait(tasks)
    
    
    async def download_one(name,href):
        async with aiohttp.ClientSession() as session:
            async with session.get(href) as resp:
                page_source = await resp.text(encoding = 'utf-8')
                tree = etree.HTML(page_source)
                title_name = tree.xpath('/html/body/div[3]/h1/text()')[0].strip()
                content = "\n".join(tree.xpath('/html/body/div[3]/div[2]/p//text()'))
        async with aiofiles.open(f"{name}/{title_name}.txt",mode = "w",encoding = "utf-8") as f:
            await f.write(content)
        print(title_name,"下载完成")
    
    
    def main():
        url = "https://www.mingchaonaxieshier.com/%22"
    
    
        chapters = get_url(url)
        for chapter in chapters:
            titles = chapter['titles']
        hrefs_list = chapter['hrefs_list']
        asyncio.run(download_chapter(titles,hrefs_list))
    
    if __name__ == '__main__':
    
        main()
    

    img

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(2条)

报告相同问题?

问题事件

  • 系统已结题 4月19日
  • 已采纳回答 4月11日
  • 创建了问题 3月25日

悬赏问题

  • ¥23 (标签-bug|关键词-密码错误加密)
  • ¥66 比特币地址如何生成taproot地址
  • ¥20 数学建模数学建模需要
  • ¥15 关于#lua#的问题,请各位专家解答!
  • ¥15 什么设备可以研究OFDM的60GHz毫米波信道模型
  • ¥15 不知道是该怎么引用多个函数片段
  • ¥30 关于用python写支付宝扫码付异步通知收不到的问题
  • ¥15 隐藏系统界面pdf的打印、下载按钮
  • ¥15 基于pso参数优化的LightGBM分类模型
  • ¥15 安装Paddleocr时报错无法解决