qq_67228075 2022-03-25 15:11 采纳率: 33.3%
浏览 26
已结题

能看看我这组代码该怎么改吗?

import requests
from lxml import etree
import asyncio
import aiohttp
import aiofiles
import os
def get_url(url):
    """Fetch the index page at *url* and collect chapter links per table.

    Every ``<table>`` found under ``div.mulu/center`` is treated as one
    group: the text of its first row becomes the group title and the
    ``href`` of every ``td/a`` link in the remaining rows is collected.

    Args:
        url: Address of the index page to download.

    Returns:
        A list of dicts, one per table, each with the keys ``'titles'``
        (the stripped first-row text) and ``'hrefs_list'`` (list of link
        targets in row order).
    """
    resp = requests.get(url)
    resp.encoding = "utf-8"
    tree = etree.HTML(resp.text)

    result = []
    for tab in tree.xpath('//div[@class="mulu"]/center/table'):
        trs = tab.xpath('./tr')
        if not trs:
            # A table without rows has neither a title nor links.
            continue
        titles = "".join(trs[0].xpath(".//text()")).strip()
        hrefs_list = []
        for tr in trs[1:]:
            hrefs_list.extend(tr.xpath('./td/a/@href'))
        result.append({'titles': titles, 'hrefs_list': hrefs_list})
    # The original never returned the collected data, so callers got None.
    return result
async def download_chapter(name, hrefs):
    """Download every page in *hrefs* concurrently into directory *name*.

    Args:
        name: Directory that receives the downloaded files; created
            (including parents) if it does not exist yet.
        hrefs: Iterable of page URLs, each handed to ``download_one``.
    """
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(name, exist_ok=True)
    # Pass the single href, not the whole list, to each download task.
    tasks = [asyncio.create_task(download_one(name, href)) for href in hrefs]
    # asyncio.wait() raises ValueError when given an empty collection.
    if tasks:
        await asyncio.wait(tasks)
async def download_one(name, href):
    """Download one page and save its text as ``<name>/<title>.txt``.

    The page is fetched with a fresh aiohttp session, decoded as UTF-8 and
    parsed with lxml. The first ``h1`` text node under
    ``/html/body/div[3]`` is used as the file name, and all text inside the
    ``p`` elements of ``/html/body/div[3]/div[2]`` is joined with newlines
    to form the file content.

    Args:
        name: Existing directory the text file is written into.
        href: URL of the page to download.

    Raises:
        IndexError: If the page has no matching ``h1`` text node.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(href) as resp:
            page_source = await resp.text(encoding='utf-8')
            tree = etree.HTML(page_source)
            title_name = tree.xpath('/html/body/div[3]/h1/text()')[0].strip()
            content = "\n".join(tree.xpath('/html/body/div[3]/div[2]/p//text()'))
    # The network resources are released before the file is written.
    async with aiofiles.open(f"{name}/{title_name}.txt", mode="w", encoding="utf-8") as f:
        await f.write(content)
    print(title_name, "下载完成")

def main():
    """Collect all chapter groups from the index page and download each one.

    Each group returned by ``get_url`` is downloaded with its own
    ``asyncio.run`` call, so groups are processed one after another while
    the pages inside a group are fetched concurrently.
    """
    # The pasted URL ended in "%22" (a URL-encoded double quote) instead of
    # the closing quote, which left the string literal unterminated.
    url = "https://www.mingchaonaxieshier.com/"
    chapters = get_url(url)
    for chapter in chapters:
        titles = chapter['titles']
        hrefs_list = chapter['hrefs_list']
        asyncio.run(download_chapter(titles, hrefs_list))

# Start the download only when the file is executed as a script. The dunder
# names are required: a bare `name` is undefined and raises NameError.
if __name__ == '__main__':
    main()

  • 写回答

3条回答 默认 最新

  • 关注
    import asyncio
    import os
    
    import aiofiles
    import aiohttp
    import requests
    from lxml import etree
    
    
    def get_url(url):
        """Fetch the index page at *url* and collect chapter links per table.

        Every ``<table>`` found under ``div.mulu/center`` is treated as one
        group: the text of its first row becomes the group title and the
        ``href`` of every ``td/a`` link in the remaining rows is collected.

        Args:
            url: Address of the index page to download.

        Returns:
            A list of dicts, one per table, each with the keys ``'titles'``
            (the stripped first-row text) and ``'hrefs_list'`` (list of link
            targets in row order).
        """
        resp = requests.get(url)
        resp.encoding = "utf-8"
        tree = etree.HTML(resp.text)

        result = []
        for tab in tree.xpath('//div[@class="mulu"]/center/table'):
            trs = tab.xpath('./tr')
            if not trs:
                # A table without rows has neither a title nor links.
                continue
            titles = "".join(trs[0].xpath(".//text()")).strip()
            # The link collection must run once per table; when it sat
            # outside this loop only the last table was processed and its
            # dict was appended once per row.
            hrefs_list = []
            for tr in trs[1:]:
                hrefs_list.extend(tr.xpath('./td/a/@href'))
            result.append({'titles': titles, 'hrefs_list': hrefs_list})
        return result
    
    
    async def download_chapter(name, hrefs):
        """Download every page in *hrefs* concurrently into directory *name*.

        Args:
            name: Directory that receives the downloaded files; created
                (including parents) if it does not exist yet.
            hrefs: Iterable of page URLs, each handed to ``download_one``.
        """
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(name, exist_ok=True)
        tasks = [asyncio.create_task(download_one(name, href)) for href in hrefs]
        # asyncio.wait() raises ValueError when given an empty collection.
        if tasks:
            await asyncio.wait(tasks)
    
    
    async def download_one(name, href):
        """Fetch the page at *href* and store its text in ``<name>/<title>.txt``.

        The title comes from the first ``h1`` text node under
        ``/html/body/div[3]``; the body is the newline-joined text of the
        ``p`` elements under ``/html/body/div[3]/div[2]``. A completion
        message is printed once the file has been written.
        """
        async with aiohttp.ClientSession() as client, client.get(href) as response:
            html = await response.text(encoding='utf-8')
            document = etree.HTML(html)
            heading_nodes = document.xpath('/html/body/div[3]/h1/text()')
            title_name = heading_nodes[0].strip()
            paragraphs = document.xpath('/html/body/div[3]/div[2]/p//text()')
            content = "\n".join(paragraphs)
        # Session and response are closed here; only the disk write remains.
        target_path = f"{name}/{title_name}.txt"
        async with aiofiles.open(target_path, mode="w", encoding="utf-8") as out:
            await out.write(content)
        print(title_name, "下载完成")
    
    
    def main():
        """Collect all chapter groups from the index page and download each one.

        Each group returned by ``get_url`` is downloaded with its own
        ``asyncio.run`` call, so groups are processed one after another while
        the pages inside a group are fetched concurrently.
        """
        # The trailing "%22" was a URL-encoded double quote left over from
        # pasting; it is not part of the site address.
        url = "https://www.mingchaonaxieshier.com/"
        chapters = get_url(url)
        for chapter in chapters:
            titles = chapter['titles']
            hrefs_list = chapter['hrefs_list']
            # The download must happen inside the loop; outside it only the
            # last chapter was fetched and an empty result raised NameError.
            asyncio.run(download_chapter(titles, hrefs_list))
    
    # Start the download only when the file is executed as a script.
    if __name__ == "__main__":
        main()
    

    img

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(2条)

报告相同问题?

问题事件

  • 系统已结题 4月19日
  • 已采纳回答 4月11日
  • 创建了问题 3月25日

悬赏问题

  • ¥20 西门子S7-Graph,S7-300,梯形图
  • ¥50 用易语言http 访问不了网页
  • ¥50 safari浏览器fetch提交数据后数据丢失问题
  • ¥15 matlab不知道怎么改,求解答!!
  • ¥15 永磁直线电机的电流环pi调不出来
  • ¥15 用stata实现聚类的代码
  • ¥15 请问paddlehub能支持移动端开发吗?在Android studio上该如何部署?
  • ¥20 docker里部署springboot项目,访问不到扬声器
  • ¥15 netty整合springboot之后自动重连失效
  • ¥15 悬赏!微信开发者工具报错,求帮改