xiaoqiu666 2022-10-26 14:01 采纳率: 66.7%
浏览 46
已结题

下面这段代码有bug,我调试不出来

请问下面这段代码的bug在哪里
运行了几次无法完全运行
from urllib import request
from bs4 import BeautifulSoup
import re
import sys

if name == "main":
#创建txt文件
file = open('一念永恒.txt', 'w', encoding='utf-8')
#一念永恒小说目录地址
target_url = 'http://www.biqukan.com/1_1094/'
head = {}
head['User-Agent'] = 'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19'
target_req = request.Request(url = target_url, headers = head)
target_response = request.urlopen(target_req)
target_html = target_response.read().decode('gbk','ignore')
listmain_soup = BeautifulSoup(target_html)
#找出div标签中class为listmain的所有子标签
chapters = listmain_soup.find_all('div',class_ = 'listmain')
download_soup = BeautifulSoup(str(chapters))
#计算章节个数
numbers = (len(download_soup.dl.contents) - 1) / 2 - 8
index = 1
begin_flag = False
for child in download_soup.dl.children:
if child != '\n':
#找到《一念永恒》正文卷
if child.string == u"《一念永恒》正文卷":
begin_flag = True
#爬取链接并下载链接内容
if begin_flag == True and child.a != None:
download_url = "http://www.biqukan.com" + child.a.get('href')
download_req = request.Request(url = download_url, headers = head)
download_response = request.urlopen(download_req)
download_html = download_response.read().decode('gbk','ignore')
download_name = child.string
soup_texts = BeautifulSoup(download_html)
texts = soup_texts.find_all(id = 'content', class_ = 'showtxt')
soup_text = BeautifulSoup(str(texts))
write_flag = True
file.write(download_name + '\n\n')
#将爬取内容写入文件
for each in soup_text.div.text.replace('\xa0',''):

                if each == 'h':
                    write_flag = False
                if write_flag == True and each != ' ':
                    file.write(each)
                if write_flag == True and each == '\r':
                    file.write('\n')
                print('正在写入第{0}小节'.format(index))
                index+=1
            file.write('\n\n')
            #打印爬取进度
            sys.stdout.write("已下载:%.3f%%" % float(index/numbers) + '\r')
            sys.stdout.flush()
            index += 1
file.close()
  • 写回答

2条回答 默认 最新

  • CSDN专家-showbo 2022-10-26 14:11
    关注

    发代码时用工具栏的</>按钮格式化下,要不python没缩进没法看这个代码。。

    #from urllib import request
    from bs4 import BeautifulSoup
    import re
    import sys
    import requests
    
    
    #创建txt文件
    file = open('一念永恒.txt', 'w', encoding='utf-8')
    #一念永恒小说目录地址
    target_url = 'http://www.biqukan.com/1_1094/'
    head = {}
    head['User-Agent'] = 'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19'
    
    target_html =requests.get(target_url,headers=head).content.decode('gbk')
    print(target_html)
    listmain_soup = BeautifulSoup(target_html,features="html.parser")
    #找出div标签中class为listmain的所有子标签
    chapters = listmain_soup.find_all('div',class_ = 'listmain')
    download_soup = BeautifulSoup(str(chapters),features="html.parser")
    
    #计算章节个数
    numbers = (len(download_soup.dl.contents) - 1) / 2 - 8
    index = 1
    begin_flag = False
    for child in download_soup.dl.children:
        if child != '\n':
        #找到《一念永恒》正文卷
            if child.string == "《一念永恒》正文卷":
                begin_flag = True
                #爬取链接并下载链接内容
            if begin_flag == True and child.a != None:
                download_url = "http://www.biqukan.com" + child.a.get('href')
                download_html = requests.get(download_url).content.decode('gbk','ignore')
                download_name = child.string
                soup_texts = BeautifulSoup(download_html,features="html.parser")
                texts = soup_texts.find_all(id = 'content', class_ = 'showtxt')
                print(texts)
                soup_text = BeautifulSoup(str(texts),features="html.parser")
                write_flag = True
                file.write(download_name + '\n\n')
                #将爬取内容写入文件
                for each in soup_text.div.text.replace('\xa0',''):
    
                    if each == 'h':
                        write_flag = False
                    if write_flag == True and each != ' ':
                        file.write(each)
                    if write_flag == True and each == '\r':
                        file.write('\n')
                    print('正在写入第{0}小节'.format(index))
                    index+=1
                file.write('\n\n')
                #打印爬取进度
                sys.stdout.write("已下载:%.3f%%" % float(index/numbers) + '\r')
                sys.stdout.flush()
                index += 1
    file.close()
    
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论 编辑记录
查看更多回答(1条)

报告相同问题?

问题事件

  • 系统已结题 11月3日
  • 已采纳回答 10月26日
  • 创建了问题 10月26日

悬赏问题

  • ¥15 关于#Java#的问题,如何解决?
  • ¥15 加热介质是液体,换热器壳侧导热系数和总的导热系数怎么算
  • ¥15 想问一下树莓派接上显示屏后出现如图所示画面,是什么问题导致的
  • ¥100 嵌入式系统基于PIC16F882和热敏电阻的数字温度计
  • ¥15 cmd cl 0x000007b
  • ¥20 BAPI_PR_CHANGE how to add account assignment information for service line
  • ¥500 火焰左右视图、视差(基于双目相机)
  • ¥100 set_link_state
  • ¥15 虚幻5 UE美术毛发渲染
  • ¥15 CVRP 图论 物流运输优化