Xxxxxxxxxxs 2020-02-26 08:50 采纳率: 0%
浏览 530

python多线程爬虫withopen自动关闭

现在我的with open 是在线程中放着,可以正常运行但是会浪费很多操作。放在multi_threads()函数的开头系统,会报错:I/O无法操作已关闭的文件。

class Producer(threading.Thread):
    def __init__(self,q_page_urls,q_infos,q_names,*args,**kwargs):
        super(Producer,self).__init__(*args,**kwargs)
        self.q_page_urls = q_page_urls
        self.q_infos = q_infos
        self.q_names = q_names

    def run(self) -> None:
        while True:
            time.sleep(5)
            url = self.q_page_urls.get()
            resp = requests.get(url,headers=headers)
            if resp.content:
                reasult = resp.content.decode("utf-8")
                soup = BeautifulSoup(reasult, 'lxml')
                lis = soup.find("body").find_all_next("li")
                for infos in lis:
                    contents = []
                    author_names = []
                    # 爬取作者
                    names = infos.find("a", class_="u-user-name")
                    if names is not None:
                        for name in names:
                            author_name = name.string
                            author_names.append(author_name)
                            self.q_names.put({"author_names":author_names})
                    # 爬取内容
                    info = infos.find("div", class_="j-r-list-c")
                    if info is not None:
                        for texts in info:
                            text = texts.find("a")
                            if text != -1:
                                content = text.string
                                contents.append(content)
                                self.q_infos.put({"contents":contents})


class Consumer(threading.Thread):
    def __init__(self,  q_infos,q_names, *args, **kwargs):
        super(Consumer, self).__init__(*args, **kwargs)
        self.q_infos = q_infos
        self.q_names = q_names

    def run(self) -> None:
        while True:
            with open("百思不得姐.csv", 'a', encoding="utf_8", newline='') as f:
                wrieter = csv.writer(f)
                info_obj = self.q_infos.get(timeout=100)
                name_obj = self.q_names.get(timeout=100)
                if name_obj.get("author_names") is not None:
                    wrieter.writerow(name_obj.get("author_names"))
                if info_obj.get("contents") is not None:
                    wrieter.writerow(info_obj.get("contents"))


def multi_threads():

    q_page_urls = queue.Queue(50)
    q_infos = queue.Queue(100)
    q_names = queue.Queue(100)

    for i in range (1,51):
        page_url = "http://www.budejie.com/text/%d" %i
        q_page_urls.put(page_url)

    for x in range(5):
        th_1 = Producer(q_page_urls,q_infos,q_names)
        th_1.start()

    for y in range(20):
        th_2 = Consumer(q_infos,q_names)
        th_2.start()


if __name__ == '__main__':
    multi_threads()
  • 写回答

1条回答 默认 最新

  • CSDN-Ada助手 CSDN-AI 官方账号 2022-09-09 19:38
    关注
    不知道你这个问题是否已经解决, 如果还没有解决的话:

    如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 以帮助更多的人 ^-^
    评论

报告相同问题?

悬赏问题

  • ¥15 HLs设计手写数字识别程序编译通不过
  • ¥15 Stata外部命令安装问题求帮助!
  • ¥15 从键盘随机输入A-H中的一串字符串,用七段数码管方法进行绘制。提交代码及运行截图。
  • ¥15 TYPCE母转母,插入认方向
  • ¥15 如何用python向钉钉机器人发送可以放大的图片?
  • ¥15 matlab(相关搜索:紧聚焦)
  • ¥15 基于51单片机的厨房煤气泄露检测报警系统设计
  • ¥15 Arduino无法同时连接多个hx711模块,如何解决?
  • ¥50 需求一个up主付费课程
  • ¥20 模型在y分布之外的数据上预测能力不好如何解决