现在我的 with open 放在线程的循环里,可以正常运行,但每次循环都会重新打开文件,浪费很多操作。如果把它放在 multi_threads() 函数的开头,系统会报错:I/O 无法操作已关闭的文件(ValueError: I/O operation on closed file)。
class Producer(threading.Thread):
    """Worker thread that downloads listing pages and queues what it scrapes.

    Each page URL taken from ``q_page_urls`` is fetched and parsed; for every
    ``<li>`` element the author names go to ``q_names`` as
    ``{"author_names": [...]}`` and the post texts go to ``q_infos`` as
    ``{"contents": [...]}``.

    Args:
        q_page_urls: Queue of page URLs to fetch. It is expected to be filled
            before the thread starts, because the thread exits as soon as it
            finds the queue empty.
        q_infos: Queue receiving ``{"contents": [...]}`` dicts.
        q_names: Queue receiving ``{"author_names": [...]}`` dicts.
        *args, **kwargs: Forwarded to ``threading.Thread``.
    """

    def __init__(self, q_page_urls, q_infos, q_names, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.q_page_urls = q_page_urls
        self.q_infos = q_infos
        self.q_names = q_names

    def run(self) -> None:
        """Fetch and parse pages until the URL queue is empty."""
        while True:
            # Pause before every request, as the original loop did.
            time.sleep(5)
            try:
                # A blocking get() would hang forever once the URLs run out,
                # so the thread could never finish.
                url = self.q_page_urls.get_nowait()
            except queue.Empty:
                break
            try:
                # `headers` is a module-level name defined outside this class.
                resp = requests.get(url, headers=headers, timeout=10)
            except requests.RequestException:
                # One unreachable page must not kill the whole worker.
                continue
            if not resp.content:
                continue
            self._parse_page(resp.content.decode("utf-8"))

    def _parse_page(self, html):
        """Queue the author names and post texts of every <li> in *html*."""
        soup = BeautifulSoup(html, 'lxml')
        body = soup.find("body")
        if body is None:
            return
        for item in body.find_all_next("li"):
            # Scrape the author.
            name_link = item.find("a", class_="u-user-name")
            if name_link is not None:
                author_names = [child.string for child in name_link]
                self.q_names.put({"author_names": author_names})

            # Scrape the content.
            # NOTE(review): names and contents travel on two separate queues,
            # so a consumer taking one item from each is not guaranteed to get
            # the pair that belongs to the same post.
            content_box = item.find("div", class_="j-r-list-c")
            if content_box is not None:
                contents = []
                for child in content_box:
                    anchor = child.find("a")
                    # Text-node children resolve .find to str.find and yield
                    # an int (not only -1); tag children yield a Tag or None.
                    # Only a real tag has a usable .string.
                    if anchor is None or isinstance(anchor, int):
                        continue
                    contents.append(anchor.string)
                self.q_infos.put({"contents": contents})
class Consumer(threading.Thread):
    """Worker thread that writes scraped author names and post texts to a CSV.

    On each iteration one item is taken from ``q_infos`` and one from
    ``q_names``; the author names are written as one row and the contents as
    the next row.

    Args:
        q_infos: Queue yielding ``{"contents": [...]}`` dicts.
        q_names: Queue yielding ``{"author_names": [...]}`` dicts.
        *args: Forwarded to ``threading.Thread``.
        csv_path: File the rows are appended to.
        timeout: Seconds to wait for a queue item before the thread stops.
        **kwargs: Forwarded to ``threading.Thread``.
    """

    # Shared by all Consumer threads so that the rows written by one thread
    # are never interleaved with rows written by another.
    _write_lock = threading.Lock()

    def __init__(self, q_infos, q_names, *args,
                 csv_path="百思不得姐.csv", timeout=100, **kwargs):
        super().__init__(*args, **kwargs)
        self.q_infos = q_infos
        self.q_names = q_names
        self.csv_path = csv_path
        self.timeout = timeout

    def run(self) -> None:
        """Append rows to the CSV until a queue stays empty for ``timeout`` s."""
        # Open the file once per thread instead of once per loop iteration.
        # It has to be opened here, inside the thread: a `with open(...)` in
        # the function that starts the threads ends right after start()
        # returns, closing the file while the threads are still writing
        # ("I/O operation on closed file").
        with open(self.csv_path, 'a', encoding="utf_8", newline='') as f:
            writer = csv.writer(f)
            while True:
                try:
                    info_obj = self.q_infos.get(timeout=self.timeout)
                    name_obj = self.q_names.get(timeout=self.timeout)
                except queue.Empty:
                    # Nothing arrived in time: treat the work as finished
                    # rather than dying with an unhandled exception.
                    break
                author_names = name_obj.get("author_names")
                contents = info_obj.get("contents")
                with self._write_lock:
                    if author_names is not None:
                        writer.writerow(author_names)
                    if contents is not None:
                        writer.writerow(contents)
                    # Flush while holding the lock so complete rows reach the
                    # file before another thread writes.
                    f.flush()
def multi_threads():
    """Queue the 50 listing-page URLs, then start the worker threads.

    Five Producer threads and twenty Consumer threads are started. None of
    them is joined, so this function returns once the last one has started.
    """
    page_queue = queue.Queue(50)
    info_queue = queue.Queue(100)
    name_queue = queue.Queue(100)

    # Pages are numbered 1 through 50.
    url_template = "http://www.budejie.com/text/%d"
    page_no = 1
    while page_no <= 50:
        page_queue.put(url_template % page_no)
        page_no += 1

    for _ in range(5):
        Producer(page_queue, info_queue, name_queue).start()

    for _ in range(20):
        Consumer(info_queue, name_queue).start()
# Start the crawler only when this file is run as a script, not on import.
if __name__ == "__main__":
    multi_threads()