from bs4 import BeautifulSoup
from gevent.queue import Queue
from gevent import monkey
monkey.patch_all()
import gevent,time,requests,csv
f = open('电影.csv','a',newline='',encoding='utf-8')
writer = csv.writer(f)
writer.writerow(['电影名','导演','主演','简介'])
url_lists = ['http://www.mtime.com/top/tv/top100/']
for i in range(1,10):
url = 'http://www.mtime.com/top/tv/top100/index-'+str(i+1)+'.html'
url_lists.append(url)
start = time.time()
work = Queue()
for x in url_lists:
work.put_nowait(url)
def crawler(url):
while not work.empty():
url = work.get_nowait()
r = requests.get(url)
print(r.status_code)
soup = BeautifulSoup(r.text,'html.parser')
movies_list = soup.find('div',class_="mov_con")
for movie in movies_list:
name = movie.find('a').text
tag_p = movie.find_all('p')
list1 = []
if len(tag_p) == 1:
list1=[str(tag_p[0].text),'none','none']
elif len(tag_p) == 2:
list1 = [str(tag_p[0].text),str(tag_p[1].text),'none']
else:
for i in tag_p:
peo = i.text
list1.append(peo)
writer.writerow([name,list1[0],list1[1],list1[2]])
f.close()
tasks_list = []
for x in range(2):
task = gevent.spawn(crawler,url)
tasks_list.append(task)
gevent.joinall(tasks_list)
end = time.time()
print(end-start)
报错
Traceback (most recent call last):
File "src/gevent/greenlet.py", line 766, in gevent._greenlet.Greenlet.run
File "e:/python code/python爬虫精进/关卡作业/第十一关作业.py", line 34, in crawler
name = movie.find('a').text
AttributeError: 'int' object has no attribute 'text'
2020-11-24T10:02:43Z <Greenlet at 0x1ae03e0b268: crawler('http://www.mtime.com/top/tv/top100/index-10.html')> failed with AttributeError
python新手求大佬指教