import re
import requests
from bs4 import BeautifulSoup
import bs4
url = 'https://movie.douban.com/chart'
# Browser-like User-Agent header sent with the request.
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64)AppleWebKit/535.1(KHTML, like Gecko)Chrome/14.0.835.163 Safari/535.1'
}

# Patterns applied to the serialized HTML of one candidate element.
# re.S lets "." match newlines so a capture may span several lines.
findLink = re.compile(r'<a class="nbg" href="(.*?)" title=', re.S)  # movie link
findTitle = re.compile(r'title="(.*?)">', re.S)  # movie title
# NOTE(review): the closing "</span>" delimiter was missing from the original
# pattern (apparently stripped as HTML); confirm against the live page markup.
findRating = re.compile(r'rating_nums">(.*?)</span>', re.S)  # movie rating
# NOTE(review): the tags around this capture were lost in the original source;
# '<p class="pl">...</p>' is a reconstruction -- confirm against the live page.
findInq = re.compile(r'<p class="pl">(.*?)</p>', re.S)  # summary line


def _first_group(pattern, text):
    """Return the first capture of *pattern* in *text*, or "" if no match."""
    match = pattern.search(text)
    return match.group(1) if match else ""


def parse_movie(item_html):
    """Extract one movie's labelled fields from an HTML fragment.

    Args:
        item_html: HTML text of a single candidate element.

    Returns:
        A list of four strings (link, title, rating, summary), each prefixed
        with its label, or ``None`` when the fragment contains no movie link.
        A field that is absent from the fragment is returned as its bare
        label instead of raising ``IndexError``.
    """
    link = _first_group(findLink, item_html)
    if not link:
        # Without a link this element is not a movie entry; let the caller skip it.
        return None
    return [
        "链接:" + link,
        "电影:" + _first_group(findTitle, item_html),
        "评分:" + _first_group(findRating, item_html),
        "概况:" + _first_group(findInq, item_html),
    ]


def main():
    """Download the chart page, parse every movie entry and print the result.

    Raises:
        requests.RequestException: on a network failure, timeout, or an HTTP
            error status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=head, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    datalist = []
    # NOTE(review): the class_="" filter is kept from the original script;
    # confirm it selects one element per movie on the live page.
    for item in soup.find_all('div', class_=""):
        data = parse_movie(str(item))
        if data is not None:
            datalist.append(data)  # one fully processed movie record
    print(datalist)


if __name__ == "__main__":
    main()