import requests
import json
import re
import csv
# Function that fetches the link of every movie category.
def get_movie_type():
    """Fetch the Douban chart page and extract the movie-category links.

    Returns:
        A list with one ``(link, type_name)`` tuple per category link found
        in the page; ``run()`` reads ``match[0]`` as the link and
        ``match[1]`` as the category name.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://movie.douban.com/chart'
    # Without a timeout a stalled connection would block the script forever.
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    # NOTE(review): the previous pattern was just '(.*?)', which has a single
    # group and only ever matches empty strings, so indexing match[1] in
    # run() raised IndexError. The pattern below is reconstructed on the
    # assumption that category links look like
    # <a href="/typerank?type_name=...">name</a> -- confirm against the
    # live page markup.
    pattern = r'<a href="(/typerank\?type_name=[^"]*)">([^<]*)</a>'
    return re.findall(pattern, resp.text)
# Fetch the top 100 movies of each category.
def get_movie_rank(type_url):
    """Download the top-100 list of one movie category as raw JSON text.

    Args:
        type_url: category link as produced by ``get_movie_type()``.
            Presumably of the form
            ``/typerank?type_name=<name>&type=<id>&interval_id=...&action=``
            -- verify against the page markup.

    Returns:
        The response body of the ``top_list`` endpoint as text.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # The old code used type_url[23:], which is only right when the prefix
    # up to and including 'type_name=<name>&' is exactly 23 characters long
    # (i.e. a two-character category name). Drop the type_name pair by name
    # instead so longer or shorter category names work too.
    _, sep, query = type_url.partition('?')
    pairs = query.split('&')
    kept = [pair for pair in pairs if not pair.startswith('type_name=')]
    if sep and len(kept) != len(pairs):
        rank_query = '&'.join(kept)
    else:
        # Unknown link shape: keep the original fixed-offset behaviour.
        rank_query = type_url[23:]

    rank_url = ('https://movie.douban.com/j/chart/top_list?'
                + rank_query + '&start=0&limit=100')
    # Without a timeout a stalled connection would block the script forever.
    resp = requests.get(rank_url, timeout=10)
    resp.raise_for_status()
    return resp.text
# Save as CSV. Fields: title, category, country or region, release date, score, rank.
def save(movie, path=r'C:\Users\Administrator\Desktop\python\豆瓣电影分类前100.csv'):
    """Append one movie row to the CSV file, writing the header if needed.

    The file used to be opened in ``'w'`` mode on every call, which
    truncated it each time and left only the header plus the last movie.
    It is now opened in append mode, so rows accumulate (also across
    separate runs of the script; delete the file to start fresh).

    Args:
        movie: sequence of cell values in header order (type, title, region,
            release date, score, rank, url).
        path: destination CSV file; defaults to the original hard-coded
            location.
    """
    with open(path, 'a', newline='', encoding='utf8') as f:
        writer = csv.writer(f)
        # In append mode the position starts at the end of the file, so a
        # position of 0 means the file is new or empty and needs a header.
        if f.tell() == 0:
            writer.writerow(['类型','电影名称','地区','上映日期','评分','排行','地址'])
        writer.writerow(movie)
def run():
    """Scrape every movie category and store each listed movie via save().

    For each entry returned by ``get_movie_type()`` the ranking JSON is
    downloaded with ``get_movie_rank()``, decoded, and one row per movie is
    passed to ``save()``. Progress messages are printed per category.
    """
    for type_entry in get_movie_type():
        # Each entry is indexed as (link, category name).
        type_url = type_entry[0]
        movie_type_name = type_entry[1]
        print('正在储存'+movie_type_name+'类型的电影')

        # Decode the JSON payload into a list of movie records.
        movies = json.loads(get_movie_rank(type_url))

        for info in movies:
            regions = info['regions']
            movie = [
                movie_type_name,
                info['title'],
                # Only the first region is stored; an empty list used to
                # raise IndexError and abort the whole run.
                regions[0] if regions else '',
                info['release_date'],
                info['score'],
                info['rank'],
                info['url'],
            ]
            # Store the row holding this movie's information.
            save(movie)
        print(movie_type_name + '的类型储存完毕!')
# Only start scraping when executed as a script, so that importing this
# module does not trigger network requests and file writes.
if __name__ == '__main__':
    run()