import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
# Request headers sent with every HTTP call made by this script; the
# User-Agent value is a desktop Chrome-on-macOS identification string.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/99.0.4844.51 Safari/537.36'
    ),
}
def get_data():
    """Fetch the Douban Top 250 page and collect one row per movie link.

    Every ``<a>`` element matched by the ``div.hd a`` selector contributes a
    two-item row: the text of its first ``<span>`` (used as the movie name)
    and the value of its ``href`` attribute.

    Returns:
        list[list[str]]: ``[name, url]`` rows in page order. The list is
        empty when the page contains no matching links.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server responds with an HTTP error status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(
        'https://movie.douban.com/top250',
        headers=headers,
        timeout=10,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page into no rows.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')

    # Initialised once, outside the loop, so rows accumulate across all links
    # and the name is always bound when the function returns.
    data = []
    for link in soup.select('div.hd a'):
        name_tag = link.select_one('span')
        url = link.get('href')
        if name_tag is None or not url:
            # Anchor without a name span or a target URL: nothing to record.
            continue
        # Keep each string whole; iterating over it would split it into
        # single characters.
        data.append([name_tag.text, url])
    return data
def save_data(data, filename='电影网址.xlsx'):
    """Write rows into a new Excel workbook and save it to disk.

    Args:
        data: Iterable of rows. Each row is handed unchanged to the
            worksheet's ``append`` method, so it should be a list or tuple
            of cell values.
        filename: Path of the workbook to write. Defaults to the file name
            this script has always used.
    """
    # Create a new workbook.
    wb = Workbook()
    # Select the default worksheet.
    sheet = wb.active
    # Rename the worksheet.
    sheet.title = '电影网址表'
    # Write the data, one worksheet row per item.
    for row in data:
        sheet.append(row)
    # Save the Excel file.
    wb.save(filename)
# Script entry: scrape the page, then write the result to the workbook.
_scraped = get_data()
save_data(_scraped)