from bs4 import BeautifulSoup #网页解析,获取数字
import re #正则表达式,进行文字匹配
import urllib.request,urllib.error #制定url,获取网页数据
import xlwt #进行Excel操作
import sqlite3 #进行sqlite
def main():
    """Run the crawl: announce the start, fetch the listing, collect its data.

    Prints a start banner, requests the listing page once through
    ``askURL`` (the return value is not used here), then hands the base URL
    to ``getData`` and prepares the spreadsheet output path.
    """
    print("开始爬取....")
    # Douban paginates the Top 250 listing with the ``start`` query
    # parameter; the earlier "star=" spelling was a typo.
    # NOTE(review): presumably getData appends the page offset to this
    # prefix -- confirm against getData.
    baseurl = "https://movie.douban.com/top250?start="
    # 1. Fetch the page. Reuse baseurl instead of repeating the literal so
    # the two URLs cannot drift apart.
    # NOTE(review): the result is discarded; this request may be redundant
    # if getData performs its own fetch -- confirm before removing.
    askURL(baseurl)
    datalist = getData(baseurl)  # collect the data for the given base URL
    # Output path for the spreadsheet.
    # NOTE(review): datalist and savepath are not consumed in this function;
    # a save step appears to be missing -- confirm.
    savepath = "./豆瓣电影Top250.xls"
def askURL(url):
head = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36 Edg/89.0.774.75"
}
request = urllib.request.Request(url,headers=head)
html = ""