代码如下
写一半测试时发现返回一堆空列表
```python
import sys
import re
import urllib.request
import xlwt
from bs4 import BeautifulSoup
# Regular-expression filter used when scraping.
# Captures the href value of an anchor written exactly as
# <a href="..." target="_blank" class="title">...</a>; the attributes must
# appear in this order for the pattern to match. re.S lets '.' span newlines.
findLink=re.compile(r'<a href="(.*?)" target="_blank" class="title">.*?</a>',re.S)  # video link
def get_url(url):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    The request is sent with a browser-like ``User-Agent`` header. When the
    request fails with ``urllib.error.URLError`` (``HTTPError`` included),
    the error's ``code`` and/or ``reason`` are printed and an empty string
    is returned instead of raising.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or ``''`` when the request failed.
    """
    # Browser-like User-Agent header sent with the request.
    head = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36 Edg/96.0.1054.34"}
    request = urllib.request.Request(url, headers=head)
    html = ''
    try:
        # The context manager closes the response even if read/decode raises.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode('utf-8')
    except urllib.error.URLError as e:
        # HTTPError carries both a status code and a reason; a plain
        # URLError only has a reason.
        if hasattr(e, 'code'):
            print(e.code)
        if hasattr(e, 'reason'):
            print(e.reason)
    return html
def get_data(baseurl):
    """Collect the links matched by ``findLink`` on the page at ``baseurl``.

    The page is downloaded with ``get_url`` and parsed with BeautifulSoup's
    ``html.parser``. Each ``<div class="content">`` element is serialized
    back to text and searched with the module-level ``findLink`` pattern.

    Args:
        baseurl: Address of the page to scrape.

    Returns:
        A flat list of the captured ``href`` strings, in the order the
        elements were found. Empty when the download failed (``get_url``
        returned ``''``) or when nothing matched.
    """
    data_list = []
    html = get_url(baseurl)
    soup = BeautifulSoup(html, 'html.parser')
    for item in soup.find_all('div', class_="content"):
        # The regex works on text, so serialize the tag first.
        link = re.findall(findLink, str(item))
        # Keep the matches: they used to be printed only, so the function
        # always returned an empty list.
        data_list.extend(link)
        # Per-item output kept so running the script still shows progress.
        print(link)
    return data_list
# Script entry point: run the scraper against the Bilibili popular ranking
# page. The returned list is not used here; get_data prints as it goes.
get_data('https://www.bilibili.com/v/popular/rank/all')