这也不知道什么情况,一页的数据全存在一行了,总共爬了3页,存了3行数据
```python
import requests
from lxml import etree
import pandas as pd
import pyttsx3
def star_data(page):
url = 'https://search.bilibili.com/all?keyword=web%E6%B8%97%E9%80%8F&from_sourc=webtop_search&spm_id_from=333.851&page={}'.format(page)
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'}
response = requests.get(url, headers=headers)
print('获取数据中')
if response.status_code == 200:
return response
else:
print('服务器已实行反爬')
def send_data(page):
print('正在获取第{}页数据'.format(page))
dict={'name':[],
'time':[],
'href':[]
}
list1=[]
for i in range(1,page+1):
response=star_data(i)
html = etree.HTML(response.text)
for item in html.xpath('//ul[@class="video-list clearfix"]'):
name=item.xpath('//a[@class="title"]/text()')
dict['name'].append(','.join(name))
time=html.xpath('//span[@class="so-imgTag_rb"]/text()')
dict['time'].append(','.join(time))
href=html.xpath('//a[@class="title"]/@href')
dict['href'].append(','.join(href))
# print(dict)
return dict
def clean_data(page):
data=send_data(page)
fd=pd.DataFrame(data)
print('存储第{}页数据中....'.format(page))
fd.to_csv('b站数据.csv',header=True,index=False)
if __name__=='__main__':
page=int(input('请输入要爬取的页数'))
clean_data(page)
k=1
while k<4:
pyttsx3.speak('爬取数据结束')
k+=1
```