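"""Scrape the sound-effect listing at https://sc.chinaz.com/yinxiao/.

Requirements:
1. Page through the listing and collect each track's name and link.
2. Save the results to a CSV file.
"""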
import csv
from urllib.parse import urljoin

import requests
from lxml import etree
# Site root used to resolve the (usually site-relative) hrefs found on a page.
BASE_URL = 'https://sc.chinaz.com'
# Listing page template; the placeholder receives the page number.
PAGE_URL_TEMPLATE = 'https://sc.chinaz.com/yinxiao/index_{}.html'
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'}
OUTPUT_FILE = '音效.csv'
FIELDNAMES = ['name', 'href']
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def build_page_url(page):
    """Return the listing URL for page number *page*."""
    # NOTE(review): the site may serve the first page at index.html rather
    # than index_1.html -- confirm against the live site.
    return PAGE_URL_TEMPLATE.format(page)


def build_rows(names, hrefs):
    """Pair names with hrefs and return a list of CSV row dicts.

    Names are stripped of surrounding whitespace; hrefs are resolved against
    BASE_URL, so site-relative, protocol-relative and absolute links all end
    up as absolute URLs. Pairing stops at the shorter of the two sequences.
    """
    return [
        {'name': name.strip(), 'href': urljoin(BASE_URL, href)}
        for name, href in zip(names, hrefs)
    ]


def fetch_page(page):
    """Download listing page *page* and return its HTML text.

    Returns None (after printing a message) when the server answers with a
    non-success status, so one bad page does not abort the whole run.
    """
    response = requests.get(
        url=build_page_url(page), headers=HEADERS, timeout=REQUEST_TIMEOUT
    )
    if not response.ok:
        print('skip page {}: HTTP {}'.format(page, response.status_code))
        return None
    response.encoding = 'utf-8'
    return response.text


def parse_rows(page_html):
    """Extract the name/href rows from one listing page's HTML."""
    if not page_html or not page_html.strip():
        return []
    html = etree.HTML(page_html)
    if html is None:
        return []
    rows = []
    for a in html.xpath('//div[@class="right-head"]/a'):
        rows.extend(build_rows(a.xpath('./p/text()'), a.xpath('./@href')))
    return rows


def main():
    """Prompt for a page range, scrape every page in it, and write one CSV."""
    start = int(input('请输入你的起始页:'))
    end = int(input('请输入你的结束页:'))

    # Accumulate across pages: the file is opened in 'w' mode, so it must be
    # written exactly once or earlier pages would be overwritten.
    all_rows = []
    for page in range(start, end + 1):
        page_html = fetch_page(page)
        if page_html is None:
            continue
        page_rows = parse_rows(page_html)
        print(page_rows)
        all_rows.extend(page_rows)

    with open(OUTPUT_FILE, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerows(all_rows)


if __name__ == '__main__':
    main()