Python运行代码发生异常: IndexError如何解决?
list index out of range
File "D:\ai2.py", line 21, in get_cat1_songlist_last_page
last_page_url = "https://music.163.com" + all_pages_urls[-1].split('"')[1]
File "D:\ai2.py", line 53, in find_cat1_cat2_songlist
url, num = get_cat1_songlist_last_page(cat1)
File "D:\ai2.py", line 127, in <module>
url = find_cat1_cat2_songlist("华语", "新歌")
IndexError: list index out of range
代码如下
```python
import re
import requests
import json
import openpyxl
from openpyxl.drawing.image import Image
wb = openpyxl.Workbook()
sheet = wb.active
def get_cat1_songlist_last_page(cat1: str):
url = 'https://music.163.com/discover/playlist/?cat=' + cat1
headers = {
'Cookie': '__e_=1515461191756; _ntes_nnid=af802a7dd2cafc9fef605185da6e73fb,1515461190617; _ntes_nuid=af802a7dd2cafc9fef605185da6e73fb;JSESSIONID-WYYY=HMyeRdf98eDm%2Bi%5CRnK9iB%5ChcSODhA%2Bh4jx5t3z20hhwTRsOCWhBS5Cpn%2B5j%5CVfMIu0i4bQY9sky%5CsvMmHhuwud2cDNbFRD%2FHhWHE61VhovnFrKWXfDAp%5CqO%2B6cEc%2B%2BIXGz83mwrGS78Goo%2BWgsyJb37Oaqr0IehSp288xn5DhgC3Cobe%3A1515585307035; _iuqxldmzr_=32; __utma=94650624.61181594.1515583507.1515583507.1515583507.1; __utmc=94650624; __utmz=94650624.1515583507.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmb=94650624.4.10.1515583507',
'Host': 'music.163.com',
'Referer': 'http://music.163.com/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/63.0.3239.132 Safari/537.36' }
r = requests.get(url, headers=headers)
# reg1 = r'<ul class="f-hide"><li><a href="/song\?id=\d*?">.*</a></li></ul>'
reg1 = r'<a href=".*" class="zpgi">\d*?</a>'
all_pages_urls = re.compile(reg1).findall(r.text)
last_page_url = "https://music.163.com" + all_pages_urls[-1].split('"')[1]
last_page_number = int(all_pages_urls[-1].split('>')[1].split('<')[0])
return last_page_url, last_page_number
def get_page_songlist_ids(url: str):
headers = {
'Cookie': '__e_=1515461191756; _ntes_nnid=af802a7dd2cafc9fef605185da6e73fb,1515461190617; _ntes_nuid=af802a7dd2cafc9fef605185da6e73fb;JSESSIONID-WYYY=HMyeRdf98eDm%2Bi%5CRnK9iB%5ChcSODhA%2Bh4jx5t3z20hhwTRsOCWhBS5Cpn%2B5j%5CVfMIu0i4bQY9sky%5CsvMmHhuwud2cDNbFRD%2FHhWHE61VhovnFrKWXfDAp%5CqO%2B6cEc%2B%2BIXGz83mwrGS78Goo%2BWgsyJb37Oaqr0IehSp288xn5DhgC3Cobe%3A1515585307035; _iuqxldmzr_=32; __utma=94650624.61181594.1515583507.1515583507.1515583507.1; __utmc=94650624; __utmz=94650624.1515583507.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmb=94650624.4.10.1515583507',
'Host': 'music.163.com',
'Referer': 'http://music.163.com/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/63.0.3239.132 Safari/537.36' }
r = requests.get(url, headers=headers)
reg1 = r'<a title="(.*)" href="/playlist\?id=(\d*?)" class="msk"></a>'
songlists = re.compile(reg1).findall(r.text)
songlists_ids = [i[1] for i in songlists]
reg2 = r'<a href="(.*?)" class="zbtn zprv">上一页</a>'
nxt_page = re.compile(reg2).findall(r.text)
return songlists_ids, "https://music.163.com" + nxt_page[0]
def get_songlist_tags(url: str):
headers = {
'Cookie': '__e_=1515461191756; _ntes_nnid=af802a7dd2cafc9fef605185da6e73fb,1515461190617; _ntes_nuid=af802a7dd2cafc9fef605185da6e73fb;JSESSIONID-WYYY=HMyeRdf98eDm%2Bi%5CRnK9iB%5ChcSODhA%2Bh4jx5t3z20hhwTRsOCWhBS5Cpn%2B5j%5CVfMIu0i4bQY9sky%5CsvMmHhuwud2cDNbFRD%2FHhWHE61VhovnFrKWXfDAp%5CqO%2B6cEc%2B%2BIXGz83mwrGS78Goo%2BWgsyJb37Oaqr0IehSp288xn5DhgC3Cobe%3A1515585307035; _iuqxldmzr_=32; __utma=94650624.61181594.1515583507.1515583507.1515583507.1; __utmc=94650624; __utmz=94650624.1515583507.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmb=94650624.4.10.1515583507',
'Host': 'music.163.com',
'Referer': 'http://music.163.com/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/63.0.3239.132 Safari/537.36' }
r = requests.get(url, headers=headers)
reg1 = r'<a class="u-tag" href="/discover/playlist/\?cat=.*"><i>(.*)</i></a>'
tags = re.compile(reg1).findall(r.text)
return tags
def find_cat1_cat2_songlist(cat1: str, cat2: str, limit = 1000):
total = 0
url, num = get_cat1_songlist_last_page(cat1)
for i in range(num):
lists, nxt_page_url = get_page_songlist_ids(url)
url = nxt_page_url
cnt = 1
print("正在搜索第", num, "页", "总共", len(lists), "个歌单")
for id in lists:
tags = get_songlist_tags("https://music.163.com/playlist?id=" + str(id))
print(" 第%2d 个歌单标签为"%(cnt), str(tags), "url = ", "https://music.163.com/playlist?id=" + str(id))
cnt += 1
total += 1
if(cat2 in tags):
return "https://music.163.com/playlist?id=" + str(id)
num -= 1
def get_all_hotsongs(url: str):
headers = {
'Cookie': '__e_=1515461191756; _ntes_nnid=af802a7dd2cafc9fef605185da6e73fb,1515461190617; _ntes_nuid=af802a7dd2cafc9fef605185da6e73fb;JSESSIONID-WYYY=HMyeRdf98eDm%2Bi%5CRnK9iB%5ChcSODhA%2Bh4jx5t3z20hhwTRsOCWhBS5Cpn%2B5j%5CVfMIu0i4bQY9sky%5CsvMmHhuwud2cDNbFRD%2FHhWHE61VhovnFrKWXfDAp%5CqO%2B6cEc%2B%2BIXGz83mwrGS78Goo%2BWgsyJb37Oaqr0IehSp288xn5DhgC3Cobe%3A1515585307035; _iuqxldmzr_=32; __utma=94650624.61181594.1515583507.1515583507.1515583507.1; __utmc=94650624; __utmz=94650624.1515583507.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmb=94650624.4.10.1515583507',
'Host': 'music.163.com',
'Referer': 'http://music.163.com/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/63.0.3239.132 Safari/537.36' }
r = requests.get(url, headers=headers)
reg1 = r'<ul class="f-hide"><li><a href="/song\?id=\d*?">.*</a></li></ul>'
result_contain_songs_ul = re.compile(reg1).findall(r.text)
result_contain_songs_ul = result_contain_songs_ul[0]
reg2 = r'<li><a href="/song\?id=\d*?">(.*?)</a></li>'
reg3 = r'<li><a href="/song\?id=(\d*?)">.*?</a></li>'
hot_songs_name = re.compile(reg2).findall(result_contain_songs_ul)
hot_songs_id = re.compile(reg3).findall(result_contain_songs_ul)
return hot_songs_name, hot_songs_id
def get_avatar(url, path):
headers = { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Accept-Encoding": "gzip, deflate", "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6", "Cache-Control": "no-cache", "Connection": "keep-alive", "Host": "p1.music.126.net", "Pragma": "no-cache", "Upgrade-Insecure-Requests": "1", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
}
r = requests.get(url, headers=headers)
with open(path, "wb") as f:
f.write(r.content)
def get_hotcommnets(hot_songs_name, hot_songs_id):
url = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_' + hot_songs_id + '?csrf_token='
headers = {
'Host': 'music.163.com',
'Proxy-Connection': 'keep-alive',
'Origin': 'http://music.163.com',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)',
'Content-Type': 'application/x-www-form-urlencoded',
'Accept': '*/*',
'Referer': 'http://music.163.com/song?id=' + hot_songs_id + '',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh,zh-TW;q=0.9,en-US;q=0.8,en;q=0.7',
'Cookie': '__e_=1515461191756; _ntes_nnid=af802a7dd2cafc9fef605185da6e73fb,1515461190617; _ntes_nuid=af802a7dd2cafc9fef605185da6e73fb; _iuqxldmzr_=32; __utmc=94650624; __utmz=94650624.1515628584.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic;JSESSIONID-WYYY=TO%2BtUvrTWONNwB%2BgzDpfjFDiggKiS%2FfpMYNam%2BWGooHNka%2BwMhdsT%5CY%2Fn%2FpSMJwo4skFIK1T%2FNjd95lbGHWMQr5d5qcMRPB9SVKWK8UuBs1OGugZ4lFwipwjwWbCepSw%5CjWv31i1Qt%5CWWwtrFzzktj8CdCzniAw%5CgFCElUJnsQygY0MA%3A1515635604215; __utma=94650624.61181594.1515583507.1515630648.1515633862.4; __utmb=94650624.2.10.1515633862' }
data = {
'params':
'cG5yxYo1s0E9Eqv4QWJLM0fdPiJr0+GfKwqcGPulhOtGJ16gEBopaMhe6XeVNKDigMlpCaV7vrDNQLIOPIaTpAjlcJv +hjdCek6nL0ODfHt9ZEmtkTmU4r/+SA6Vno+o+c4EaPvhghNUXRMdVM/LltKvVanwOSvVhcqUPw9qij1d1akcxweLOWf1hKh2/q/m',
'encSecKey':
'a6c21ac04a44dca0e68174f9dfa85537a2694ecf7b43bdcd46a90836209a3d68008b430b54751bc0f56b12b6da38a265afcef1edbf687d70d1eb853144e920fea28e19a8c6145b7bad33e40d077e8a689b4bf67b367db815278af4ef227b02d85e609007106b7fc4a547bf96a1b90b0eda85bca6cc79ca6fc6559d00060d4184' }
response = requests.post(url, data=data, headers=headers)
hotcomments = json.loads(response.text)['hotComments']
sheet.column_dimensions['F'].width = 256
for i in range(len(hotcomments)):
user_name = hotcomments[i]['user']['nickname']
comment = hotcomments[i]['content']
like_num = hotcomments[i]['likedCount']
avatar_url = hotcomments[i]['user']['avatarUrl']
x = [hot_songs_name, hot_songs_id, user_name, comment, like_num]
sheet.append(x)
lst_row = sheet.max_row
lst_col = sheet.max_column
imgPath = "D:/pachong/img" + str(lst_row) + ".jpg"
get_avatar(avatar_url + "?param=256y256", imgPath)
img = Image(imgPath)
sheet.row_dimensions[lst_row].height = 256
sheet.add_image(img, "F" + str(lst_row))
url = find_cat1_cat2_songlist("华语", "新歌")
hot_songs_name, hot_songs_id = get_all_hotsongs(url)
num = 0
while num < len(hot_songs_name):
print('正在抓取网易云音乐第%d 首歌曲热评...' % (num + 1))
get_hotcommnets(hot_songs_name[num], hot_songs_id[num])
print('第%d 首歌曲热评抓取成功' % (num + 1))
num += 1
wb.save(filename='Formular1.xlsx')