artais 2024-06-03 21:07 采纳率: 63.2%
浏览 19
已结题

Python发生IndexError错误如何解决?

Python运行代码发生异常: IndexError如何解决?

list index out of range
  File "D:\ai2.py", line 21, in get_cat1_songlist_last_page
    last_page_url = "https://music.163.com" + all_pages_urls[-1].split('"')[1]
  File "D:\ai2.py", line 53, in find_cat1_cat2_songlist
    url, num = get_cat1_songlist_last_page(cat1)
  File "D:\ai2.py", line 127, in <module>
    url = find_cat1_cat2_songlist("华语", "新歌")
IndexError: list index out of range

代码如下
```python
import re
import requests
import json
import openpyxl
from openpyxl.drawing.image import Image
wb = openpyxl.Workbook()
sheet = wb.active

def get_cat1_songlist_last_page(cat1: str):
    url = 'https://music.163.com/discover/playlist/?cat=' + cat1
    headers = {
'Cookie': '__e_=1515461191756; _ntes_nnid=af802a7dd2cafc9fef605185da6e73fb,1515461190617; _ntes_nuid=af802a7dd2cafc9fef605185da6e73fb;JSESSIONID-WYYY=HMyeRdf98eDm%2Bi%5CRnK9iB%5ChcSODhA%2Bh4jx5t3z20hhwTRsOCWhBS5Cpn%2B5j%5CVfMIu0i4bQY9sky%5CsvMmHhuwud2cDNbFRD%2FHhWHE61VhovnFrKWXfDAp%5CqO%2B6cEc%2B%2BIXGz83mwrGS78Goo%2BWgsyJb37Oaqr0IehSp288xn5DhgC3Cobe%3A1515585307035; _iuqxldmzr_=32; __utma=94650624.61181594.1515583507.1515583507.1515583507.1; __utmc=94650624; __utmz=94650624.1515583507.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmb=94650624.4.10.1515583507',
'Host': 'music.163.com',
'Referer': 'http://music.163.com/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/63.0.3239.132 Safari/537.36' }
    r = requests.get(url, headers=headers)
    # reg1 = r'<ul class="f-hide"><li><a href="/song\?id=\d*?">.*</a></li></ul>'
    reg1 = r'<a href=".*" class="zpgi">\d*?</a>'
    all_pages_urls = re.compile(reg1).findall(r.text)
    last_page_url = "https://music.163.com" + all_pages_urls[-1].split('"')[1]
    last_page_number = int(all_pages_urls[-1].split('>')[1].split('<')[0])
    return last_page_url, last_page_number


def get_page_songlist_ids(url: str):
    headers = {
'Cookie': '__e_=1515461191756; _ntes_nnid=af802a7dd2cafc9fef605185da6e73fb,1515461190617; _ntes_nuid=af802a7dd2cafc9fef605185da6e73fb;JSESSIONID-WYYY=HMyeRdf98eDm%2Bi%5CRnK9iB%5ChcSODhA%2Bh4jx5t3z20hhwTRsOCWhBS5Cpn%2B5j%5CVfMIu0i4bQY9sky%5CsvMmHhuwud2cDNbFRD%2FHhWHE61VhovnFrKWXfDAp%5CqO%2B6cEc%2B%2BIXGz83mwrGS78Goo%2BWgsyJb37Oaqr0IehSp288xn5DhgC3Cobe%3A1515585307035; _iuqxldmzr_=32; __utma=94650624.61181594.1515583507.1515583507.1515583507.1; __utmc=94650624; __utmz=94650624.1515583507.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmb=94650624.4.10.1515583507',
'Host': 'music.163.com',
'Referer': 'http://music.163.com/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/63.0.3239.132 Safari/537.36' }
    r = requests.get(url, headers=headers)
    reg1 = r'<a title="(.*)" href="/playlist\?id=(\d*?)" class="msk"></a>'
    songlists = re.compile(reg1).findall(r.text)
    songlists_ids = [i[1] for i in songlists]
    reg2 = r'<a href="(.*?)" class="zbtn zprv">上一页</a>'
    nxt_page = re.compile(reg2).findall(r.text)
    return songlists_ids, "https://music.163.com" + nxt_page[0]
def get_songlist_tags(url: str):
    headers = {
'Cookie': '__e_=1515461191756; _ntes_nnid=af802a7dd2cafc9fef605185da6e73fb,1515461190617; _ntes_nuid=af802a7dd2cafc9fef605185da6e73fb;JSESSIONID-WYYY=HMyeRdf98eDm%2Bi%5CRnK9iB%5ChcSODhA%2Bh4jx5t3z20hhwTRsOCWhBS5Cpn%2B5j%5CVfMIu0i4bQY9sky%5CsvMmHhuwud2cDNbFRD%2FHhWHE61VhovnFrKWXfDAp%5CqO%2B6cEc%2B%2BIXGz83mwrGS78Goo%2BWgsyJb37Oaqr0IehSp288xn5DhgC3Cobe%3A1515585307035; _iuqxldmzr_=32; __utma=94650624.61181594.1515583507.1515583507.1515583507.1; __utmc=94650624; __utmz=94650624.1515583507.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmb=94650624.4.10.1515583507',
'Host': 'music.163.com',
'Referer': 'http://music.163.com/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/63.0.3239.132 Safari/537.36' }
    r = requests.get(url, headers=headers)
    reg1 = r'<a class="u-tag" href="/discover/playlist/\?cat=.*"><i>(.*)</i></a>'
    tags = re.compile(reg1).findall(r.text)
    return tags
def find_cat1_cat2_songlist(cat1: str, cat2: str, limit = 1000):
    total = 0
    url, num = get_cat1_songlist_last_page(cat1)
    for i in range(num):
          lists, nxt_page_url = get_page_songlist_ids(url)
          url = nxt_page_url
          cnt = 1
          print("正在搜索第", num, "页", "总共", len(lists), "个歌单")
          for id in lists:
               tags = get_songlist_tags("https://music.163.com/playlist?id=" + str(id))
               print(" 第%2d 个歌单标签为"%(cnt), str(tags), "url = ", "https://music.163.com/playlist?id=" + str(id))
               cnt += 1
               total += 1
               if(cat2 in tags):
                  return "https://music.163.com/playlist?id=" + str(id)
          num -= 1
    
def get_all_hotsongs(url: str):
    headers = {
'Cookie': '__e_=1515461191756; _ntes_nnid=af802a7dd2cafc9fef605185da6e73fb,1515461190617; _ntes_nuid=af802a7dd2cafc9fef605185da6e73fb;JSESSIONID-WYYY=HMyeRdf98eDm%2Bi%5CRnK9iB%5ChcSODhA%2Bh4jx5t3z20hhwTRsOCWhBS5Cpn%2B5j%5CVfMIu0i4bQY9sky%5CsvMmHhuwud2cDNbFRD%2FHhWHE61VhovnFrKWXfDAp%5CqO%2B6cEc%2B%2BIXGz83mwrGS78Goo%2BWgsyJb37Oaqr0IehSp288xn5DhgC3Cobe%3A1515585307035; _iuqxldmzr_=32; __utma=94650624.61181594.1515583507.1515583507.1515583507.1; __utmc=94650624; __utmz=94650624.1515583507.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmb=94650624.4.10.1515583507',
'Host': 'music.163.com',
'Referer': 'http://music.163.com/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/63.0.3239.132 Safari/537.36' }
    r = requests.get(url, headers=headers)
    reg1 = r'<ul class="f-hide"><li><a href="/song\?id=\d*?">.*</a></li></ul>'
    result_contain_songs_ul = re.compile(reg1).findall(r.text)
    result_contain_songs_ul = result_contain_songs_ul[0]
    reg2 = r'<li><a href="/song\?id=\d*?">(.*?)</a></li>'
    reg3 = r'<li><a href="/song\?id=(\d*?)">.*?</a></li>'
    hot_songs_name = re.compile(reg2).findall(result_contain_songs_ul)
    hot_songs_id = re.compile(reg3).findall(result_contain_songs_ul)
    return hot_songs_name, hot_songs_id
def get_avatar(url, path):
    headers = { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Accept-Encoding": "gzip, deflate", "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6", "Cache-Control": "no-cache", "Connection": "keep-alive", "Host": "p1.music.126.net", "Pragma": "no-cache", "Upgrade-Insecure-Requests": "1", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
}
    r = requests.get(url, headers=headers)
    with open(path, "wb") as f:
         f.write(r.content)
def get_hotcommnets(hot_songs_name, hot_songs_id):
    url = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_' + hot_songs_id + '?csrf_token='
    headers = {
'Host': 'music.163.com',
'Proxy-Connection': 'keep-alive',
'Origin': 'http://music.163.com',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)',
'Content-Type': 'application/x-www-form-urlencoded',
'Accept': '*/*',
'Referer': 'http://music.163.com/song?id=' + hot_songs_id + '',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh,zh-TW;q=0.9,en-US;q=0.8,en;q=0.7',
'Cookie': '__e_=1515461191756; _ntes_nnid=af802a7dd2cafc9fef605185da6e73fb,1515461190617; _ntes_nuid=af802a7dd2cafc9fef605185da6e73fb; _iuqxldmzr_=32; __utmc=94650624; __utmz=94650624.1515628584.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic;JSESSIONID-WYYY=TO%2BtUvrTWONNwB%2BgzDpfjFDiggKiS%2FfpMYNam%2BWGooHNka%2BwMhdsT%5CY%2Fn%2FpSMJwo4skFIK1T%2FNjd95lbGHWMQr5d5qcMRPB9SVKWK8UuBs1OGugZ4lFwipwjwWbCepSw%5CjWv31i1Qt%5CWWwtrFzzktj8CdCzniAw%5CgFCElUJnsQygY0MA%3A1515635604215; __utma=94650624.61181594.1515583507.1515630648.1515633862.4; __utmb=94650624.2.10.1515633862' }
    data = {
'params':
'cG5yxYo1s0E9Eqv4QWJLM0fdPiJr0+GfKwqcGPulhOtGJ16gEBopaMhe6XeVNKDigMlpCaV7vrDNQLIOPIaTpAjlcJv +hjdCek6nL0ODfHt9ZEmtkTmU4r/+SA6Vno+o+c4EaPvhghNUXRMdVM/LltKvVanwOSvVhcqUPw9qij1d1akcxweLOWf1hKh2/q/m',
'encSecKey':
'a6c21ac04a44dca0e68174f9dfa85537a2694ecf7b43bdcd46a90836209a3d68008b430b54751bc0f56b12b6da38a265afcef1edbf687d70d1eb853144e920fea28e19a8c6145b7bad33e40d077e8a689b4bf67b367db815278af4ef227b02d85e609007106b7fc4a547bf96a1b90b0eda85bca6cc79ca6fc6559d00060d4184' }
    response = requests.post(url, data=data, headers=headers)
    hotcomments = json.loads(response.text)['hotComments']
    sheet.column_dimensions['F'].width = 256
    for i in range(len(hotcomments)):
          user_name = hotcomments[i]['user']['nickname']
          comment = hotcomments[i]['content']
          like_num = hotcomments[i]['likedCount']
          avatar_url = hotcomments[i]['user']['avatarUrl']
          x = [hot_songs_name, hot_songs_id, user_name, comment, like_num]
          sheet.append(x)
          lst_row = sheet.max_row
          lst_col = sheet.max_column
    
          imgPath = "D:/pachong/img" + str(lst_row) + ".jpg"
          get_avatar(avatar_url + "?param=256y256", imgPath)
          img = Image(imgPath)
          sheet.row_dimensions[lst_row].height = 256
          sheet.add_image(img, "F" + str(lst_row))
          
url = find_cat1_cat2_songlist("华语", "新歌")
hot_songs_name, hot_songs_id = get_all_hotsongs(url)
num = 0
while num < len(hot_songs_name):
    print('正在抓取网易云音乐第%d 首歌曲热评...' % (num + 1))
    get_hotcommnets(hot_songs_name[num], hot_songs_id[num])
    print('第%d 首歌曲热评抓取成功' % (num + 1))
    num += 1
wb.save(filename='Formular1.xlsx')



  • 写回答

19条回答 默认 最新

  • yy64ll826 2024-06-04 15:22
    关注

    【Python】成功解决IndexError: list index out of range

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(18条)

报告相同问题?

问题事件

  • 系统已结题 6月18日
  • 已采纳回答 6月10日
  • 创建了问题 6月3日