 while num < len(new_song_name):    # 保存所有新歌榜中的热评
        print('\n ===============正在抓取第%d首歌曲热评...' % (num+1))
        # 热门歌曲评论很多,每首爬取最新的70页评论
        B.get_all_comments(new_song_id[num], 70 , new_song_name[num], num+1)
        print('第%d首歌曲热评抓取成功*********************** \n' % (num+1))
        num += 1


File "D:/Pycharm/project6/teacher_test1.py", line 434, in <module>
    B.get_all_comments(new_song_id[num], 70 , new_song_name[num], num+1)
TypeError: get_all_comments() takes 4 positional arguments but 5 were given



import os
import re
import math
import random
import urllib.request
import urllib.error
import urllib.parse
from Crypto.Cipher import AES
import base64
import requests
import json
import time
import csv

# Pool of desktop browser User-Agent strings; one is picked at random per use.
agents = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/ Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]

# Default headers shared by the requests-based calls. The User-Agent is chosen
# once, when the module is loaded, and reused for every such request.
headers = {
    'User-Agent': random.choice(agents),
}

# Only the first request parameter varies; the other three are fixed values
# that can be reused as-is.
# offset is (comment page - 1) * 20; total is "true" for the first page and
# "false" for every other page.
# First parameter (built per page inside get_params):
# first_param = '{rid:"", offset:"0", total:"true", limit:"20", csrf_token:""}'
# Second parameter
second_param = "010001"
# Third parameter
third_param = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
# Fourth parameter (get_params uses it as the key of the first AES pass)
forth_param = "0CoJUm6Qyw8W8jud"

def get_params(page):
    """Build the encrypted ``params`` form field for one page of comments.

    The request description (offset and ``total`` flag) is AES-encrypted
    twice: first with ``forth_param`` as key, then with a key of sixteen
    ``F`` characters.

    Args:
        page: 1-based comment page number. Each page holds 20 comments, so
            the offset sent to the server is ``(page - 1) * 20``.

    Returns:
        The doubly encrypted, Base64-encoded parameter string.
    """
    # Keys and IV are passed as bytes; PyCryptodome rejects str for both.
    iv = b"0102030405060708"
    first_key = forth_param.encode('utf-8')
    second_key = 16 * b'F'
    offset = str((page - 1) * 20)
    # Only the first page asks the server for the total ("true").
    total = 'true' if page == 1 else 'false'
    first_param = '{rid:"", offset:"%s", total:"%s", limit:"20", csrf_token:""}' % (offset, total)
    h_encText = AES_encrypt(first_param, first_key, iv)
    h_encText = AES_encrypt(h_encText, second_key, iv)
    return h_encText

def get_encSecKey():
    """Return the fixed ``encSecKey`` form value sent with every request."""
    return "257348aecb5e556c066de214e531faadd1c55d814f9be95fd06d6bff9f4c7a41f831f6394d5a3fd2e3881736d94a02ca919d952872e7d0a50ebfa1769a7a62d512f5f1ca21aec60bc3819a9c3ffca5eca9a0dba6d6f7249b06f5965ecfff3695b54e1c28f3f624750ed39e7de08fc8493242e26dbc4484a01c76f739e135637c"

def AES_encrypt(text, key, iv):
    """Encrypt *text* with AES in CBC mode and return it as a Base64 string.

    Args:
        text: Plain text (str) to encrypt.
        key: AES key, as bytes or str. A str is UTF-8 encoded first.
        iv: Initialisation vector, as bytes or str. A str is UTF-8 encoded
            first.

    Returns:
        The ciphertext, Base64-encoded and decoded to str.
    """
    data = text.encode('utf-8')
    # The cipher works on bytes; accept str for convenience and encode it.
    if isinstance(key, str):
        key = key.encode('utf-8')
    if isinstance(iv, str):
        iv = iv.encode('utf-8')
    # Pad to a multiple of 16 *bytes*; each pad byte holds the pad length.
    # Counting bytes rather than characters keeps non-ASCII text block-aligned.
    pad = 16 - len(data) % 16
    data += bytes([pad]) * pad
    encryptor = AES.new(key, AES.MODE_CBC, iv)
    encrypted = base64.b64encode(encryptor.encrypt(data))
    # Return str rather than bytes so the result can be re-encrypted or
    # posted as a form value.
    return encrypted.decode('utf-8')

def get_json(url, params, encSecKey):
    """POST the encrypted form to *url* and return the response body as text.

    Args:
        url: Endpoint to post to.
        params: Encrypted ``params`` form value.
        encSecKey: ``encSecKey`` form value.

    Returns:
        The response body decoded as UTF-8 (a JSON document as str).
    """
    data = {
        "params": params,
        "encSecKey": encSecKey,
    }
    response = requests.post(url, headers=headers, data=data)
    # response.content is bytes, so it has to be decoded (not encoded).
    return response.content.decode('utf-8')

def get_all_newSong():
    """Fetch the new-songs chart page and extract every song name and id.

    Returns:
        A ``(names, ids)`` tuple of two lists of str, index-aligned. Both
        lists are empty when the hidden song list is not found in the page.
    """
    url = 'https://music.163.com/discover/toplist?id=3779629'    # new-songs chart page
    header = {'User-Agent': random.choice(agents)}
    request = urllib.request.Request(url=url, headers=header)
    # Close the HTTP response deterministically once the body has been read.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf8')

    # First pass: isolate the hidden <ul> that lists every song of the chart.
    list_match = re.search(r'<ul class="f-hide"><li><a href="/song\?id=\d*?">.*</a></li></ul>', html)
    if list_match is None:
        return [], []

    # Second pass: take id and name from the same match so the two lists
    # cannot drift out of step.
    pairs = re.findall(r'<li><a href="/song\?id=(\d*?)">(.*?)</a></li>', list_match.group(0))
    new_song_name = [name for _, name in pairs]
    new_song_id = [song_id for song_id, _ in pairs]
    return new_song_name, new_song_id

def _fetch_comment_page(url, page_no):
    """Request one 1-based comment page and return the parsed JSON payload."""
    return json.loads(get_json(url, get_params(page_no), get_encSecKey()))


def _build_comment_row(item, num):
    """Flatten one comment entry and its author's profile into a CSV row."""
    user_id = item['user']['userId']
    user_message = get_user(user_id)
    # The signature may be None; treat that as an empty introduction.
    user_introduce = str(user_message['sign'] or '').strip().replace('\n', '')
    # item['time'] is divided by 1000 before being handed to time.localtime.
    date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(item['time'] / 1000)))
    return [
        str(num),
        user_id,
        item['user']['nickname'],
        str(user_message['gender']),
        str(user_message['age']),
        str(user_message['province']),
        str(user_message['city']),
        user_introduce,
        str(item['commentId']),
        str(item['content']),
        str(item['likedCount']),
        date,
    ]


def _save_comments(items, rows, txt_file, csv_writer):
    """Print, write and collect every comment in *items*.

    Numbering continues from ``len(rows)``; each new row is appended to *rows*.
    """
    for item in items:
        row = _build_comment_row(item, len(rows) + 1)
        (_, user_id, nickname, gender, age, province, city,
         introduce, comment_id, comment, praise, date) = row
        print("用户名:{}   用户ID:{}     用户年龄:{}     用户性别:{}     用户省份:{}     用户城市:{}     "
              "用户介绍:{}     评论内容:{}     评论ID:{}     点赞数:{}      评论日期:{}".format(
                  nickname, user_id, age, gender, province, city,
                  introduce, comment, comment_id, praise, date))
        txt_file.write(comment + '\n')
        csv_writer.writerow(row)
        rows.append(row)


def get_all_comments(new_song_id, page, new_song_name, new_song_order):
    """Crawl the hot comments plus the first and last pages of one song.

    This is a module-level function, so it takes no ``self``; it is called
    with exactly four arguments.

    The comment texts are appended to ``Comments/<order>. <name>.txt`` and
    the full rows to ``Comments/<order>. <name>.csv`` under the current
    working directory.

    Args:
        new_song_id: Song id as str (concatenated into the request URL).
        page: Page number used for the initial request that reads the total
            comment count. It does not limit how many pages are crawled.
        new_song_name: Song title, used in log output and in the file names.
        new_song_order: Position of the song in the chart, used as a
            file-name prefix.

    Returns:
        The list of rows written to the CSV file, in write order.
    """
    edge_pages = 52  # number of pages crawled from each end of the comment list
    url = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_' + new_song_id + '?csrf_token='
    total = _fetch_comment_page(url, page)['total']
    pages = int(math.ceil(total / 20))
    print("新歌名:" + new_song_name + "  评论总数:" + str(total) + "   评论页数:" + str(pages))

    out_dir = os.path.join(os.getcwd(), 'Comments')
    os.makedirs(out_dir, exist_ok=True)
    # Song titles can contain characters that are illegal in file names.
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', new_song_name)
    # '. ' separates the order prefix from titles that start with digits.
    base_path = os.path.join(out_dir, str(new_song_order) + '. ' + safe_name)

    # Clamp both windows to the pages that exist, and start the tail after
    # the head so short comment lists are neither re-read nor given
    # negative page numbers.
    head_pages = range(min(edge_pages, pages))
    tail_pages = range(max(pages - edge_pages, len(head_pages)), pages)

    all_comments_list = []
    # newline='' lets the csv module control line endings itself.
    with open(base_path + '.txt', 'a', encoding='utf-8') as txt_file, \
            open(base_path + '.csv', 'a', encoding='utf-8-sig', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)

        # Hot comments are read from the first page only.
        print(url, "   第{}页".format(1))
        json_dict = _fetch_comment_page(url, 1)
        print("新歌榜歌名:", new_song_name, "  正在获取热门评论")
        # NOTE(review): assumes a response may omit 'hotComments'; an absent
        # key is treated as "no hot comments".
        _save_comments(json_dict.get('hotComments', []), all_comments_list, txt_file, csv_writer)
        print('--------第%d首歌的热门评论抓取完毕!-----------------------\n' % (new_song_order))

        for i in list(head_pages) + list(tail_pages):
            print(url, "   第{}页".format(i + 1))
            json_dict = _fetch_comment_page(url, i + 1)
            print("新歌榜歌名:", new_song_name, "  评论总数:", json_dict["total"])
            _save_comments(json_dict['comments'], all_comments_list, txt_file, csv_writer)
            print('--------第%d首歌的%d页抓取完毕!-----------------------' % (new_song_order, i + 1))
            # If the crawl is too fast, sleeping here would ease the load on the server.

    return all_comments_list

# Province code as found in the user profile -> display name.
_PROVINCE_NAMES = {
    110000: '北京',
    120000: '天津',
    130000: '河北',
    140000: '山西',
    150000: '内蒙古',
    210000: '辽宁',
    220000: '吉林',
    230000: '黑龙江',
    310000: '上海',
    320000: '江苏',
    330000: '浙江',
    340000: '安徽',
    350000: '福建',
    360000: '江西',
    370000: '山东',
    410000: '河南',
    420000: '湖北',
    430000: '湖南',
    440000: '广东',
    450000: '广西',
    460000: '海南',
    500000: '重庆',
    510000: '四川',
    520000: '贵州',
    530000: '云南',
    540000: '西藏',
    610000: '陕西',
    620000: '甘肃',
    630000: '青海',
    640000: '宁夏',
    650000: '新疆',
    710000: '台湾',
    810000: '香港',
    820000: '澳门',
}


def get_user(user_Id):
    """Look up the public profile of one user.

    Args:
        user_Id: User id; converted to str and appended to the detail URL.

    Returns:
        A dict with the keys ``gender``, ``age``, ``province``, ``city`` and
        ``sign``. When the response code is not 200 every value is '无'.
    """
    url = 'https://music.163.com/api/v1/user/detail/' + str(user_Id)
    response = requests.get(url=url, headers=headers)
    js = json.loads(response.text)
    if js['code'] != 200:
        return {'gender': '无', 'age': '无', 'province': '无', 'city': '无', 'sign': '无'}

    profile = js['profile']
    data = {}

    # Gender: 1 and 2 are mapped explicitly, anything else is "undisclosed".
    data['gender'] = {1: "男", 2: "女"}.get(int(profile['gender']), "保密")

    # Age: a negative birthday is reported as 0. Otherwise the birthday is
    # converted from milliseconds to whole 365-day years and subtracted from
    # the number of years between 1970 and the current year.
    birthday = int(profile['birthday'])
    if birthday < 0:
        age = 0
    else:
        age = (time.localtime().tm_year - 1970) - birthday // (1000 * 365 * 24 * 3600)
    data['age'] = max(age, 0)

    # Province: unknown codes fall back to "other".
    data['province'] = _PROVINCE_NAMES.get(profile['province'], '其他')
    data['city'] = profile['city']
    # Self-introduction; a missing signature is returned as an empty string.
    data['sign'] = profile['signature'] or ''
    return data

if __name__ == '__main__':
    # Crawl the comments of every song on the new-songs chart and report
    # how long the whole run took.
    started_at = time.time()

    new_song_name, new_song_id = get_all_newSong()

    for index, song_name in enumerate(new_song_name):
        order = index + 1  # 1-based chart position, also used in file names
        print('\n ===============正在抓取第%d首歌曲热评...' % order)
        get_all_comments(new_song_id[index], 70, song_name, order)
        print('第%d首歌曲热评抓取成功*********************** \n' % order)

    print('程序耗时%f秒.' % (time.time() - started_at))
