
Can anyone take a look at why this Bilibili comment-section crawler keeps throwing the same error, and how to fix it?
import requests
import datetime
import csv
import hashlib
from urllib.parse import quote
import re
import time
def get_w_rid(date, num):
    """Compute the WBI signature (w_rid) over the sorted query parameters."""
    pagination_str = '{"offset":"{\\"type\\":3,\\"direction\\":1,\\"Data\\":{\\"cursor\\":%s}}"}' % num
    # Parameters must be listed in alphabetical key order before signing
    Zt = [
        "mode=2",
        "oid=898762590",
        f"pagination_str={quote(pagination_str)}",
        "plat=1",
        "type=1",
        "web_location=1315875",
        f"wts={date}"
    ]
    Ut = '&'.join(Zt)
    # WBI mixin key (salt); Bilibili rotates this, so a hard-coded value can go stale
    ct = 'ea1db124af3c7062474693fa704f4ff8'
    string = Ut + ct
    MD5 = hashlib.md5()
    # Encode as plain UTF-8: 'utf-8-sig' prepends a BOM and produces the wrong digest
    MD5.update(string.encode('utf-8'))
    w_rid = MD5.hexdigest()
    print(w_rid)
    return w_rid
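One likely culprit: the salt ct in the function above is hard-coded, but Bilibili rotates its WBI keys, so a stale mixin key makes the server reject the w_rid and the JSON comes back as an error body instead of comment data. Below is a minimal sketch of how the mixin key is normally derived from the nav endpoint. The permutation table is the one circulated in the community bilibili-API-collect docs; I am quoting it from memory, so verify it against the current docs before relying on it.

import hashlib
import requests

# Permutation table used to derive the WBI mixin key, as documented by the
# community bilibili-API-collect project (assumption: still current).
MIXIN_KEY_ENC_TAB = [
    46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
    33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
    61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
    36, 20, 34, 44, 52,
]

def get_mixin_key():
    """Fetch img_key/sub_key from the nav API and reduce them to the 32-char salt."""
    resp = requests.get(
        'https://api.bilibili.com/x/web-interface/nav',
        headers={'User-Agent': 'Mozilla/5.0'},
    ).json()
    wbi_img = resp['data']['wbi_img']
    img_key = wbi_img['img_url'].rsplit('/', 1)[-1].split('.')[0]
    sub_key = wbi_img['sub_url'].rsplit('/', 1)[-1].split('.')[0]
    raw = img_key + sub_key
    return ''.join(raw[i] for i in MIXIN_KEY_ENC_TAB)[:32]

If the value this returns differs from the hard-coded 'ea1db124...', use the fresh key as ct when building the signature.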
def get_content(num):
    headers = {
        "Cookie": "i-wanna-go-back=-1; buvid_fp_plain=undefined; DedeUserID=358542448; DedeUserID__ckMd5=993191df0493e6c3; LIVE_BUVID=AUTO8816562569019759; CURRENT_FNVAL=4048; header_theme_version=CLOSE; CURRENT_PID=fc6ca410-cfd7-11ed-9e75-2546975c7231; FEED_LIVE_VERSION=V8; hit-new-style-dyn=1; buvid3=4E102E81D-A99C-8D49-1F5E-D5BFCD9982B954941infoc; b_nut=1686098454; b_ut=5; _uuid=10B17772A-1447-97EC-B291-76F8514A72ED57049infoc; buvid4=AF2FFFC5-E98E-0872-8D0C-8D37B69A9F4442284-022060213-ujeqZGTJGq3%2BD3CBl4%2BRLw%3D%3D; enable_web_push=DISABLE; CURRENT_QUALITY=0; fingerprint=c0c84de23185b5a2e7b7fcc56809c121; buvid_fp=c0c84de23185b5a2e7b7fcc56809c121; rpdid=|(u)~m|YJ))|0J'u~|JYlum~~; SESSDATA=aff3e6a4%2C1719881593%2C6b0f2%2A11CjC-a-HgCYRgt2Y5IEQAgWBm_2ysdPF0VT8A_nKzDrzmKUKw6ii-26bjonf_lfgOLA0SVk91XzZYZXdSWl9tV2djdk1nMGszMm1QdGVWVzcxX0NmOHUzbGQ0V1lUR05qWUJDZnJQdERYWmdSZHFXbl8tSkIwNERnVGtrM3cyUUxtNXJFSnFOTmxBIIEC; bili_jct=4519996fd3d7c71943b6d4ae4d2e5e19; bp_video_offset_358542448=882657036786991107; browser_resolution=1220-680; home_feed_column=4; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MDQ3NzkxNjMsImlhdCI6MTcwNDUxOTkwMywicGx0IjotMX0.kmWdpeukRHtQmyonkkvCvS-qIm-OA1dV4F5dYfxrxwA; bili_ticket_expires=1704779103; b_lsid=DDA26288_18CDE10B467; sid=g853q0b1; PVID=5",
        "Host": "api.bilibili.com",
        "Referer": "https://www.bilibili.com/video/BV1MN4y177PB/?spm_id_from=333.337.search-card.all.click&vd_source=71aed3cc1650a98dadae447f4df568fb",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
    }
    date = int(time.time())
    url = 'https://api.bilibili.com/x/v2/reply/wbi/main'
    w_rid = get_w_rid(date, num)
    # Query parameters; these must match the fields that were signed above
    data = {
        'oid': '898762590',
        'type': '1',
        'mode': '2',
        'pagination_str': '{"offset":"{\\"type\\":3,\\"direction\\":1,\\"Data\\":{\\"cursor\\":%s}}"}' % num,
        'plat': '1',
        'web_location': '1315875',
        'w_rid': w_rid,
        'wts': date,
    }
    # Send the request
    response = requests.get(url=url, params=data, headers=headers)
    json_data = response.json()
    print(json_data)
    # json_data is the parsed JSON dictionary returned by the API
    replies = json_data['data']['replies']
    # Loop over the list and extract each reply
    for index in replies:
        text = index['content']['message']  # comment text
        name = index['member']['uname']  # nickname
        # some replies carry no location field, so use .get() to avoid a KeyError
        location = index.get('reply_control', {}).get('location', '').replace('IP属地:', '')
        like = index['like']  # like count
        ctime = index['ctime']  # timestamp
        # collect one row for the CSV writer
        dit = {
            '昵称': name,
            '地区': location,
            '点赞': like,
            '内容': text,
        }
        csv_writer.writerow(dit)
        print(dit)
    # Extract the cursor that points at the next page
    next_offset = json_data['data']['cursor']['pagination_reply']['next_offset']
    next_num = re.findall('"cursor":(\\d+)', next_offset)[0]
    return next_num
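Also worth adding: when the cookies or the signature are rejected, json_data['data'] is None and the line replies = json_data['data']['replies'] blows up with a TypeError/KeyError, which hides the real reason. A small guard that reads the standard code/message fields of the response envelope makes the actual error visible. check_response is only a hypothetical helper name; call it right after response.json():

def check_response(json_data):
    """Fail fast with the API's own error message instead of a KeyError later.

    Assumes the usual Bilibili envelope: {"code": 0, "message": "0", "data": {...}}.
    """
    if json_data.get('code', 0) != 0 or not json_data.get('data'):
        raise RuntimeError(
            f"API refused the request: code={json_data.get('code')}, "
            f"message={json_data.get('message')}"
        )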
if __name__ == '__main__':
    """Save the data"""
    # Create the output file (utf-8-sig so Excel opens the Chinese headers correctly)
    f = open('data.csv', mode='w', encoding='utf-8-sig', newline='')
    csv_writer = csv.DictWriter(f, fieldnames=[
        '昵称',
        '地区',
        '点赞',
        '内容',
    ])
    csv_writer.writeheader()
    num = '168701'  # starting cursor
    for page in range(1, 11):
        num = get_content(num)
    f.close()
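Finally, pulling ten pages back-to-back can trip Bilibili's risk control, which then answers every request with the same error body, so the script looks permanently broken even though nothing in the code changed. A short pause between pages is a cheap test. The sketch below is the same main block rewritten with a with-statement and a delay; the two-second value is only a guess, tune it as needed:

if __name__ == '__main__':
    with open('data.csv', mode='w', encoding='utf-8-sig', newline='') as f:
        csv_writer = csv.DictWriter(f, fieldnames=['昵称', '地区', '点赞', '内容'])
        csv_writer.writeheader()
        num = '168701'
        for page in range(1, 11):
            num = get_content(num)
            time.sleep(2)  # pause between pages; adjust if requests keep getting rejected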