报错信息如下:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_49980\3608093107.py in <module>
1 spider = BilibiliSpider()
2 tv_total, reply_total = 1000* 1, 1 # 设置视频条数, 回复记录数
----> 3 spider.update_data_by_online("20221101", "20221231", tv_total, reply_total)
4 spider.load_from_file()
5 df = spider.data
~\AppData\Local\Temp\ipykernel_49980\3437160765.py in update_data_by_online(self, start_date, end_date, tv_total, reply_total, interval_days)
199 b2 = time.time()
200 for index, video in enumerate(video_list):
--> 201 self.__update_video_detail(video, reply_total)
202 if (index + 1) % 100 == 0:
203 print(f"进度: {index + 1} / {len(video_list)} = {(index + 1) / len(video_list) * 100:.2f}%, Cost Time : {time.time() - b2:.2f}")
~\AppData\Local\Temp\ipykernel_49980\3437160765.py in __update_video_detail(self, video, reply_total)
239 data_item["up主粉丝量"] = upData["fans"]
240 # 获取了所有的视频, 尝试更新评论等其他信息
--> 241 reply_list = self.__load_all_reply(detail["aid"], reply_total)
242 if len(reply_list) == 0:
243 reply_list = [{}]
~\AppData\Local\Temp\ipykernel_49980\3437160765.py in __load_all_reply(self, aid, reply_total)
183 reply_list = []
184 while True:
--> 185 is_end, replies = self.reply(aid, pn)
186 reply_list += replies
187 if len(reply_list) >= reply_total or is_end:
~\AppData\Local\Temp\ipykernel_49980\3437160765.py in reply(self, aid, pn)
128 logger.debug(f"访问URL: {url}")
129 data = requests.get(url, headers=headers, proxies=proxies).json()
--> 130 is_end = data[u"data"]["cursor"]["is_end"]
131 data_list = []
132 for reply in data[u"data"]["replies"]:
KeyError: 'data'
但并不是每次运行都会出错:爬取不同的时间段时,有的可以正常运行,有的则会出现上述 `KeyError: 'data'` 报错。
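之前代码里字符串前面是没有 `u` 前缀的(即 `data["data"]`),我加上 `u` 之后(即 `data[u"data"]`)发现代码运行结果与报错都不受影响(在 Python 3 中 `u"data"` 与 `"data"` 完全等价)。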