尝试了很多次爬取优酷视频的弹幕,但代码运行后出现错误,想问一下以下代码应该怎么修改才能成功获取弹幕数据啊?感恩
错误提示:requests.exceptions.SSLError: HTTPSConnectionPool(host='acs.youku.com', port=443): Max retries exceeded with url: /h5/mopen.youku.danmu.list/1.0/?jsv=2.6.1&appKey=24679788&t=1706160171639&sign=5dcd5fbf040d7180dcb1f0fb664c3c0a&api=mopen.youku.danmu.list&v=1.0&type=originaljson&timeout=20000&dataType=jsonp (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1129)')))
import requests
from bs4 import BeautifulSoup
def post_dama_data(url):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
}
response = requests.get(url, headers=headers)
if response.status_code == 200:
soup = BeautifulSoup(response.text, 'html.parser')
dama_list = soup.find_all('result', "type","content")
for dama in dama_list:
dama_content = dama.find("content").text
print(dama_content)
with open('优酷弹幕-大圣归来.txt', 'a', encoding='utf-8') as f:
f.write(dama_list)
f.write('\n')
else:
print('请求失败')
url = 'https://acs.youku.com/h5/mopen.youku.danmu.list/1.0/?jsv=2.6.1&appKey=24679788&t=1706160171639&sign=5dcd5fbf040d7180dcb1f0fb664c3c0a&api=mopen.youku.danmu.list&v=1.0&type=originaljson&timeout=20000&dataType=jsonp'
post_dama_data(url=url)
尝试换方式,但最后仍然显示无法运行:
import requests
import re
from bs4 import BeautifulSoup
url = 'https://acs.youku.com/h5/mopen.youku.danmu.list/1.0/?jsv=2.6.1&appKey=24679788&t=1705909158072&sign=e44ea1b02e036d2ddfad7a74c2abbe63&api=mopen.youku.danmu.list&v=1.0&type=originaljson&timeout=20000&dataType=jsonp'
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
'Referer': 'https://v.youku.com/v_show/id_XODQ5MTM3NzQ0.html?spm=a2h0c.8166622.PhoneSokuProgram_1.dposter&s=c5b73d3a842811e4abda/'
}
requests.get(url=url,headers=headers)
soup = BeautifulSoup(response.text,'html.parser')
video_title = soup.find('meta',attr={'name':'irTitle'})['content']
video_url = soup.find('meta',attr={'itemprop':'contentURL'})['content']
print('视频标题:',video_title)
print('视频地址:',video_url)
danmuku_id_patten = re.compile(r'"daluId":"(.+?)"')
danmuku_id = danmuku_id_patten.search(response.text).group(1)
print('弹幕ID:',danmuku_id)
danmuku_api = f
response = requests.get(danmuku_api)
danmuku_data = response.json()
print('弹幕数据:',danmuku_data)
danmuku_list = danmuku_data['bulletInfo']['bullets']
for danmuku in danmuku_list:
content = danmuku['content']
time = danmuku['sohuTime']
print('弹幕内容:',content)
print('弹幕时间',time)
danmuku_list.to_csv(f"优酷视频弹幕-魁拔3.csv", mode='w',encoding="utf-8", errors='ignore', index=False)
以上代码尝试多次最后仍然运行失败