# 我用线程池,但感觉没起作用
import re
import requests
import json
import os
from concurrent.futures import ThreadPoolExecutor
from time import time
# Unix timestamp, truncated to whole seconds, captured when the module loads.
# NOTE(review): not referenced anywhere in the visible code - confirm before removing.
ltime = int(time())
# Fetch the HTML source of the video's playback page.
def get_sourse(page, timeout=10):
    """Return the HTML of the playback page for episode *page*.

    Args:
        page: Episode number, sent as the ``p`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block its thread forever.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    # Keep the URL free of query parameters; they are supplied via ``params``.
    url = "https://www.bilibili.com/video/BV1Kh411r7uR"
    headers = {
        "User-Agent": "",  # TODO: fill in a real browser User-Agent
    }
    params = {
        'p': page,
        'vd_source': '8d74638d8c4f1428cf1da9c50d5455f6',
    }
    # No ``stream=True``: the whole body is read through ``.text`` anyway.
    resp = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Fail here with a clear HTTP error instead of later while parsing an
    # error page that does not contain the expected script.
    resp.raise_for_status()
    resp.encoding = "utf-8"
    return resp.text
# Extract the nested video and audio stream links from the page source.
def get_dv_url(page=None):
    """Return the ``(video_url, audio_url)`` pair for one episode.

    Args:
        page: Episode number passed to ``get_sourse``. When omitted, the
            module-level loop variable ``i`` is used so that old
            zero-argument callers keep working; that fallback must not be
            relied on from worker threads because ``i`` keeps changing
            while the submitting loop runs.

    Returns:
        A tuple ``(video_url, audio_url)`` holding the ``baseUrl`` of the
        first video entry and the first audio entry under ``data.dash``.

    Raises:
        ValueError: If the page contains no ``window.__playinfo__`` script.
    """
    if page is None:
        page = i  # legacy fallback, see docstring
    # NOTE: this pattern may differ between videos; adjust it if needed.
    match = re.search(
        'window.__playinfo__=(?P<playinfo>.*?)</script>',
        get_sourse(page),
        re.S,
    )
    if match is None:
        # Raise a descriptive error instead of leaking a bare StopIteration.
        raise ValueError(f"no window.__playinfo__ data found for page {page}")
    dash = json.loads(match.group("playinfo"))['data']['dash']
    return dash['video'][0]['baseUrl'], dash['audio'][0]['baseUrl']


def _save_stream(url, path, headers, timeout=30):
    """Stream *url* into *path*, publishing the file only once it is complete."""
    # Write to a sibling ".part" file first: an interrupted download must not
    # leave a truncated file that a later run would treat as already done.
    partial = path + ".part"
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()
        with open(partial, mode="wb") as f:
            # Chunked copy keeps memory use flat regardless of the file size.
            for chunk in resp.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    os.replace(partial, path)


# Download the video and audio streams of one episode.
def download(page=None):
    """Download the video and audio streams of episode *page* to disk.

    Files are written to ``D:\\video_data\\zhiyuanjsnx\\<page>\\`` as
    ``<page>_video.m4s`` and ``<page>_audio.m4s``. A stream whose file
    already exists is skipped, and when both exist no request is made.

    Args:
        page: Episode number. When omitted, the module-level loop variable
            ``i`` is used for backward compatibility; pass it explicitly
            when running inside a thread pool so that every worker handles
            its own episode.

    Raises:
        ValueError: If the stream links cannot be found in the page.
        requests.RequestException: If a request fails or times out.
        OSError: If the target directory or files cannot be written.
    """
    if page is None:
        page = i  # legacy fallback, see docstring
    folder = f'D:\\video_data\\zhiyuanjsnx\\{page}'
    video_path = f'{folder}\\{page}_video.m4s'
    audio_path = f'{folder}\\{page}_audio.m4s'

    # Decide what is missing before touching the network.
    need_video = not os.path.exists(video_path)
    need_audio = not os.path.exists(audio_path)
    if not (need_video or need_audio):
        return

    headers = {
        "User-Agent": "",  # TODO: fill in a real browser User-Agent
        "referer": "https://www.bilibili.com/video/BV1Kh411r7uR?p=1&vd_source=8d74638d8c4f1428cf1da9c50d5455f6"
    }
    # Creates missing parent directories too and tolerates an existing one.
    os.makedirs(folder, exist_ok=True)
    # Resolve both links with a single page fetch.
    videourl, audiourl = get_dv_url(page)
    if need_video:
        _save_stream(videourl, video_path, headers)
    if need_audio:
        _save_stream(audiourl, audio_path, headers)


if __name__ == '__main__':
    page = int(input("请输入你要下载的集数:"))
    t1 = time()
    # Create the thread pool.
    with ThreadPoolExecutor(50) as k:
        # Pass the callable and its argument separately. Writing
        # ``k.submit(download())`` would run the download right here in the
        # main thread and submit its return value (None) to the pool.
        futures = {k.submit(download, i): i for i in range(36, page + 1)}
    # Leaving the ``with`` block waits for every worker to finish.
    t2 = time()
    # Exceptions raised in workers are stored on the futures; report them
    # instead of letting them vanish silently.
    for future, episode in futures.items():
        error = future.exception()
        if error is not None:
            print(f"Episode {episode} failed: {error!r}")
    print("此次程序执行耗时:",t2-t1)