import os
import re

import requests
from openpyxl import Workbook
# Headers sent with every request; only a desktop-browser User-Agent is set.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
}
# Listing-page URL template; the placeholder receives the page number.
page_url = "https://www.hjutv.cn/show_2________{}___.html"
# Detail-page URL template (not used by the scraping below).
detail_url = "https://www.hjutv.cn/{}.html"
# Where the resulting workbook is written.
OUTPUT_PATH = "./data/韩剧.xlsx"
# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10

# Compiled once at import time instead of on every page iteration.
# One <li class="vodlist_item ..."> element per drama.
tvplay_pattern = re.compile(r'<li class="vodlist_item .*?">(.*?)</li>', re.S)
# Matches the drama title.
name_pattern = re.compile(r'<p class="vodlist_title"><a .*?>(.*?)</a></p>', re.S)
# Matches the actor name.
actor_name_pattern = re.compile(r'<p class="vodlist_sub"><a .*?>(.*?)</a></p>', re.S)


def extract_tvplays(html):
    """Return ``(name, actor_name)`` tuples parsed from one listing page.

    Args:
        html: Text of a listing page.

    Returns:
        A list with one tuple per ``vodlist_item`` entry that has a title.
        Entries without a title are skipped; entries without an actor link
        get an empty string as the actor name. Both values are stripped of
        surrounding whitespace.
    """
    results = []
    for tvplay in tvplay_pattern.findall(html):
        name_match = name_pattern.search(tvplay)
        if name_match is None:
            # Without a title there is nothing meaningful to record.
            continue
        actor_match = actor_name_pattern.search(tvplay)
        actor_name = actor_match.group(1).strip() if actor_match else ""
        results.append((name_match.group(1).strip(), actor_name))
    return results


def fetch_page(page):
    """Download listing page number *page* and return its text.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(
        url=page_url.format(page), headers=headers, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response.text


def main(first_page=1, last_page=27):
    """Scrape the listing pages and save titles and actors to OUTPUT_PATH.

    Args:
        first_page: First listing page to fetch (inclusive).
        last_page: Last listing page to fetch (inclusive).
    """
    wb = Workbook()
    ws = wb.active
    ws.append(["韩剧名", "主演"])
    try:
        for i in range(first_page, last_page + 1):
            try:
                html = fetch_page(i)
            except requests.RequestException as exc:
                # One bad page should not discard the rows already collected.
                print(f"page {i} failed: {exc}")
                continue
            tvplay_list = extract_tvplays(html)
            print(len(tvplay_list))
            for name, actor_name in tvplay_list:
                print(name, actor_name)
                ws.append([name, actor_name])
    finally:
        # Save whatever was gathered, even if the run is interrupted.
        # wb.save() does not create missing directories, so do it here.
        os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
        wb.save(OUTPUT_PATH)


if __name__ == "__main__":
    main()
# Question from the original author: how should this be written?