问题遇到的现象和发生背景
两个代码错了
用代码块功能插入代码,请勿粘贴截图
import time
import random
import requests
from urllib.parse import urlencode
from bs4 import BeautifulSoup
import csv
burl='https://search.bilibili.com/all'
headervalue={
'user-agent':' Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
}
pe=[
{'http':'http://221.226.75.86:55443'},
{'http':'http://183.247.152.98:53281'},
{'http':'http://118.163.13.200:8080'},
]
def p(content):
items=[]
soup=BeautifulSoup(content,'lxml')
videolist=soup.select('video-list row>div')
for v in videolist:
title=v.select('.bili-video-card__info--right>h3')[0].text.strip()#视频标题
vtime=v.select('.bili-video-card__stats__duration')[0].text.strip()#获取视频时长
vcount=v.select('.bili-video-card__stats--left>span')[0].text.strip()#观看次数
up=v.select('.bili-video-card__info--right>span')[1].获text.strip()#取上传时间
upm=v.select('.bili-video-card__info--right>span')[0].获text.strip()#获取up主
vlink='http:'+v.select('.bili-video-card__wrap __scale-wrap').attrs['href']#获取视频链接
item={
'视频标题':title,
'视频时长':vtime,
'观看次数':vcount,
'上传时间':up,
'up主':upm,
'视频链接':vlink}
items.append(item)
print(item)
def getp(kw,page):
pa={
'keyword':kw,
'page':str(page)
}
url=burl+urlencode(pa)
try:
r=requests.get(url,headers=headervalue,ps=random.choice(pe))
except:
print('请求失败')
else:
if r.status_code==200:
items=p(r.text)
sleept=random.randint(2,5)+random.random()
time.sleep(sleept)
if __name__=='__main__':
keyword=input('请输入关键词:')
with open(keyword+'.csv','w',newline='',encoding='utf-8') as file:
names=['视频标题','视频时长','观看次数','上传时间','up主','视频链接']
writer=csv.DictWriter(file,fieldnames=names)
writer.writeheader()
for i in range(1,4):
print('正在爬取第%d页的视频信息' % i)
items=getp(keyword,i)
#writer.writeheader(items)
这个代码错在哪里了第一个,求解
import re
import requests
from bs4 import BeautifulSoup
import bs4
def gethtmltext(url):#获取网页内容
try:
r=requests.get(url)
r.raise_for_status()#检查是否异常
r.encoding=r.apparent_encoding#响应编码内容,a..g分析响应编码方式
html=r.text
except:
html='爬取失败'
return html
def findlist(ulist,html):#提取相关内容
soup=BeautifulSoup(html,'html.parser')
dates=re.compile('<tbody.*?data-v-3fe7d390.*?>(.*?)')
date=re.findall(dates,html)
for i in date:
if isinstance(i,bs4.element.Tag):#子节点会包含换行符之类的节点
t=i.find_all('td')
ulist.append([t[0].div.string,t[1].find(name='a',attr={'class':'name-cn'}).string,
t[2].get_text(),t[3].get_text,t[4].string,t[5].string])
def printulist(ulist,num):#格式化函数
cols="{0:{6}^10}\t{1:{6}^10}\t{2:{6}^10}\t{3:{6}^10}\t{4:{6}^10}\t{0:{5}^10}\t"
print(cols.format('排名','大学名称','省市','类型','总分','办学层次'),chr(12288))
for k in range(num):
j=ulist[k]
print(cols.format(str(j[0]).strip(),str(j[1]).strip(),str(j[2]).strip(),str(j[3]).strip(),str(j[4]).strip(),str(j[5]).strip(),chr(12288)))
def main():
ulist=[]
url='https://shanghairanking.cn/rankings/bcur/2022'
html=gethtmltext(url)
findlist(ulist,html)
printulist(ulist,20)
main()
第二个代码错在哪里了求解
运行结果及报错内容
第一个请求失败,第二个IndexError: Replacement index 6 out of range for positional args tuple
我想要达到的结果
解决代码