ulist.append([tds[0].text,tds[1].text,tds[2].text,tds[3].text,tds[4].text,tds[5].text,tds[6].text,tds[7].text])
IndexError: list index out of range
尝试打印了ulit显示的是0
用代码块功能插入代码,请勿粘贴截图
def getHTMLText(url):#爬取网站数据
try:
r = requests.get(url, timeout = 30,verify=False)
r.raise_for_status()
r.encoding = r.apparent_encoding
return r.text
except:
return '爬取失败'
def fillUnivlist(ulist,html):#解析网站数据
soup = BeautifulSoup(html,"html.parser")
for tr in soup.find('body').children:
if isinstance(tr,bs4.element.Tag):
tds = tr('td')
print(len(tds))
ulist.append([tds[0].text,tds[1].text,tds[2].text,tds[3].text,tds[4].text,tds[5].text,tds[6].text,tds[7].text])
def writeUlistfile(ulist,dataname):#将网站存入csv文件
with open(dataname,'w',encoding = 'utf-8',newline='') as fout:
writer = csv.writer(fout)
for row in ulist:
writer.writerow(row)
uinfo1 =[]
url1 = 'https://www.chyxx.com/industry/202105/953391.html'
html1 = getHTMLText(url1)
fillUnivlist(uinfo1,html1)
writeUlistfile(uinfo1,'各种油产量初.csv')
运行结果及报错内容
我的解答思路和尝试过的方法
试着打印了一下,结果显示为0,减少了tds还是不行。
是爬取方式出错了吗?
我想要达到的结果
可以爬到数据