新手修改了网上代码,想要爬百度新闻的标题和简介,不知道为什么运行结果是空。在mac自带的python2上运行:
from urllib import urlopen
import csv
import re
from bs4 import BeautifulSoup
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
for k in range(1,36):
url = "http://news.baidu.com/ns?word=低保&pn=%s&cl=2&ct=1&tn=news&rn=20&ie=utf-8&bt=0&et=0"% ((k-1)*20)
csvfile = file("Dibao.csv", "ab+")
writer = csv.writer(csvfile)
content = urlopen(url).read()
soup = BeautifulSoup(content,"lxml")
list0 = []
list1 = []
list2 = []
list3 = []
for i in range(1,20):
hotNews = soup.find_all("div", {"class", "result"})[i]
a1=hotNews.find(name="a", attrs={"target": re.compile("_blank")})
list0.append(a1.text)
a2=hotNews.find(name="p", attrs={"class": re.compile("c-author")})
t1=a2.text.split()[0]
list1.append(t1)
t2 = a2.text.split()[1]
list2.append(t2)
if t2.find(u"年") == 4:
t3 = a2.text.split()[2]
list3.append(t3)
else:
list3.append(" ")
#将数据写入csv
data = []
for i in range(0,20):
data.append((list0[i], list1[i], list2[i],list3[i]))
writer.writerows(data)
csvfile.close()
print "第" + str(k) + "页完成"
报错:
Traceback (most recent call last):
File "", line 12, in
IndexError: list index out of range
不懂参数过范围什么意思,新闻一共37页,每页20条。
希望有大神能帮忙看一下,多谢啦~