我想用 Python 爬虫反复抓取某网站的信息,但现在抓不到数据了。这个问题之前并不存在:自从我在同一个网站连续翻了 100 页、且没有设置任何停顿之后,就变成了请求返回正常(状态码正确),但页面数据似乎抓不下来——只能拿到第一页的数据,后面各页都无法爬取。


代码如下。哪位能帮忙看看、运行一下,告诉我是什么原因并帮我解决?有偿答谢。
# -*- coding:utf-8 -*-
import time
import xlwt
import xlrd
import re
import requests
from bs4 import BeautifulSoup
# Pre-compiled regexes applied to the raw HTML of each <tr> row.
findZclx = re.compile(r'<td class="zclx_lw textcenter">(.*?)</td>')  # procurement-type cell
findLink = re.compile(r'href="(.*?)"')  # relative URL of the detail page
findTitle = re.compile(r'>(.*?)</a>')  # anchor text = announcement title
findDate = re.compile(r'<td class="textcenter">(.*?)</td>')  # publish-date cell
# Request headers sent with every page fetch.
# NOTE(review): the User-Agent value carries literal double quotes inside the
# single-quoted string, so the header sent is `"Mozilla/...9"` including the
# quotes — looks accidental and may make the client easier to fingerprint;
# confirm whether the quotes are intended.
header = {
"User-Agent": '"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.310.0 Safari/532.9"'
}
# Running count of matched rows; written by askDATA(), read by SavePath().
number = 0
# Current search keyword; set per-iteration by main(), read by askDATA().
Keyword = ""
# Keywords are loaded at import time from column A of this workbook
# (the file must exist next to the script or the import fails).
savepath1 = "筛选信息.xls"
dr = xlrd.open_workbook(savepath1)
table = dr.sheet_by_index(0)
KeyWords = table.col_values(0)
del KeyWords[0]
def main():
    """Scrape the listing site once per keyword and save each result set.

    Reads the module-level ``KeyWords`` list, and for every keyword runs
    ``askDATA()`` (which filters titles via the module-level ``Keyword``)
    and writes the matches to ``<keyword>招标信息.xls``.
    """
    global Keyword
    baseurl = 'http://www.crpsz.com/zbxx/006001/secondpagejy.html?categoryNum=006001&pageIndex='
    for kw in KeyWords:
        Keyword = kw  # askDATA() reads this global to filter row titles
        rows = askDATA(baseurl)
        SavePath(kw + "招标信息.xls", rows)
def askDATA(baseurl):
    """Scrape listing pages 1-19 under *baseurl* and collect matching rows.

    A row is kept when its type cell equals "FZ" and its title matches the
    module-level ``Keyword`` (used as a regex via ``re.search``).

    Returns:
        list[list]: one ``[link, title, "服务", date]`` entry per match.

    Side effects:
        Resets and increments the module-level ``number`` counter.
    """
    global number
    global Keyword
    number = 0
    datalist = []
    for i in range(1, 20):
        url = baseurl + str(i)
        # Polite delay between requests — hammering 100 pages with no pause
        # is the likely reason the site started returning empty listings.
        time.sleep(3)
        page = requests.get(url, headers=header, timeout=30)
        print(page.status_code)
        if page.status_code != 200:
            # Server refused this page; don't try to parse an error body.
            continue
        page.encoding = "utf-8"
        soup = BeautifulSoup(page.text, 'html.parser')
        xm = soup.find_all('tr')
        if not xm:
            # No <tr> rows at all.  If only page 1 ever has rows, the later
            # pages are probably filled in by JavaScript/AJAX after load, so
            # requests never sees them — check the XHR endpoint in the
            # browser dev tools instead of the HTML page URL.
            continue
        del xm[0]  # first <tr> is the table header row
        for item in xm:
            item = str(item)
            zclx = re.findall(findZclx, item)
            if not zclx or zclx[0] != "FZ":
                continue
            links = re.findall(findLink, item)
            titles = re.findall(findTitle, item)
            dates = re.findall(findDate, item)
            if not (links and titles and dates):
                continue  # malformed row: skip instead of raising IndexError
            title = titles[0]
            if re.search(Keyword, title):
                data = []
                data.append('http://www.crpsz.com' + links[0])
                data.append(title)
                data.append("服务")
                data.append(dates[0])
                datalist.append(data)
                number = number + 1
                print("这是"+Keyword+'的第%d个数据'%number)
        time.sleep(3)
    return datalist
def SavePath(savepath, datalist):
    """Write *datalist* to an .xls workbook at *savepath*.

    Args:
        savepath: output file path (.xls).
        datalist: list of ``[link, title, type, date]`` rows from askDATA().

    A header row is written first, then one sheet row per entry.
    """
    print("save.......")
    book = xlwt.Workbook(encoding="utf-8", style_compression=0)  # workbook object
    sheet = book.add_sheet('招标信息', cell_overwrite_ok=True)  # worksheet
    col = ("项目详情链接", "标题", "招采类型", "发布时间")
    for j, title in enumerate(col):
        sheet.write(0, j, title)  # header row
    # Iterate the data itself instead of the module-global `number` counter,
    # so the saved rows always match what was actually collected even if the
    # global drifts out of sync.
    for i, data in enumerate(datalist, start=1):
        for j in range(4):
            sheet.write(i, j, data[j])
    book.save(savepath)
# Script entry point: run the scrape, then report completion.
if __name__ == "__main__":
    main()
    print("爬取完毕")