就是写了一个爬虫,一直调用一个函数扣数据,其中用到for循环,运行次数少的时候,代码是没问题的,到了1000次以上,就提示我recruitInformation和companyInformation局部变量的范围有问题,就是和全局变量在函数里错当局部变量的错误一样,错误提示我忘留下了。下面是代码。
def get_one_data(soup0,url0):
global T,F,recruitInformation,companyInformation
#扣取招聘标题title
title0 = soup0.select('.inner-left > h1:nth-of-type(1)')
for tt in title0:
title = tt.get_text()
# print(title)
#扣取招聘公司company
company0 = soup0.select('body > div.terminalpage.clearfix > div.terminalpage-right > div.company-box > p > a')
for co in company0:
company = co.get_text()
# print(company)
#扣取职位福利
welfare0 = soup0.select('.welfare-tab-box')
for ww in welfare0:
welfare = list(ww.stripped_strings)
# print(welfare)
#扣取招聘信息
recruitInformation0 = soup0.select('body > div.terminalpage.clearfix > div.terminalpage-left > ul')
for re in recruitInformation0:
recruitInformation = list(re.stripped_strings)
dd="" #临时存储其中一个信息
i=0 #控制list中的个数
dao=[] #塞进整个招聘信息
m=1 #在recruitInformation的长度为17时,控制信息个数,总共8个
if(len(recruitInformation)==16):
for d in recruitInformation:
if i < 2:
dd = dd + d
i = i + 1
else:
dao.append(dd)
# print(dd)
dd = d
i = 1
dao.append(dd)
# print(dao)
else:
for d in recruitInformation:
if m==2:
if i<3:
dd=dd+d
i=i+1
else:
dao.append(dd)
m=m+1
dd=d
i=1
continue
if i<2:
dd=dd+d
i=i+1
else:
dao.append(dd)
m=m+1
dd=d
i=1
dao.append(dd)
# print(dao)
#扣取职位描述
positionDescribe0=soup0.select('body > div.terminalpage.clearfix > div.terminalpage-left > div.terminalpage-main.clearfix > div > div:nth-of-type(1)')
for po in positionDescribe0:
positionDescribe = po.get_text()
# print(positionDescribe)
#扣取公司基本信息
companyInformation0=soup0.select('body > div.terminalpage.clearfix > div.terminalpage-right > div.company-box > ul')
for ci in companyInformation0:
companyInformation = list(ci.stripped_strings)
cidd = "" #临时存储其中一个信息
i = 0 #控制list中的个数
cidao = [] #塞进整个公司基本信息
for d1 in companyInformation:
if i < 2:
cidd = cidd + d1
i = i + 1
else:
cidao.append(cidd)
# print(dd)
cidd = d1
i = 1
cidao.append(cidd)
# print(cidao)
#扣取公司简介
allData = {
"url": url0,
"title": title,
"company": company,
"welfare": welfare,
"recuritInformation": dao,
"positionDescribe": positionDescribe.replace(" ", ""),
"companyInformation": cidao,
"companyIntroduction": "none"
}
print("抽取数据正常")
m=inputData(allData,dd)
print("存取数据可进行")
if m==1:
T=T+1
else:
F=F+1
print("成功了"+str(T)+"个,失败了"+str(F)+"个")
将recruitInformation和companyInformation注明global是试试能不能解决问题,一开始只是加了recruitInformation,后来提示了companyInformation也出错。我加了companyInformation后正在跑跑看。