python小学生,找了段代码改了改,结果8134条数据只爬下来五千多条,求赐教
附上代码:
import requests
import pandas as pd
requests.packages.urllib3.disable_warnings()
# 该公司的id
company_id = '1999074'
# 该公司的人员信息有82页
for page in range(1,82):
# 构造data参数
url = 'https://exam.sac.net.cn/pages/registration/train-line-register!list.action'
data = {'filter_EQS_AOI_ID': company_id,
'filter_EQS_PTI_ID': 0,
'page.searchFileName': 'homepage',
'page.sqlKey': 'PAGE_FINISH_PUBLICITY',
'page.sqlCKey': 'SIZE_FINISH_PUBLICITY',
'_search': 'false',
'page.pageSize': 100,
'page.pageNo': page,
'page.orderBy': 'id',
'page.order': 'desc'}
# post方法访问某页
resp = requests.post(url, data=data, verify = False)
# 解析网页数据
df = pd.DataFrame(resp.json()['result'])
# 存储数据
df.to_csv('爬虫.csv', mode='a+')
dt = pd.read_csv('爬虫.csv')
# 存储到Excel
dt.to_excel('爬虫.xlsx', index = False)