代码如下
import json
import requests
from scrapy.crawler import logger
def get_rbballData(self, dayStart, dayEnd):
"""
爬虫主程序
"""
pageNo = 1
session = self._get_session()
while pageNo <= self.pageNum:
# 循环抓取数据
url = self._get_url(dayStart, dayEnd, pageNo)
res_text = self._send_request(session, url)
if res_text != None:
ret = self._parseText_toData(res_text)
pageNo = pageNo + 1
count = self._save_rbballData()
return count
def _get_url(self, dayStart, dayEnd, pageNo):
"""
构造抓取的url
"""
url = "http://www.cwl.gov.cn/cwl_admin/kjxx/findDrawNotice?name=ssq&issueCount=&issueStart=&issueEnd=&dayStart={}&dayEnd={}&pageNo={}"
url = url.format(dayStart,dayEnd,pageNo)
return url
def _get_session(self):
"""
构造session
"""
headers = {"Accept":"application/json, text/javascript, */*; q=0.01",
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
"X-Requested-With":"XMLHttpRequest",
"Accept-Encoding":"gzip, deflate",
"Referer": "http://www.cwl.gov.cn/kjxx/ssq/kjgg/",
"Accept-Language":"zh-CN,zh;q=0.9"}
session = requests.Session()
session.headers.update(headers)
return session
def _send_request(self, session, url):
"""
发送请求,并返回响应文本,
return:请求成功返回文本,请求失败,返回None
"""
res = session.get(url)
if res.status_code != 200:
res = None
logger.info("请求url失败,url=[{}]".format(url))
return res.text
def _parseText_toData(self, text):
"""
获取一页双色球开奖结果,
return:获取成功返回,未成功返回-1
"""
try:
text = json.loads(text)
if text["state"] != 0:
# 查询失败
logger.info("查询返回失败,返回结果:[{}]".format(text))
text = None
return -1
except Exception as e:
logger.info("文本转换json失败,文本:【{}】".format(text))
logger.info("异常信息:{}".format(str(e)))
text = None
return -1
#text["result"]本身是个dict,需将其逐个写入rbballData
for t in text["result"]:
self.rbballData.append(t)
self.pageNum = text["pageCount"]
return 0
def _save_rbballData(self):
"""
将抓取到的数据保存到rbball_data.txt文件中
reture: 返回双色球总期数
"""
self.rbballData.reverse()
print(self.rbballData[0]["code"])
count = len(self.rbballData)
if count > 0:
with open("rbball_data.txt", 'w', encoding="utf-8") as f:
f.write(str(self.rbballData))
else:
logger.info("未获取到双色球数据!!!")
return count
我想使用这个代码
我写了个这个
if __name__=='__main__':
get_rbballData(self, "2021-01-01", "2021-01-31")
但是报错