之前用requests模拟登录,加载到本地一直自动刷新。用scrapy登录后,返回200和“登录过于频繁,请稍后重试”,求助dalao,搞了好多天了。代码如下:
# -*- coding: utf-8 -*-
import scrapy
import json
import re
class ZhihuSpider(scrapy.Spider):
    """Spider that logs into zhihu.com with a phone number + password,
    then crawls ``start_urls`` with the authenticated session.

    NOTE(review): zhihu's /login/phone_num endpoint is rate-limited and
    may demand a captcha; the server message "登录过于频繁,请稍后重试"
    usually means the account/IP is throttled, not that the request is
    syntactically wrong. Slowing the request rate / changing IP may be
    required in addition to the form-data fix below.
    """
    name = 'zhihu'
    allowed_domains = ['www.zhihu.com']
    start_urls = ['http://www.zhihu.com/']

    # Credentials.
    # TODO(review): never hard-code real credentials in source control —
    # load them from Scrapy settings or environment variables instead.
    phone_num = "13247161221"
    password = "yun10791023"

    agent = "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0"
    header = {
        "HOST": "www.zhihu.com",
        "REFERER": "https://www.zhihu.com",
        "User-Agent": agent,
        "Connection": "Keep-Alive",
    }

    def parse(self, response):
        # Default callback for start_urls after login; not implemented yet.
        pass

    def start_requests(self):
        # Fetch the sign-in page first so the _xsrf token can be extracted
        # before posting credentials.
        return [scrapy.Request(
            "https://www.zhihu.com/signin?next=/",
            headers=self.header,
            callback=self.login,
        )]

    def login(self, response):
        """Extract the anti-CSRF token from the sign-in page and submit
        the login form. Logs an error and aborts if no token is found."""
        match_obj = re.search(r'name="_xsrf" value="(.*?)"', response.text, re.DOTALL)
        if not match_obj:
            # Previously this failed silently; make the failure visible.
            self.logger.error("Could not find _xsrf token on the sign-in page")
            return
        xsrf = match_obj.group(1)
        post_url = "https://www.zhihu.com/login/phone_num"
        # BUG FIX: the phone number and password values were swapped —
        # the phone number was being sent as "password" and vice versa.
        post_data = {
            "_xsrf": xsrf,
            "phone_num": self.phone_num,
            "password": self.password,
            "captcha_type": "cn",
            "remember_me": "true",
        }
        return [scrapy.FormRequest(
            url=post_url,
            formdata=post_data,
            headers=self.header,
            callback=self.checklogin,
        )]

    def checklogin(self, response):
        """Inspect the login endpoint's JSON reply to decide whether
        login succeeded, then kick off the real crawl."""
        text_json = json.loads(response.text)
        # The throttling reply "登录过于频繁,请稍后重试" shows up here.
        # The success message string must match the server's exactly
        # (zhihu historically returns "登陆成功").
        if "msg" in text_json and text_json["msg"] == "登陆成功":
            for url in self.start_urls:
                yield scrapy.Request(url, dont_filter=False, headers=self.header)