最近在抓取拉勾网招聘信息的过程中,抓取一段时间后会出现 302 重定向,
检查后发现请求被重定向到了登录页面。
本以为已经完美解决,但结果并没有这么简单:登录后还是会出现 302 重定向问题。
求大神帮忙解惑!!
settings配置如下:
# Scrapy settings for the LagouSpider project.

# --- Project identity -------------------------------------------------------
BOT_NAME = "LagouSpider"
NEWSPIDER_MODULE = "LagouSpider.spiders"
SPIDER_MODULES = ["LagouSpider.spiders"]

# --- Crawl politeness / pacing ----------------------------------------------
ROBOTSTXT_OBEY = False
DOWNLOAD_DELAY = 3
CONCURRENT_REQUESTS = 2
AUTOTHROTTLE_START_DELAY = 2
AUTOTHROTTLE_ENABLED = True

# --- Redirects and cookies --------------------------------------------------
# Disable redirect following.
REDIRECT_ENABLED = False
# NOTE(review): with cookies disabled, a logged-in session presumably cannot
# be carried across requests -- confirm whether this is why requests keep
# being redirected to the login page.
COOKIES_ENABLED = False

# --- Default headers sent with every request --------------------------------
# NOTE(review): "br" is advertised in Accept-Encoding; confirm the downloader
# can actually decode Brotli-compressed responses in this environment.
DEFAULT_REQUEST_HEADERS = {
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Connection": "keep-alive",
    "Host": "www.lagou.com",
    "Origin": "https://www.lagou.com",
    "Referer": "https://www.lagou.com/",
}

# --- Downloader middlewares -------------------------------------------------
# The stock UserAgentMiddleware is mapped to None and the project's
# RandomUserAgentMiddleware is registered in its place.
DOWNLOADER_MIDDLEWARES = {
    # Left disabled, as in the original configuration:
    # "LagouSpider.middlewares.LagouspiderDownloaderMiddleware": 543,
    "LagouSpider.middlewares.RandomUserAgentMiddleware": 100,
    "scrapy.downloadermiddlewares.useragent.UserAgentMiddleware": None,
    "LagouSpider.middlewares.LagoucrawlerDownloaderMiddleware": 543,
}