用 Scrapy 请求站点 http://bigfile.co.kr 的时候,日志显示 "Filtered duplicate request: no more duplicates" 的提示,然后爬虫就结束了。给请求加上 dont_filter=True 后重新运行,结果一直死循环,无法结束,也爬不到任何东西。有没有大神帮忙看一下?代码如下:
# Spider identifier and the single URL the crawl starts from.
name = "WebSpider"
start_urls = ["http://bigfile.co.kr"]

# Browser-like headers attached to the initial request.
# NOTE(review): "br" is advertised in Accept-Encoding; presumably the
# crawler needs a Brotli decoder available to read such responses — confirm.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    "Referer": "http://www.baidu.com/",
    # Kept as an integer, exactly as in the original mapping.
    "Upgrade-Insecure-Requests": 1,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
}
def start_requests(self):
    """Yield the single initial request for the first entry in ``start_urls``.

    The request carries the spider's ``headers``, is routed to ``self.parse``,
    and records the requested URL under the ``'url'`` key of its ``meta``.
    """
    first_url = self.start_urls[0]
    yield scrapy.Request(
        url=first_url,
        headers=self.headers,
        callback=self.parse,
        # Supplying meta at construction is equivalent to assigning
        # request.meta['url'] after the request has been created.
        meta={'url': first_url},
    )