This is the crawler part.
I want to fetch 30 pages of results, but without logging in I can only get 10 pages. It looks like I need to add a cookie,
but how should I add it, and where does it go? (One possible place is sketched after the spider code below.)
import scrapy
from demo1.items import Demo1Item

class BaiduSpider(scrapy.Spider):
    name = 'baidu'
    allowed_domains = ['liepin.com']  # must match the site actually being crawled
    page_num = 15  # starting page (set to 1 to crawl from the first page)
    url = 'https://www.liepin.com/zhaopin/?&dq=280020&currentPage='
    start_urls = [url + str(page_num)]
    print(start_urls)
    def parse(self, response):
        li_list = response.xpath('//div[@class="left-list-box"]/ul/li')
        for li in li_list:
            item = Demo1Item()
            # job title
            name = li.xpath('.//div[@class="job-title-box"]/div/@title')[0].extract()
            print(name)
            # work location
            location = li.xpath('.//div[@class="job-dq-box"]/span[@class="ellipsis-1"]/text()')[0].extract()
            print(location)
            item['name'] = name
            item['location'] = location
            # hand the item to the pipeline
            yield item
        # request the next page until the 30 pages mentioned above have been fetched
        if self.page_num < 30:
            self.page_num += 1
            yield scrapy.Request(self.url + str(self.page_num), callback=self.parse, dont_filter=True)
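One common place to add the cookie is an overridden start_requests() method: pass a cookies= dict to the first scrapy.Request, and Scrapy's cookie middleware (COOKIES_ENABLED is True by default) will carry the session over to the follow-up requests yielded from parse(). Below is a minimal sketch under that assumption; the cookie names and values are placeholders and would have to be copied from a logged-in liepin.com session via the browser's developer tools.

import scrapy

class BaiduSpider(scrapy.Spider):
    name = 'baidu'
    allowed_domains = ['liepin.com']
    page_num = 15
    url = 'https://www.liepin.com/zhaopin/?&dq=280020&currentPage='

    # Placeholder name/value pairs: replace them with the real cookies copied
    # from the browser (F12 -> Network -> request headers) while logged in.
    cookies = {
        'login_cookie_name': 'login_cookie_value',
    }

    def start_requests(self):
        # Replaces start_urls: the first request carries the login cookies,
        # and the default cookie middleware keeps the session afterwards.
        yield scrapy.Request(self.url + str(self.page_num),
                             cookies=self.cookies,
                             callback=self.parse)

    def parse(self, response):
        # ... same parsing and pagination as in the spider above ...
        pass

An alternative is to paste the whole Cookie header string into DEFAULT_REQUEST_HEADERS in settings.py, commonly combined with COOKIES_ENABLED = False so the cookie middleware does not interfere with the manually set header. Either way the values must come from a session that is actually logged in, otherwise the site will still cut the listing off at 10 pages.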