import scrapy
from spider1.items import Spider1Item
class JobSpider(scrapy.Spider):
    """Crawl the Qidian "finish" listing pages and emit title/author items.

    Scrapy downloads requests concurrently, so yielding every page request
    up front lets the responses (and therefore the items) come back in
    arbitrary order. To keep the output in page order, only the first page
    is requested at start-up; each following page is requested only after
    the previous page has been handled.
    """

    name = 'job'
    allowed_domains = ['qidian.com']

    # Inclusive range of listing pages to crawl.
    first_page = 1
    last_page = 5
    # Listing URL; ``{0}`` is replaced by the page number.
    page_url = (
        'https://www.qidian.com/finish?action=hidden&orderId=&style=1'
        '&pageSize=20&siteid=1&pubflag=0&hiddenField=2&page={0}'
    )

    def start_requests(self):
        """Yield the request for the first listing page only.

        Later pages are chained from ``parse`` so they are fetched one
        after another instead of all at once.
        """
        first = int(self.first_page)
        if first <= int(self.last_page):
            yield self._page_request(first)

    def parse(self, response, **kwargs):
        """Extract the books listed on one page, then queue the next page.

        ``kwargs['page']`` is the page number attached by ``_page_request``.
        When it is absent (``parse`` invoked for a request this spider did
        not build), the items are still extracted but no follow-up page is
        requested.
        """
        for entry in response.xpath('//div[@class="book-img-text"]/ul/li'):
            title = entry.xpath(
                './/div[@class="book-mid-info"]/h4/a/text()').get()
            if title is None:
                # ``.get()`` returns None instead of raising IndexError the
                # way ``.extract()[0]`` does, so one malformed <li> no longer
                # aborts the rest of the page.
                continue
            book = Spider1Item()
            book['title'] = title
            book['author'] = entry.xpath(
                './/div[@class="book-mid-info"]/p[@class="author"]/a/text()'
            ).get()
            yield book

        # Items of this page are yielded before the next page is requested,
        # which is what keeps the overall output in page order.
        yield from self._next_page(kwargs.get('page'))

    def _page_request(self, page):
        """Build the request for listing page number *page*."""
        return scrapy.Request(
            url=self.page_url.format(page),
            callback=self.parse,
            errback=self._page_failed,
            cb_kwargs={'page': page},
        )

    def _next_page(self, page):
        """Yield the request for the page after *page*, if one remains."""
        if page is not None and page < int(self.last_page):
            yield self._page_request(page + 1)

    def _page_failed(self, failure):
        """Log a page that could not be fetched and continue with the next.

        Without this errback a single failed page would break the chain and
        silently drop every later page.
        """
        page = failure.request.cb_kwargs.get('page')
        self.logger.error('Failed to fetch page %s: %r', page, failure.value)
        yield from self._next_page(page)
# 这段代码没有按照页码顺序爬取,有什么办法可以让它按顺序爬取?