在使用 Scrapy 抓取百度健康上关于抑郁的问答时,程序可以正常启动,但导出的 CSV 文件中没有任何数据,请问大家该怎么解决?
import scrapy
from scrapy.selector import Selector
from scrapy.http import Request
from scrapy.http import HtmlResponse
from scrapy_doctor.items import ScrapyDoctorItem
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider,Rule
class DoctorSpider(scrapy.Spider):
    """Scrape question/answer pages found through the 120ask.com search.

    The spider requests the first ``max_pages`` search-result pages for the
    URL-encoded keyword embedded in ``search_url``, follows every result
    link, and yields one ``ScrapyDoctorItem`` per detail page.
    """

    name = "doctor"
    # Use the registered domain rather than only the "so." search sub-domain:
    # Scrapy's offsite filtering drops every request whose host is not one of
    # these domains (or a sub-domain of them), so detail links that point at
    # a sibling sub-domain would otherwise never reach ``parse_detail``.
    allowed_domains = ["120ask.com"]

    # Search URL template; ``{page}`` is replaced with a 1-based page number.
    search_url = "http://so.120ask.com/?kw=%E6%8A%91%E9%83%81&page={page}&isloc=1"
    # Number of search-result pages to request.
    max_pages = 5

    def start_requests(self):
        """Yield one request per search-result page (pages 1..max_pages)."""
        # The page number must really be substituted into the URL: identical
        # URLs are collapsed by Scrapy's duplicate filter.
        for page in range(1, self.max_pages + 1):
            yield Request(
                url=self.search_url.format(page=page),
                callback=self.parse,
            )

    def parse(self, response: HtmlResponse):
        """Follow every question link listed on a search-result page."""
        hrefs = response.css("#datalist > li h3 > a::attr(href)").getall()
        if not hrefs:
            # Makes an empty export diagnosable: either the page layout
            # changed or the site served something other than a result list.
            self.logger.warning("No result links found on %s", response.url)
        for href in hrefs:
            if href.startswith("//"):
                # Protocol-relative link: keep forcing HTTPS as before.
                detail_url = "https:" + href
            else:
                # Resolves relative paths; absolute URLs pass through as-is.
                detail_url = response.urljoin(href)
            yield Request(url=detail_url, callback=self.parse_detail)

    def parse_detail(self, response):
        """Extract doctor info, speciality, question and answer from a page.

        Each field holds the first matching text node, or ``None`` when the
        selector matches nothing.
        """
        self.logger.debug(
            "Parsing detail page %s (%d bytes)", response.url, len(response.body)
        )
        doctor_item = ScrapyDoctorItem()
        doctor_item["doctorInformation"] = response.css("span[class=b_sp1]::text").get()
        doctor_item["goodAt"] = response.css("span[class=b_sp2]::text").get()
        doctor_item["question"] = response.css("h1[id=askH1]::text").get()
        doctor_item["answer"] = response.css("div[class=crazy_new]::text").get()
        yield doctor_item