请问这种问题该怎么解决啊?
pycharm的scrapy包爬虫时重复获取同一条数据,
用yield传输之后
csv文件里除了第一行为列名以外,所有行的内容都是一样的,重复该网页目标区的某一条消息(多次运行生成的不同),比该网页目标区的此条信息总量还多
运行语句为:
scrapy crawl eshouse -o eshouse.csv
以下为我的主代码与补充代码
import scrapy
from scrapy import Selector,Request
from zhihuSpider.items import eshouseItem
#Item的class放在下一个代码块
class EshousespiderSpider(scrapy.Spider):
name = "eshouse"
allowed_domains = ["cd.esf.fang.com"]
start_urls = ["https://cd.esf.fang.com/"]
def parse(self, response):
sel=Selector(response)
list_items=sel.css('body > div.main1200.clearfix > div.main945.floatl > div.shop_list.shop_list_4 > dl')
for list_item in list_items:
item=eshouseItem()
a=sel.css('dl > dd:nth-child(2) > h4 > a > span::text').extract_first()
item['title']=a.replace('\n\t\t\t\t\t\t\t\t\t\t\t\t','')
item['price']=sel.css('dl > dd.price_right > span.red > b::text').extract_first()
item['priceper']=sel.css('dl > dd.price_right > span:nth-child(2)::text').extract_first()
item['jiegou']=sel.css('dl > dd:nth-child(2) > p.tel_shop >a::text').extract_first()
item['transfer'] = sel.css('dl > dd:nth-child(2) > p.clearfix.label > span::text').extract_first()
item['positive'] = sel.css('dl > dd:nth-child(2) > p.clearfix.label > span::text').extract_first()
item["url"] = list_item.css("dl > dd:nth-child(2) > h4 > a::attr(href)").extract_first()
print('\n\n\n\n\n\n\n\n\n\n',item['title'])
yield item
class eshouseItem(scrapy.Item):
title=scrapy.Field()
price=scrapy.Field()
priceper=scrapy.Field()
jiegou=scrapy.Field()
transfer=scrapy.Field()
positive=scrapy.Field()
url=scrapy.Field()
```