用scrapy成功爬取了商品数据,但是到目录下却发现数据文件没有创建,郁闷。。pipelines文件代码如下
import codecs
import json
class AutopjtPipeline(object):
def _int_(self):
self.file=codecs.open("77.json","wb",encoding="utf-8")
def process_item(self, item, spider):
for j in range(0,len(item["name"])):
name = item["name"][j]
price=item["price"][j]
comnum = item["comnum"][j]
link = item["link"][j]
# 将当前页下第j个商品的name、price、comnum、link等信息处理一下,重新组合成一个字典
goods = {"name": name, "price": price, "comnum": comnum, "link": link}
# 将组合后的当前页中第j个商品的数据写入json文件
i = json.dumps(dict(goods), ensure_ascii=False)
line = i + '\n'
self.file.write(line)
# 返回item
return item
def close_spider(self,spider):
self.file.close()
同时报错
Traceback (most recent call last):
File "c:\users\93422\appdata\local\programs\python\python35\lib\site-packages\twisted\internet\defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "C:\Users\93422\Desktop\python\autopjt\autopjt\pipelines.py", line 28, in close_spider
self.file.close()
AttributeError: 'AutopjtPipeline' object has no attribute 'file'
items文件代码以及爬虫代码都基本没问题,爬虫代码如下
```import scrapy
from autopjt.items import AutopjtItem
from scrapy.http import Request
class AutospdSpider(scrapy.Spider):
name = 'autospd'
allowed_domains = ['dangdang.com']
start_urls = ['http://category.dangdang.com/pg1-cid4003872-srsort_sale_amt_desc.html'
]
def parse(self, response):
item=AutopjtItem()
item['name']=response.xpath("//p[@class='name']/@title").extract()
item['price']=response.xpath('//span[@class="price_n"]/text()').extract()
item['link']=response.xpath('//p[@class="name"]/@href').extract()
item['comnum']=response.xpath('//a[@ddclick]/text()').extract()
yield item
for i in range(1,20):
url="http://category.dangdang.com/pg"+str(i)+"-cid4003872-srsort_sale_amt_desc.html"
yield Request(url,callback=self.parse)