from scrapy.contrib.spiders import CrawlSpider
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import Selector
from scrapy.selector import HtmlXPathSelector
from kkk1.items import Kkk1Item
from scrapy.http import Request
from scrapy.http import HtmlResponse
import re
import math
class DmozSpider(CrawlSpider):
    """Spider that scrapes product fields from a single JD.com item page.

    NOTE(review): Scrapy's documentation warns against overriding ``parse``
    on a ``CrawlSpider`` because ``CrawlSpider`` uses that method to apply
    its ``rules``.  This class defines no rules, so the override looks
    harmless here; use a differently named callback if rules are ever added.
    """

    name = "kkk1"
    allowed_domains = ['item.jd.com']
    start_urls = [
        "http://item.jd.com/1130480.html"]

    def parse(self, response):
        """Extract the product id, name, brand and price from an item page.

        Args:
            response: The downloaded response for a product page.

        Returns:
            A ``Kkk1Item`` whose ``gid``, ``name``, ``brand`` and ``price``
            fields each hold the list returned by ``extract()`` for the
            corresponding XPath (an empty list when nothing matches).
        """
        # Common prefix of the product-detail <ul>; the id and brand entries
        # are addressed by their position in that list.
        detail_list = ("//div[@class='w']/div[@class='right']"
                       "/div[@id='product-detail']"
                       "/div[@id='product-detail-1']/ul")

        item = Kkk1Item()
        # Use response.xpath() throughout: Selector.select() is the
        # deprecated spelling of the same query method.
        item['gid'] = response.xpath(detail_list + "/li[2]/text()").extract()
        item['name'] = response.xpath("id('name')/h1/text()").extract()
        item['brand'] = response.xpath(
            detail_list + "/li[3]/a/text()").extract()
        # The whole element is extracted rather than its text().  Observed
        # output for this page is an empty <strong id="jd-price"> element;
        # presumably the price is filled in client-side by JavaScript after
        # the page loads, so it is absent from the downloaded HTML.
        # TODO(review): confirm, and fetch the price from whatever endpoint
        # the page's script calls.
        item['price'] = response.xpath("//*[@id='jd-price']").extract()
        return item
控制台打出来的:
{
'brand': [u'\u5c0f\u7c73\uff08MI\uff09'],
'gid': [u'\u5546\u54c1\u7f16\u53f7\uff1a1130480'],
'name': [u'\u5c0f\u7c73 \u7ea2\u7c731s \u79fb\u52a83G\u624b\u673a\uff08\u91d1\u5c5e\u7070\uff09 TD-SCDMA/GSM \u53cc\u5361\u53cc\u5f85 \u79fb\u52a8\u5408\u7ea6\u7248\uff08\u4e0d\u542b\u5408\u7ea6\u8ba1\u5212\uff09'],
'price': [u'<strong class="p-price" id="jd-price"></strong>'],
'salereminder': []}
u'<strong class="p-price" id="jd-price"></strong>' 里面的价格怎么没有?求大神指点,感激不尽。我是新手,这个问题已经困扰我好几天了,实在没办法。