使用scrapy配置通用爬虫的时候怎么parse_item()函数里的数据没用解析出来为什么
这是代码
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from scrapyuniversal.utils import get_config
from scrapyuniversal.rules import rules
from scrapyuniversal import urls
class UniversalSpider(CrawlSpider):
name = 'universal'
def __init__(self, name, *args, **kwargs):
super().__init__(*args, **kwargs)
config = get_config(name)
self.config = config
self.rules = rules.get(config.get('rules'))
start_urls = config.get('start_urls')
print(start_urls)
if start_urls:
if start_urls.get('type') == 'static':
self.start_urls = start_urls.get('value')
elif start_urls.get('type') == 'dynamic':
self.start_urls = list(eval('urls.' + start_urls.get('method'))(*start_urls.get('args', [])))
self.allowed_domains = config.get('allowed_domains')
# super(UniversalSpider, self).__init__(*args, **kwargs)
def parse_item(self, response):
item = self.config.get('item')
if item:
cls = eval(item.get('class'))()
loader = eval(item.get('loader'))(cls, response=response)
# 动态获取属性配置
for key, value in item.get('attrs').items():
for extractor in value:
if extractor.get('method') == 'xpath':
loader.add_xpath(key, *extractor.get('args'), **{'re': extractor.get('re')})
if extractor.get('method') == 'css':
loader.add_css(key, *extractor.get('args'), **{'re': extractor.get('re')})
if extractor.get('method') == 'value':
loader.add_value(key, *extractor.get('args'), **{'re': extractor.get('re')})
if extractor.get('method') == 'attr':
loader.add_value(key, getattr(response, *extractor.get('args')))
yield loader.load_item()
这是运行结果截图