Python Scrapy image crawler: beginner asking for help

Could any expert tell me what is wrong with my data?
Error log:
2020-02-07 09:24:55 [scrapy.utils.log] INFO: Scrapy 1.8.0 started (bot: meizitu)
2020-02-07 09:24:55 [scrapy.utils.log] INFO: Versions: lxml 4.5.0.0, libxml2 2.9.5, cssselect 1.1.0, parsel 1.5.2, w3lib 1.21.0, Twisted 19.10.0, Python 3.7.3 (v3.7.3:ef4ec6ed12, Mar 25 2019, 22:22:05) [MSC v.1916
64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1d 10 Sep 2019), cryptography 2.8, Platform Windows-10-10.0.17763-SP0
2020-02-07 09:24:55 [scrapy.crawler] INFO: Overridden settings: {'BOT_NAME': 'meizitu', 'NEWSPIDER_MODULE': 'meizitu.spiders', 'ROBOTSTXT_OBEY': True, 'SPIDER_MODULES': ['meizitu.spiders']}
2020-02-07 09:24:55 [scrapy.extensions.telnet] INFO: Telnet Password: 0936097982b9bcc8
2020-02-07 09:24:55 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
'scrapy.extensions.telnet.TelnetConsole',
'scrapy.extensions.logstats.LogStats']
2020-02-07 09:24:56 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
'scrapy.downloadermiddlewares.retry.RetryMiddleware',
'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
'scrapy.downloadermiddlewares.stats.DownloaderStats']
2020-02-07 09:24:56 [scrapy.middleware] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
'scrapy.spidermiddlewares.referer.RefererMiddleware',
'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
'scrapy.spidermiddlewares.depth.DepthMiddleware']
Unhandled error in Deferred:
2020-02-07 09:24:56 [twisted] CRITICAL: Unhandled error in Deferred:

Traceback (most recent call last):
File "e:\python3.7\lib\site-packages\scrapy\crawler.py", line 184, in crawl
return self._crawl(crawler, *args, **kwargs)
File "e:\python3.7\lib\site-packages\scrapy\crawler.py", line 188, in crawl
d = crawler.crawl(*args, **kwargs)
File "e:\python3.7\lib\site-packages\twisted\internet\defer.py", line 1613, in unwindGenerator
return _cancellableInlineCallbacks(gen)
File "e:\python3.7\lib\site-packages\twisted\internet\defer.py", line 1529, in _cancellableInlineCallbacks
_inlineCallbacks(None, g, status)
--- <exception caught here> ---
File "e:\python3.7\lib\site-packages\twisted\internet\defer.py", line 1418, in _inlineCallbacks
result = g.send(result)
File "e:\python3.7\lib\site-packages\scrapy\crawler.py", line 86, in crawl
self.engine = self._create_engine()
File "e:\python3.7\lib\site-packages\scrapy\crawler.py", line 111, in _create_engine
return ExecutionEngine(self, lambda _: self.stop())
File "e:\python3.7\lib\site-packages\scrapy\core\engine.py", line 70, in __init
_
self.scraper = Scraper(crawler)
File "e:\python3.7\lib\site-packages\scrapy\core\scraper.py", line 71, in init
self.itemproc = itemproc_cls.from_crawler(crawler)
File "e:\python3.7\lib\site-packages\scrapy\middleware.py", line 53, in from_crawler
return cls.from_settings(crawler.settings, crawler)
File "e:\python3.7\lib\site-packages\scrapy\middleware.py", line 34, in from_settings
mwcls = load_object(clspath)
File "e:\python3.7\lib\site-packages\scrapy\utils\misc.py", line 46, in load_object
mod = import_module(module)
File "e:\python3.7\lib\importlib__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "", line 1006, in _gcd_import

File "", line 983, in _find_and_load

File "", line 967, in _find_and_load_unlocked

File "", line 677, in _load_unlocked

File "", line 724, in exec_module

File "", line 860, in get_code

File "", line 791, in source_to_code

File "", line 219, in _call_with_frames_removed

builtins.SyntaxError: unexpected EOF while parsing (pipelines.py, line 22)

2020-02-07 09:24:56 [twisted] CRITICAL:
Traceback (most recent call last):
File "e:\python3.7\lib\site-packages\twisted\internet\defer.py", line 1418, in inlineCallbacks
result = g.send(result)
File "e:\python3.7\lib\site-packages\scrapy\crawler.py", line 86, in crawl
self.engine = self._create_engine()
File "e:\python3.7\lib\site-packages\scrapy\crawler.py", line 111, in _create_engine
return ExecutionEngine(self, lambda _: self.stop())
File "e:\python3.7\lib\site-packages\scrapy\core\engine.py", line 70, in __init
_
self.scraper = Scraper(crawler)
File "e:\python3.7\lib\site-packages\scrapy\core\scraper.py", line 71, in init
self.itemproc = itemproc_cls.from_crawler(crawler)
File "e:\python3.7\lib\site-packages\scrapy\middleware.py", line 53, in from_crawler
return cls.from_settings(crawler.settings, crawler)
File "e:\python3.7\lib\site-packages\scrapy\middleware.py", line 34, in from_settings
mwcls = load_object(clspath)
File "e:\python3.7\lib\site-packages\scrapy\utils\misc.py", line 46, in load_object
mod = import_module(module)
File "e:\python3.7\lib\importlib__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "", line 1006, in _gcd_import
File "", line 983, in _find_and_load
File "", line 967, in _find_and_load_unlocked
File "", line 677, in _load_unlocked
File "", line 724, in exec_module
File "", line 860, in get_code
File "", line 791, in source_to_code
File "", line 219, in _call_with_frames_removed
File "E:\python_work\爬虫\meizitu\meizitu\pipelines.py", line 22
f.write(data)
^
SyntaxError: unexpected EOF while parsing
The code is as follows:

pipelines.py

import requests

class MeizituPipeline(object):
    def process_item(self, item, spider):
        print("main_title:",item['main_title'])
        print("main_image:", item['main_image'])
        print("main_tags:", item['main_tags'])
        print("main_meta:", item['main_meta'])
        print("page:", item['main_pagenavi'])
        url = requests.get(item['main_image'])
        print(url)
        try:
            with open(item['main_pagenavi'] +'.jpg','wb') as f:
                data = url.read()
                f.write(data)
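
The traceback's root cause: "unexpected EOF while parsing" at pipelines.py line 22 means the try: block above is never closed, since Python requires every try to carry at least one matching except or finally clause. A second bug hides behind it: requests.get() returns a Response object, which has no .read() method; the body lives in .content. A minimal corrected sketch (assuming item['main_image'] has already been extracted to a plain URL string, which the spider below does not yet do):

import requests

class MeizituPipeline(object):
    def process_item(self, item, spider):
        # requests.get() returns a Response, not a file-like object
        resp = requests.get(item['main_image'])
        try:
            with open(item['main_pagenavi'] + '.jpg', 'wb') as f:
                # the response body lives in .content, not .read()
                f.write(resp.content)
        except OSError as e:
            # a try block must have an except/finally clause; this missing
            # clause is exactly the SyntaxError the log complains about
            spider.logger.error('could not save image: %s' % e)
        return item  # return the item so later pipeline stages receive it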

image.py

import scrapy
from scrapy.http import response
from ..items import MeizituItem

class ImageSpider(scrapy.Spider):
    # The spider's name (run with: scrapy crawl SpiderMain)
    name = 'SpiderMain'
    # Domains the spider is allowed to crawl
    allowed_domains = ['www.mzitu.com/203554']
    # Initial list of pages to crawl
    start_urls = ['https://www.mzitu.com/203554']
    # parse() extracts the data from the response;
    # response is the downloader's reply for each URL in start_urls
    def parse(self,response):
        # Iterate over every matching node
        for Main in response.xpath('//div[@class = "main"]'):
            item = MeizituItem()
            # Match the content node (/html/body/div[2]/div[1]/div[3]/p/a)
            content = Main.xpath('//div[@class = "content"]')
            item['main_title'] = content.xpath('./h2/text()')
            item['main_image'] = content.xpath('./div[@class="main-image"]/p/a/img')
            item['main_meta'] = content.xpath('./div[@class="main-meta"]/span/text()').extract()
            item['main_tags'] = content.xpath('./div[@class="main-tags"]/a/text()').extract()
            item['main_pagenavi'] = content.xpath('./div[@class="main_pagenavi"]/span/text()').extract_first()
            yield item
            new_links = response.xpath('.//div[@class="pagenavi"]/a/@href').extract()
            new_link =new_links[-1]
            yield scrapy.Request(new_link,callback=self.parse)
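
Two more problems sit in the spider itself: main_title and main_image are stored as raw SelectorList objects because .extract() is never called on them, and the image XPath stops at the <img> element instead of its @src attribute, so the pipeline has no URL string to download. allowed_domains also expects bare domains; Scrapy warns about and ignores URL-style entries like 'www.mzitu.com/203554'. A hedged sketch of the broken parts only (XPaths copied from the question, not verified against the live page):

import scrapy
from ..items import MeizituItem

class ImageSpider(scrapy.Spider):
    name = 'SpiderMain'
    allowed_domains = ['www.mzitu.com']  # bare domain, never a URL with a path
    start_urls = ['https://www.mzitu.com/203554']

    def parse(self, response):
        for main in response.xpath('//div[@class="main"]'):
            item = MeizituItem()
            content = main.xpath('.//div[@class="content"]')
            # extract_first() turns the SelectorList into a plain string
            item['main_title'] = content.xpath('./h2/text()').extract_first()
            # take the image URL itself (@src), not the <img> selector
            item['main_image'] = content.xpath(
                './div[@class="main-image"]/p/a/img/@src').extract_first()
            yield item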

settings.py


BOT_NAME = 'meizitu'

SPIDER_MODULES = ['meizitu.spiders']
NEWSPIDER_MODULE = 'meizitu.spiders'

ROBOTSTXT_OBEY = True
# Configure the default request headers
DEFAULT_REQUEST_HEADERS = {
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36",
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
}
ITEM_PIPELINES = {
    'meizitu.pipelines.MeizituPipeline':300,
    }
IMAGES_STORE = r'E:\python_work\爬虫\meizitu'
IMAGES_MIN_HEIGHT = 1050
IMAGES_MIN_WIDTH = 700
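
Note that IMAGES_STORE, IMAGES_MIN_HEIGHT, and IMAGES_MIN_WIDTH are read only by Scrapy's built-in ImagesPipeline; the custom MeizituPipeline registered above ignores them entirely. A sketch of the settings for switching to the built-in pipeline instead (assuming main_image is changed to hold a list of URL strings, as that pipeline requires):

ITEM_PIPELINES = {
    'scrapy.pipelines.images.ImagesPipeline': 300,
}
IMAGES_STORE = r'E:\python_work\爬虫\meizitu'
# tell the built-in pipeline which item field holds the image URLs
IMAGES_URLS_FIELD = 'main_image'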

1 answer

qq_43656607
天不绝我: Someone keeps upvoting this at regular intervals, what is going on??
Replied 3 months ago
qq_43656607
天不绝我: Sorry, I pasted the wrong link; it's this one: https://blog.csdn.net/weixin_42812527/article/details/81366397
Replied 4 months ago
Xxxxxxxxxxs
Xxxxxxxxxxs: I'm using Scrapy but that post uses BeautifulSoup, isn't that a bit different?
Replied 4 months ago