通过scrapy爬取某网站数据在写入数据库时一直报:
AttributeError: 'MysqlPipeline' object has no attribute 'cursor'
本人跟着B站视频学习爬虫,以下代码跟着视频敲的,仔细核对后确认无误,但运行就是一直报上述错误,还望各位友友们能够帮助看一下。
1、项目结构如下:
2、以下为spiders目录下read.py文件源码:
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from scrapy_readbook_practice.items import ScrapyReadbookPracticeItem
class ReadSpider(CrawlSpider):
name = "read"
allowed_domains = ["www.dushu.com"]
start_urls = ["https://www.dushu.com/book/1188_1.html"]
rules = (
Rule(LinkExtractor(allow=r"/book/1188_\d+\.html"),
callback="parse_item",
follow=False),
)
def parse_item(self, response):
img_list = response.xpath('//div[@class="bookslist"]//li')
for img in img_list:
name = img.xpath('./div//a/img/@alt').extract_first()
src = img.xpath('./div//a/img/@data-original').extract_first()
book = ScrapyReadbookPracticeItem(name=name,src=src)
yield book
3、以下为pipelines.py文件源码:
from scrapy.utils.project import get_project_settings
import pymysql
class ScrapyReadbookPracticePipeline:
def open_spider(self,spider):
self.fp = open('book.json','w',encoding='utf-8')
def process_item(self, item, spider):
self.fp.write(str(item))
return item
def close_spider(self,spider):
self.fp.close()
class MysqlPipeline:
def open_spier(self,spider):
settings = get_project_settings()
self.host = settings['DB_HOST']
self.port = settings['DB_PORT']
self.user = settings['DB_USER']
self.password = settings['DB_PASSWORD']
self.database = settings['DB_DATABASE']
self.charset = settings['DB_CHARSET']
self.connect()
def connect(self):
self.conn = pymysql.connect(
host=self.host,
port=self.port,
user=self.user,
password=self.password,
db=self.database,
charset=self.charset
)
# 创建游标
self.cursor = self.conn.cursor()
def process_item(self,item,spider):
sql = 'insert into book(name,src) values("{}","{}")'.format(item['name'],item['src'])
# 执行sql语句
self.cursor.execute(sql)
# 提交
self.conn.commit()
return item
def close_spider(self,spider):
self.cursor.close()
self.conn.close()
4、以下为settings.py文件源码:
DB_HOST = 'localhost'
DB_PORT = 3306
DB_USER = 'root'
DB_PASSWORD = 'dyj177035'
DB_DATABASE = 'scrapy_read_book'
DB_CHARSET = 'utf8'
ITEM_PIPELINES = {
"scrapy_readbook_practice.pipelines.ScrapyReadbookPracticePipeline": 300,
"scrapy_readbook_practice.pipelines.MysqlPipeline":301
}
5、以下为items.py文件源码:
import scrapy
class ScrapyReadbookPracticeItem(scrapy.Item):
name = scrapy.Field()
src = scrapy.Field()