Python爬虫抓取信息存储到excel表格后,怎么实行数据可视化

我用python爬去了起点中文网的一些信息,并且存储到excel中,现在想要实现数据可视化怎么写应该
import requests
from lxml import etree
from openpyxl import Workbook

class Book():
def init(p):
p.url = 'https://www.qidian.com/rank/hotsales?page={页数}'
p.wb = Workbook() # class实例化
p.ws = p.wb.active # 激活工具表
p.ws.append(['书名', '作者', '类型', '连载状态']) # 添加对应的表头

def geturl(p):
    url = [p.url.format(页数 =i) for i in range(1,15)]
    return url
def parse_url(p,url):
    response =requests.get(url,timeout = 5)
    return response.content.decode('utf-8','ignore')
def get_list(p,html_str):
    html = etree.HTML(html_str)
    connect_list = []
    lists = html.xpath("//div[@class='book-img-text']/ul/li//div[@class='book-mid-info']")
    for list in lists:
        item = {}
        item['书名'] = ''.join(list.xpath("./h4/a/text()"))
        item['作者'] = ''.join(list.xpath("./p[@class='author']/a[1]/text()"))
        item['类型'] = ''.join(list.xpath("./p[@class='author']/a[2]/text()"))
        item['连载状态'] = ''.join(list.xpath("./p[@class='author']/span/text()"))
        connect_list.append(item)
    return connect_list
def save_list(p, connects):
    for connect in connects:
        p.ws.append([connect['书名'], connect['作者'], connect['类型'], connect['连载状态']])
    print('保存小说信息成功')
def run(p):
    url_list = p.geturl()
    for url in url_list:
        html_url =p.parse_url(url)
        connects = p.get_list(html_url)
        p.save_list(connects[:])
    p.wb.save('book.xlsx')

if name=='__main__':
spider = Book()
spider.run()

查看全部
Dnoyishi
Dnoyishi
2020/01/02 11:33
  • 爬虫
  • python
  • 数据可视化
  • 点赞
  • 收藏
  • 回答
    私信

1个回复