emiyashiro406 (让你见笑了)
Acceptance rate: 100%
2021-02-21 21:59

A Python web-scraping problem, urgently asking the experts for help

Bounty: 50
Accepted
Traceback (most recent call last):
  File "C:\Users\asus\PycharmProjects\pythonProject2\完整代码.py", line 132, in <module>
    newsary=parseListLinks(newsurl)
  File "C:\Users\asus\PycharmProjects\pythonProject2\完整代码.py", line 62, in parseListLinks
    jd = json.loads('{' + res.text.lstrip('try{feedCardJsonpCallback(').rstrip(');}catch(e){};') + '}}',encoding='utf-8')
  File "C:\Users\asus\AppData\Local\Programs\Python\Python39\lib\json\__init__.py", line 359, in loads
    return cls(**kw).decode(s)
TypeError: __init__() got an unexpected keyword argument 'encoding'
The above is the error output. I wrote this Python code to try to scrape Sina's news pages.

The full code is as follows:

# -*- coding: utf-8 -*-
import json
import requests
import re
import pymysql
import jieba
import jieba.analyse
import numpy as np
import wordcloud
import PIL.Image as image
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from collections import Counter
res=requests.get('http://news.sina.com.cn/china')
res.enconding='utf-8'
soup=BeautifulSoup(res.text,'html.parser')
url="https://feed.sina.com.cn/api/roll/get?pageid=121&lid=1356&num=20&versionNumber=1.2.4&page={}&encode=utf-8&callback=feedCardJsonpCallback"
def getcomments(newsurl):
    try:
        m=re.search('doc-i(.*).shtml',newsurl)
        newsid=m.group(1)
        comment_url="https://comment.sina.com.cn/page/info?version=1&format=json&channel=gn&newsid=comos-{}&group=undefined&compress=0&ie=utf-8&oe=utf-8&page_size=3&t_size=3&h_size=3"
        comments=requests.get(comment_url.format(newsid))
        jd=json.loads(comments.text)
        return jd['result']['count']['total']
    except KeyError:
        return 0
def getNewsDetail(newsurl):
    headers={
    'Referer':'https://news.sina.com.cn/china/',
    'accept':'*/*',
    'accept-encoding':'gzip,deflate,br',
    'accept-language':'zh-CN,zh;q=0.9',
    'User-agent':'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebKit/537.36(LHTML,like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    result={}
    res=requests.get(newsurl,headers=headers)
    res.encoding = 'utf-8'
    soup=BeautifulSoup(res.text,'html.parser')
    result['title']=soup.select('.main-title')[0].text
    timesource=soup.select('.date')[0].contents[0].strip()
    result['dt']=timesource
    result['article']=' '.join([p.text.strip() for p in soup.select('.article p')[:-1]])
    result['editor']=soup.select('.show_author')[0].text.lstrip('责任编辑:')
    try:
        result['source']=soup.select('.date-source a')[0].text
    except IndexError:
        result['source']=soup.select('.source')[0].text
    result['comments']=getcomments(newsurl)
    return result
def parseListLinks(url):
    headers = {
        'Referer': 'https://news.sina.com.cn/china/',
        'accept': '*/*',
        'accept-encoding': 'gzip,deflate,br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'User-agent': 'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebKit/537.36(LHTML,like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    newsdetails = []
    res = requests.get(url,headers=headers)
    res.encoding = 'utf-8'
    jd = json.loads('{' + res.text.lstrip('try{feedCardJsonpCallback(').rstrip(');}catch(e){};') + '}}',encoding='utf-8')
    for ent in jd['result']['data']:
        newsdetails.append(getNewsDetail(ent['url']))
    return newsdetails
class yfsql():
    db=None
    config={
        'host':"localhost",
        'port':3306,
        'username':"root",
        'password':'228742',
        'database':'data',
        'charset':"utf-8"
    }
    def connect(self):
        if self.db==None:
            self.db=pymysql.connect(
                host=self.config['host'],
                port=self.config['port'],
                user=self.config['username'],
                passwd=self.config['password'],
                db=self.config['database'],
                charset=self.config['charset']
            )
        return self.db
    def __init__(self):
        self.connect()
    def delete(self):
        if self.db!=None:
            self.db.close()

    def create(self,table1):
        cursor=self.connect().cursor()
        cursor.execute("DROP TABLE IF EXISTS"+table1)
        sql='create table'+table1+'(id int(11) not null auto_increment primary key,title varchar(50) not null,dt varchar(20) not null,editor varchar(50) not null,article text not null,commentscount int(5),source varchar(50))'
        try:
            cursor.execute(sql)
            self.connect().commit()
            print('Database created successfully')
            return True
        except:
            self.connect().rollback()
            print('Failed to create database')
            return False
    def query(self,sql1):
        cursor=self.connect().cursor()
        try:
            cursor.execute(sql1)
            data=cursor.fetchall()
            self.connect().commit()
        except:
            self.connect().rollback()
            return False
        return data
    def insert(self,value,table1):
        cursor=self.connect().cursor()
        sql2="INSERT INTO"+table1+"(title,dt,editor,article,commentscount,source)\VALUES(%s,%s,%s,%s,%s,%s)"
        try:
            cursor.execute(sql2,value)
            self.connect().commit()
            print('Inserted into database successfully')
            return True
        except:
            self.connect().rollback()
            print('Failed to insert into database')
            return False

news_total=[]
for i in range(1,140):
    newsurl=url.format(i)
    newsary=parseListLinks(newsurl)
    news_total.extend(newsary)
mysql1=yfsql()
mysql1.create('ceshi')
for new_total in news_total:
    title=new_total['title']
    dt=new_total['dt']
    editor=new_total['editor']
    article=new_total['article']
    commentscount=new_total['comments']
    source=new_total['source']
    value=(title,dt,editor,article,commentscount,source)
    mysql1.insert(value)
results=[]
for x in mysql1.query('SELECT article FROM ceshi'):
    results.append(x)
file=open('ceshi.txt','a',encoding='utf-8')
file.write('\n'.join(['%s' %i for i in results]))
file.close()

def generate_image():
    jieba.analyse.set_stop_words('stopword.txt')
    content=open('ceshi.txt',encoding='utf-8').read()
    tags=jieba.analyse.extract_tags(content,topK=150,withWeight=False,allowPOS=())
    data=" ".join(tags)
    mask=np.array(image.open(r"C:\Users\asus\词云.jpg"))
    w=wordcloud.WordCloud(font_path='msyh.ttc',background_color='white',mask=mask)
    w.generate(data)
    w.to_file('ciyun.jpg')
    print('Word cloud created successfully')
generate_image()

def tfidf_list():
    jieba.analyse.set_stop_words('stopword.txt')
    content=open('ceshi.txt',encoding='utf-8').read()
    content=re.sub("[A-Za-z0-9\: \·\——\,\。\“\”\\(\)\,\‘\.\%]","",content)
    tags=jieba.analyse.extract_tags(content,topK=20,withWeight=True,allowPOS=())
    for x,w in tags:
        print(x+'\t'+str(w))
tfidf_list()

def generate_bar():
    jieba.analyse.set_stop_words('stopword.txt')
    content=open('ceshi.txt',encoding='utf-8').read()
    content=re.sub('[A-Za-z0-9\: \·\——\,\。\”\“\\(\)\,\‘\、\?\;\.\%]',"",content)
    content=str(content)
    tags=jieba.analyse.extract_tags(content,topK=30,withWeight=True,allowPOS=())
    y=[]  # keyword list
    x=[]  # weight list
    for (k,v) in tags:
        y.append(k)
        x.append(v)
    x1=np.array(x)
    fig,ax=plt.subplots(figsize=(30,12))
    plt.rcParams['font.size']=20
    plt.rcParams['font.sans-serif']=['SimHei']
    y_pos=np.arange(len(y))
    rects=ax.barth(y=y_pos,align='center',width=x1,)
    ax.set_yticks(y_pos)  # set the tick positions
    ax.set_yticklabels(y)  # set the label text for each y-axis tick
    ax.set_xlabel('Importance')  # set the x-axis label
    ax.set_title('TF-IDF') # set the chart title
    for rect,y,num in zip(rects,y_pos,x1):
        x=rect.get_width()
        plt.text(x,y,"%f"%num)
    plt.savefig("barchart.png")
    bar=plt.show()
    return bar
generate_bar()

def generate_graph():
    a_title=[]
    a_count=[]
    a_source=[]
    for z,x,y in mysql1.query('SELECT article,title,commentscount FROM ceshi order by commentscount desc limit 15'):
        a_source.append(z)
        a_title.append(x)
        a_count.append(y)
#print(a_title)
#print(a_count)
#print(a_source)
    keylist=[]
    for i in a_source:
        i=re.sub("[:\·\——\,\。\“\”\\(\)\,\'\、\?\;]","",i)
        i=str(i)
        jieba.analyse.set_stop_words("stopword.txt")
        tags=jieba.analyse.extract_tags(i,topK=1,withWeight=False,allowPOS=())
        for i in tags:
            keylist.append(i)

    fig,ax=plt.subplots(figsize=(30,12))
    plt.rcParams['font.size']=20
    plt.rcParams['font.sans-serif']=['SimHei']
    colors=['lightcoral']
    plt.title('Comment count by keyword')
    plt.bar(keylist,a_count,color=colors)
    for x,y in zip(range(len(a_count)),a_count):
        plt.text(x+0.1,y+1,'%i'%y,ha='center',va='bottom')
    plt.savefig("graph.png")
    graph=plt.show()
    return graph
generate_graph()

def generate_pie():
    source_results=[]
    list=[]
    num=0
    for x in mysql1.query('SELECT source FROM ceshi'):
        x=str(x)
        x=re.sub("[A-Za-z0-9\:\·\——\,\。\“\”\\(\)\,\']","",x)
        source_results.append(x)
    for i in source_results:
        i=str(i)
        list.append(i)
    list=set(list)
    for j in list:
        num=num+1
    counter=Counter()
    count=0
    for word in source_results:
        count=count+1
        counter[word]+=1
    pro_list=[]  # store each news source's share as a list
    k_list=[]  # store each news source's label as a list
    for (k,v) in counter.most_common(15):
        k_list.append(k)
        pro=v/count  # compute the source's share of the news items, as a fraction
        pro_list.append(pro)
    plt.rcParams['font.size']=20  # set the font size
    plt.rcParams['font.sans-serif']=['SimHei']
    fig,ax=plt.subplots(figsize=(30,12))  # set the figure size
    colors=['lightcoral','orange','plum','c','pink']
    plt.pie(x=pro_list,radius=1.0,pctdistance=0.8,labels=k_list,colors=colors,startangle=90,autopct='%1.1f%%')
    x_0=[1,0,0,0]  # used to make the center hollow
    plt.pie(x_0,radius=0.6,colors='w')  # draw the hollow inner circle
    plt.title('Share of news by source')
    plt.savefig("piechart.png")
    show=plt.show()
    return show
generate_pie()

14 answers

  • ProfSnail ProfSnail 2 months ago

    OP, please message me privately; I can explain, one by one, what caused each error and how to fix it.

  • qq_22475211 深白色的风 2 months ago
    Change line 44 of the code as follows:
    if len(soup.select('.show_author'))==0:
        result['editor'] ="None"
    else:
        result['editor']=soup.select('.show_author')[0].text.lstrip('责任编辑:')

    The error comes from the fact that a scraped article sometimes has no editor-in-charge, i.e. soup.select('.show_author') is empty; see for example https://news.sina.com.cn/c/2021-02-22/doc-ikftssap8152936.shtml

    Errors caused by other missing elements can be handled the same way; see the sketch below.
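
    Generalizing that idea (a minimal sketch, not from the thread; the helper name select_text is made up here): fall back to a default whenever a selector matches nothing, so no missing element raises IndexError.

    def select_text(soup, selector, default='None'):
        # return the text of the first match, or the default when nothing matches
        nodes = soup.select(selector)
        return nodes[0].text if nodes else default

    # usage inside getNewsDetail:
    result['editor'] = select_text(soup, '.show_author').lstrip('责任编辑:')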

  • emiyashiro406 让你见笑了 2 months ago

    Traceback (most recent call last):
      File "C:\Users\asus\PycharmProjects\pythonProject2\完整代码.py", line 132, in <module>
        newsary=parseListLinks(newsurl)
      File "C:\Users\asus\PycharmProjects\pythonProject2\完整代码.py", line 64, in parseListLinks
        newsdetails.append(getNewsDetail(ent['url']))
      File "C:\Users\asus\PycharmProjects\pythonProject2\完整代码.py", line 44, in getNewsDetail
        result['editor']=soup.select('.show_author')[0].text.lstrip('责任编辑:')
    IndexError: list index out of range

    Following the advice above, I removed encoding; it still errors, but for a different reason now.

  • emiyashiro406 让你见笑了 2 months ago

    The main thing is, what do these errors even mean? I can't tell what the actual problem is.

  • ProfSnail ProfSnail 2 months ago

    It looks like it's just the arguments passed to json.loads() that are wrong?

    The encoding parameter was removed from json.loads() in Python 3.9. The path in your traceback is a Python 3.9 path, so you can try deleting encoding='utf-8' to stop this error, or downgrade to a version that still accepts the encoding parameter.

    Changed in version 3.9: The keyword argument encoding has been removed.

    json.loads(s, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw);

    In 3.9 and later, where encoding='utf-8' has been removed, if you still want to use something like the encoding parameter, the suggestion is this approach:

    json.JSONEncoder().encode({"foo": ["bar", "baz"]})
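
    For this script, though, the simplest fix is to drop the argument entirely: requests has already decoded the response body, so res.text is a str and json.loads needs no encoding hint. A minimal sketch of the corrected call from the traceback:

    # res.text is already a decoded str, so no encoding argument is needed
    jd = json.loads('{' + res.text.lstrip('try{feedCardJsonpCallback(').rstrip(');}catch(e){};') + '}}')
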
  • L20517496 Admin_Poker 2 months ago
    Remove this header: 'accept-encoding':'gzip,deflate,br'. See the sketch below.
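
    The likely reason this helps: requests decompresses gzip and deflate transparently, but handles br only when a Brotli package is installed, so advertising br can leave res.text undecodable. A minimal sketch of the trimmed headers (the User-agent string is also normalized here):

    headers = {
        'Referer': 'https://news.sina.com.cn/china/',
        'accept': '*/*',
        'accept-language': 'zh-CN,zh;q=0.9',
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
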
  • qq_22475211 深白色的风 2 months ago

    I tested this code on two machines and neither reported any error.

  • qq_22475211 深白色的风 2 months ago
    Insert print(comments.text) after line 23 and take a look; comments.text most likely is not valid JSON. Fix that and it should work. See the sketch below.
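
    If the comment endpoint can return non-JSON (an error page, a rate-limited response, and so on), a guarded parse keeps getcomments from crashing; a minimal sketch extending the function's existing try/except:

    try:
        jd = json.loads(comments.text)
        return jd['result']['count']['total']
    except (json.JSONDecodeError, KeyError):
        # fall back to 0 when the body isn't JSON or lacks the expected keys
        return 0
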
  • emiyashiro406 让你见笑了 2 months ago

    After fixing the problems caused by the missing elements, a new error has appeared... It looks like we are back to an encoding/decoding problem, but I have already switched to Python 3.6, so why does it still error?

  • weixin_40960688 浮世绘太空 2 months ago

    return cls(**kw).encode(s)
  • weixin_42678268 bj_0163_bj 2 months ago

    Check your version; as the warning already said, it was removed in 3.9. It runs fine for me on 3.6.

  • bosaidongmomo bosaidongmomo 2 months ago

    In a notebook there are only warnings...

    There is a pile of small mistakes... and some of the jpg files cannot be fetched... I only fixed part of the syntax errors...

    # -*- coding: utf-8 -*-
    import json
    import requests
    import re
    import pymysql
    import jieba
    import jieba.analyse
    import numpy as np
    import wordcloud
    import PIL.Image as image
    from bs4 import BeautifulSoup
    from matplotlib import pyplot as plt
    from collections import Counter
    res=requests.get('http://news.sina.com.cn/china')
    res.enconding='utf-8'
    soup=BeautifulSoup(res.text,'html.parser')
    url="https://feed.sina.com.cn/api/roll/get?pageid=121&lid=1356&num=20&versionNumber=1.2.4&page={}&encode=utf-8&callback=feedCardJsonpCallback"
    def getcomments(newsurl):
        try:
            m=re.search('doc-i(.*).shtml',newsurl)
            newsid=m.group(1)
            comment_url="https://comment.sina.com.cn/page/info?version=1&format=json&channel=gn&newsid=comos-{}&group=undefined&compress=0&ie=utf-8&oe=utf-8&page_size=3&t_size=3&h_size=3"
            comments=requests.get(comment_url.format(newsid))
            jd=json.loads(comments.text)
            return jd['result']['count']['total']
        except KeyError:
            return 0
    def getNewsDetail(newsurl):
        headers={
        'Referer':'https://news.sina.com.cn/china/',
        'accept':'*/*',
        'accept-encoding':'gzip,deflate,br',
        'accept-language':'zh-CN,zh;q=0.9',
        'User-agent':'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebKit/537.36(LHTML,like Gecko) Chrome/80.0.3987.149 Safari/537.36'
        }
        result={}
        res=requests.get(newsurl,headers=headers)
        res.encoding = 'utf-8'
        soup=BeautifulSoup(res.text,'html.parser')
        try:
            result['title']=soup.select('.main-title')[0].text
            timesource=soup.select('.date')[0].contents[0].strip()
            result['dt']=timesource
            result['article']=' '.join([p.text.strip() for p in soup.select('.article p')[:-1]])
            result['editor']=soup.select('.show_author')[0].text.lstrip('责任编辑:')
            result['source']=soup.select('.date-source a')[0].text
        except IndexError:
            result['source']=soup.select('.source')[0].text
        result['comments']=getcomments(newsurl)
        return result
    def parseListLinks(url):
        headers = {
            'Referer': 'https://news.sina.com.cn/china/',
            'accept': '*/*',
            'accept-encoding': 'gzip,deflate,br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'User-agent': 'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebKit/537.36(LHTML,like Gecko) Chrome/80.0.3987.149 Safari/537.36'
        }
        newsdetails = []
        res = requests.get(url,headers=headers)
        res.encoding = 'utf-8'
        jd = json.loads('{' + res.text.lstrip('try{feedCardJsonpCallback(').rstrip(');}catch(e){};') + '}}',encoding='utf-8')
        for ent in jd['result']['data']:
            newsdetails.append(getNewsDetail(ent['url']))
        return newsdetails
    class yfsql():
        db=None
        config={
            'host':"localhost",
            'port':3306,
            'username':"root",
            'password':'root',
            'database':'data',
            ###############################################################
            #################  utf8
            ###############################################################
            'charset':"utf8"
        }
        def connect(self):
            if self.db==None:
                self.db=pymysql.connect(
                    host=self.config['host'],
                    port=self.config['port'],
                    user=self.config['username'],
                    passwd=self.config['password'],
                    db=self.config['database'],
                    charset=self.config['charset']
                )
            return self.db
        def __init__(self):
            self.connect()
        def delete(self):
            if self.db!=None:
                self.db.close()
    
        def create(self,table1):
            cursor=self.connect().cursor()
            ###############################################################
            #################  add a space here
            ###############################################################
            cursor.execute("DROP TABLE IF EXISTS "+table1)
            ###############################################################
            #################  add a space here
            ###############################################################
            sql='create table '+table1+'(id int(11) not null auto_increment primary key,title varchar(50) not null,dt varchar(20) not null,editor varchar(50) not null,article text not null,commentscount int(5),source varchar(50))'
            try:
                cursor.execute(sql)
                self.connect().commit()
                print('Database created successfully')
                return True
            ###############################################################
            #################  exception handling
            ###############################################################
            except:
                self.connect().rollback()
                print('Failed to create database')
                return False
        def query(self,sql1):
            cursor=self.connect().cursor()
            try:
                cursor.execute(sql1)
                data=cursor.fetchall()
                self.connect().commit()
            except:
                self.connect().rollback()
                return False
            return data
        def insert(self,value,table1):
            cursor=self.connect().cursor()
            ###############################################################
            #################  remove the backslash
            ##############################################################
            sql2="INSERT INTO "+table1+"(title,dt,editor,article,commentscount,source) VALUES(%s,%s,%s,%s,%s,%s)"
            print(value)
            try:
                cursor.execute(sql2,value)
                self.connect().commit()
                print('Inserted into database successfully')
                return True
            except Exception as e:
                self.connect().rollback()
                print('Failed to insert into database')
                return False
    
    news_total=[]
    for i in range(1,2):
        newsurl=url.format(i)
        newsary=parseListLinks(newsurl)
        news_total.extend(newsary)
    mysql1=yfsql()
    mysql1.create('ceshi')
    for new_total in news_total:
        title=new_total['title']
        dt=new_total['dt']
        editor=new_total['editor']
        article=new_total['article']
        commentscount=new_total['comments']
        source=new_total['source']
        value=(title,dt,editor,article,commentscount,source)
        ###############################################################
        #################  add the database table ceshi here
        ##############################################################
        mysql1.insert(value,'ceshi')
    results=[]
    for x in mysql1.query('SELECT article FROM ceshi'):
        results.append(x)
    file=open('ceshi.txt','a',encoding='utf-8')
    file.write('\n'.join(['%s' %i for i in results]))
    file.close()
    
    def generate_image():
        jieba.analyse.set_stop_words('stopword.txt')
        content=open('ceshi.txt',encoding='utf-8').read()
        tags=jieba.analyse.extract_tags(content,topK=150,withWeight=False,allowPOS=())
        data=" ".join(tags)
        mask=np.array(image.open(r"C:\Users\asus\词云.jpg"))
        w=wordcloud.WordCloud(font_path='msyh.ttc',background_color='white',mask=mask)
        w.generate(data)
        w.to_file('ciyun.jpg')
        print('Word cloud created successfully')
    generate_image()
    
    def tfidf_list():
        jieba.analyse.set_stop_words('stopword.txt')
        content=open('ceshi.txt',encoding='utf-8').read()
        content=re.sub("[A-Za-z0-9\: \·\——\,\。\“\”\\(\)\,\‘\.\%]","",content)
        tags=jieba.analyse.extract_tags(content,topK=20,withWeight=True,allowPOS=())
        for x,w in tags:
            print(x+'\t'+str(w))
    tfidf_list()
    
    def generate_bar():
        jieba.analyse.set_stop_words('stopword.txt')
        content=open('ceshi.txt',encoding='utf-8').read()
        content=re.sub('[A-Za-z0-9\: \·\——\,\。\”\“\\(\)\,\‘\、\?\;\.\%]',"",content)
        content=str(content)
        tags=jieba.analyse.extract_tags(content,topK=30,withWeight=True,allowPOS=())
        y=[]  # keyword list
        x=[]  # weight list
        for (k,v) in tags:
            y.append(k)
            x.append(v)
        x1=np.array(x)
        fig,ax=plt.subplots(figsize=(30,12))
        plt.rcParams['font.size']=20
        plt.rcParams['font.sans-serif']=['SimHei']
        y_pos=np.arange(len(y))
        rects=ax.barth(y=y_pos,align='center',width=x1,)
        ax.set_yticks(y_pos)  # set the tick positions
        ax.set_yticklabels(y)  # set the label text for each y-axis tick
        ax.set_xlabel('Importance')  # set the x-axis label
        ax.set_title('TF-IDF') # set the chart title
        for rect,y,num in zip(rects,y_pos,x1):
            x=rect.get_width()
            plt.text(x,y,"%f"%num)
        plt.savefig("barchart.png")
        bar=plt.show()
        return bar
    generate_bar()
    
    def generate_graph():
        a_title=[]
        a_count=[]
        a_source=[]
        for z,x,y in mysql1.query('SELECT article,title,commentscount FROM ceshi order by commentscount desc limit 15'):
            a_source.append(z)
            a_title.append(x)
            a_count.append(y)
    #print(a_title)
    #print(a_count)
    #print(a_source)
        keylist=[]
        for i in a_source:
            i=re.sub("[:\·\——\,\。\“\”\\(\)\,\'\、\?\;]","",i)
            i=str(i)
            jieba.analyse.set_stop_words("stopword.txt")
            tags=jieba.analyse.extract_tags(i,topK=1,withWeight=False,allowPOS=())
            for i in tags:
                keylist.append(i)
    
        fig,ax=plt.subplots(figsize=(30,12))
        plt.rcParams['font.size']=20
        plt.rcParams['font.sans-serif']=['SimHei']
        colors=['lightcoral']
        plt.title('Comment count by keyword')
        plt.bar(keylist,a_count,color=colors)
        for x,y in zip(range(len(a_count)),a_count):
            plt.text(x+0.1,y+1,'%i'%y,ha='center',va='bottom')
        plt.savefig("graph.png")
        graph=plt.show()
        return graph
    generate_graph()
    
    def generate_pie():
        source_results=[]
        list=[]
        num=0
        for x in mysql1.query('SELECT source FROM ceshi'):
            x=str(x)
            x=re.sub("[A-Za-z0-9\:\·\——\,\。\“\”\\(\)\,\']","",x)
            source_results.append(x)
        for i in source_results:
            i=str(i)
            list.append(i)
        list=set(list)
        for j in list:
            num=num+1
        counter=Counter()
        count=0
        for word in source_results:
            count=count+1
            counter[word]+=1
        pro_list=[]  # store each news source's share as a list
        k_list=[]  # store each news source's label as a list
        for (k,v) in counter.most_common(15):
            k_list.append(k)
            pro=v/count  # compute the source's share of the news items, as a fraction
            pro_list.append(pro)
        plt.rcParams['font.size']=20  # set the font size
        plt.rcParams['font.sans-serif']=['SimHei']
        fig,ax=plt.subplots(figsize=(30,12))  # set the figure size
        colors=['lightcoral','orange','plum','c','pink']
        plt.pie(x=pro_list,radius=1.0,pctdistance=0.8,labels=k_list,colors=colors,startangle=90,autopct='%1.1f%%')
        x_0=[1,0,0,0]  # used to make the center hollow
        plt.pie(x_0,radius=0.6,colors='w')  # draw the hollow inner circle
        plt.title('Share of news by source')
        plt.savefig("piechart.png")
        show=plt.show()
        return show
    generate_pie()
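
    One note on the utf8 banner above: PyMySQL expects MySQL charset names, which contain no hyphen, so charset='utf-8' fails to connect while 'utf8' works. A minimal sketch ('utf8mb4' is the safer choice, since it covers all of Unicode):

    import pymysql

    # MySQL charset names have no hyphen: 'utf8'/'utf8mb4' work, 'utf-8' raises
    db = pymysql.connect(host='localhost', port=3306, user='root',
                         passwd='root', db='data', charset='utf8mb4')
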
  • huanhuilong 幻灰龙 2 months ago

    File "C:\Users\asus\PycharmProjects\pythonProject2\完整代码.py", line 62, in parseListLinks     jd = json.loads('{' + res.text.lstrip('try{feedCardJsonpCallback(').rstrip(');}catch(e){};') + '}}',encoding='utf-8')

    TypeError: __init__() got an unexpected keyword argument 'encoding'

    The error message already says it: the json.loads function does not take an encoding parameter. See the documentation: https://docs.python.org/3/library/json.html

     

    And the example here:

    https://www.geeksforgeeks.org/json-loads-in-python/

     

    You can remove: ,encoding='utf-8'
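
    One more caution: str.lstrip and str.rstrip strip sets of characters, not literal prefixes and suffixes, which is why the code has to glue braces back on afterwards. A regex capture of the callback's argument is sturdier; a minimal sketch, assuming the wrapper shown in the code (try{feedCardJsonpCallback(...);}catch(e){};):

    import re

    m = re.search(r'feedCardJsonpCallback\((.*)\);}catch', res.text, re.S)
    # m.group(1) is the raw JSON object passed to the callback
    jd = json.loads(m.group(1)) if m else None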
