Python爬虫的问题，急求大佬解惑

Traceback (most recent call last):
  File "C:\Users\asus\PycharmProjects\pythonProject2\完整代码.py", line 132, in <module>
    newsary=parseListLinks(newsurl)
  File "C:\Users\asus\PycharmProjects\pythonProject2\完整代码.py", line 62, in parseListLinks
    jd = json.loads('{' + res.text.lstrip('try{feedCardJsonpCallback(').rstrip(');}catch(e){};') + '}}',encoding='utf-8')
  File "C:\Users\asus\AppData\Local\Programs\Python\Python39\lib\json\__init__.py", line 359, in loads
    return cls(**kw).decode(s)
TypeError: __init__() got an unexpected keyword argument 'encoding'
以上是报错内容，我是用写的python代码试着爬取新浪的新闻网页

具体代码如下：

# -*- coding: utf-8 -*-
import json
import requests
import re
import pymysql
import jieba
import jieba.analyse
import numpy as np
import wordcloud
import PIL.Image as image
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from collections import Counter
res=requests.get('http://news.sina.com.cn/china')
res.enconding='utf-8'
soup=BeautifulSoup(res.text,'html.parser')
url="https://feed.sina.com.cn/api/roll/get?pageid=121&lid=1356&num=20&versionNumber=1.2.4&page={}&encode=utf-8&callback=feedCardJsonpCallback"
def getcomments(newsurl):
    try:
        m=re.search('doc-i(.*).shtml',newsurl)
        newsid=m.group(1)
        comment_url="https://comment.sina.com.cn/page/info?version=1&format=json&channel=gn&newsid=comos-{}&group=undefined&compress=0&ie=utf-8&oe=utf-8&page_size=3&t_size=3&h_size=3"
        comments=requests.get(comment_url.format(newsid))
        jd=json.loads(comments.text)
        return jd['result']['count']['total']
    except KeyError:
        return 0
def getNewsDetail(newsurl):
    headers={
    'Referer':'https://news.sina.com.cn/china/',
    'accept':'*/*',
    'accept-encoding':'gzip,deflate,br',
    'accept-language':'zh-CN,zh;q=0.9',
    'User-agent':'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebKit/537.36(LHTML,like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    result={}
    res=requests.get(newsurl,headers=headers)
    res.encoding = 'utf-8'
    soup=BeautifulSoup(res.text,'html.parser')
    result['title']=soup.select('.main-title')[0].text
    timesource=soup.select('.date')[0].contents[0].strip()
    result['dt']=timesource
    result['article']=' '.join([p.text.strip() for p in soup.select('.article p')[:-1]])
    result['editor']=soup.select('.show_author')[0].text.lstrip('责任编辑:')
    try:
        result['source']=soup.select('.date-source a')[0].text
    except IndexError:
        result['source']=soup.select('.source')[0].text
    result['comments']=getcomments(newsurl)
    return result
def parseListLinks(url):
    headers = {
        'Referer': 'https://news.sina.com.cn/china/',
        'accept': '*/*',
        'accept-encoding': 'gzip,deflate,br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'User-agent': 'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebKit/537.36(LHTML,like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    newsdetails = []
    res = requests.get(url,headers=headers)
    res.encoding = 'utf-8'
    jd = json.loads('{' + res.text.lstrip('try{feedCardJsonpCallback(').rstrip(');}catch(e){};') + '}}',encoding='utf-8')
    for ent in jd['result']['data']:
        newsdetails.append(getNewsDetail(ent['url']))
    return newsdetails
class yfsql():
    db=None
    config={
        'host':"localhost",
        'port':3306,
        'username':"root",
        'password':'228742',
        'database':'data',
        'charset':"utf-8"
    }
    def connect(self):
        if self.db==None:
            self.db=pymysql.connect(
                host=self.config['host'],
                port=self.config['port'],
                user=self.config['username'],
                passwd=self.config['password'],
                db=self.config['database'],
                charset=self.config['charset']
            )
        return self.db
    def __init__(self):
        self.connect()
    def delete(self):
        if self.db!=None:
            self.db.close()

    def create(self,table1):
        cursor=self.connect().cursor()
        cursor.execute("DROP TABLE IF EXISTS"+table1)
        sql='create table'+table1+'(id int(11) not null auto_increment primary key,title varchar(50) not null,dt varchar(20) not null,editor varchar(50) not null,article text not null,commentscount int(5),source varchar(50))'
        try:
            cursor.execute(sql)
            self.connect().commit()
            print('创建数据库成功')
            return True
        except:
            self.connect().rollback()
            print('创建数据库失败')
            return False
    def query(self,sql1):
        cursor=self.connect().cursor()
        try:
            cursor.execute(sql1)
            data=cursor.fetchall()
            self.connect().commit()
        except:
            self.connect().rollback()
            return False
        return data
    def insert(self,value,table1):
        cursor=self.connect().cursor()
        sql2="INSERT INTO"+table1+"(title,dt,editor,article,commentscount,source)\VALUES(%s,%s,%s,%s,%s,%s)"
        try:
            cursor.execute(sql2,value)
            self.connect().commit()
            print('导入数据库成功')
            return True
        except:
            self.connect().rollback()
            print('导入数据库失败')
            return False

news_total=[]
for i in range(1,140):
    newsurl=url.format(i)
    newsary=parseListLinks(newsurl)
    news_total.extend(newsary)
mysql1=yfsql()
mysql1.create('ceshi')
for new_total in news_total:
    title=new_total['title']
    dt=new_total['dt']
    editor=new_total['editor']
    article=new_total['article']
    commentscount=new_total['comments']
    source=new_total['source']
    value=(title,dt,editor,article,commentscount,source)
    mysql1.insert(value)
results=[]
for x in mysql1.query('SELECT article FROM ceshi'):
    results.append(x)
file=open('ceshi.txt','a',encoding='utf-8')
file.write('\n'.join(['%s' %i for i in results]))
file.close()

def generate_image():
    jieba.analyse.set_stop_words('stopword.txt')
    content=open('ceshi.txt',encoding='utf-8').read()
    tags=jieba.analyse.extract_tags(content,topK=150,withWeight=False,allowPOS=())
    data=" ".join(tags)
    mask=np.array(image.open(r"C:\Users\asus\词云.jpg"))
    w=wordcloud.WordCloud(font_path='msyh.ttc',background_color='white',mask=mask)
    w.generate(data)
    w.to_file('ciyun.jpg')
    print('创建词云图成功')
generate_image()

def tfidf_list():
    jieba.analyse.set_stop_words('stopword.txt')
    content=open('ceshi.txt',encoding='utf-8').read()
    content=re.sub("[A-Za-z0-9\: \·\——\,\。\“\”\\(\)\,\‘\.\%]","",content)
    tags=jieba.analyse.extract_tags(content,topK=20,withWeight=True,allowPOS=())
    for x,w in tags:
        print(x+'\t'+str(w))
tfidf_list()

def generate_bar():
    jieba.analyse.set_stop_words('stopword.txt')
    content=open('ceshi.txt',encoding='utf-8').read()
    content=re.sub('[A-Za-z0-9\: \·\——\，\。\”\“\\(\)\，\‘\、\?\;\.\%]',"",content)
    content=str(content)
    tags=jieba.analyse.extract_tags(content,topK=30,withWeight=True,allowPOS=())
    y=[]  #关键词列表
    x=[]  #权重列表
    for (k,v) in tags:
        y.append(k)
        x.append(v)
    x1=np.array(x)
    fig,ax=plt.subplots(figsize=(30,12))
    plt.rcParams['font.size']=20
    plt.rcParams['font.sans-serif']=['SimHei']
    y_pos=np.arange(len(y))
    rects=ax.barth(y=y_pos,align='center',width=x1,)
    ax.set_yticks(y_pos)  #设置标度的位置
    ax.set_yticklabels(y)  #设置纵坐标的每一个刻度的属性值
    ax.set_xlabel('Importance')  #设置横坐标的单位
    ax.set_title('TF-IDF') #设定图片的标题
    for rect,y,num in zip(rects,y_pos,x1):
        x=rect.get_width()
        plt.text(x,y,"%f"%num)
    plt.savefig("barchart.png")
    bar=plt.show()
    return bar
generate_bar()

def generate_graph():
    a_title=[]
    a_count=[]
    a_source=[]
    for z,x,y in mysql1.query('SELECT article,title,commentscount FROM ceshi order by commentscount desc limit 15'):
        a_source.append(z)
        a_title.append(x)
        a_count.append(y)
#print(a_title)
#print(a_count)
#print(a_source)
    keylist=[]
    for i in a_source:
        i=re.sub("[:\·\——\，\。\“\”\\（\）\,\'\、\？\；]","",i)
        i=str(i)
        jieba.analyse.set_stop_words("stopword.txt")
        tags=jieba.analyse.extract_tags(i,topK=1,withWeight=False,allowPOS=())
        for i in tags:
            keylist.append(i)

    fig,ax=plt.subplots(figsize=(30,12))
    plt.rcParams['font.size']=20
    plt.rcParams['font.sans-serif']=['SimHei']
    colors=['lightcoral']
    plt.title('评论数-关键词对应图')
    plt.bar(keylist,a_count,color=colors)
    for x,y in zip(range(len(a_count)),a_count):
        plt.text(x+0.1,y+1,'%i'%y,ha='center',va='bottom')
    plt.savefig("graph.png")
    graph=plt.show()
    return graph
generate_graph()

def generate_pie():
    source_results=[]
    list=[]
    num=0
    for x in mysql1.query('SELECT source FROM ceshi'):
        x=str(x)
        x=re.sub("[A-Za-z0-9\:\·\——\，\。\“\”\\（\）\,\']","",x)
        source_results.append(x)
    for i in source_results:
        i=str(i)
        list.append(i)
    list=set(list)
    for j in list:
        num=num+1
    counter=Counter()
    count=0
    for word in source_results:
        count=count+1
        counter[word]+=1
    pro_list=[]  #吧各个新闻的来源的占比存为列表
    k_list=[]  #各个新闻来源标签存为列表
    for (k,v) in counter.most_common(15):
        k_list.append(k)
        pro=v/count  #计算新闻来源占比，结果为小数
        pro_list.append(pro)
    plt.rcParams['font.size']=20  #设置字体大小
    plt.rcParams['font.sans-serif']=['SimHei']
    fig,ax=plt.subplots(figsize=(30,12))  #设置画布大小
    colors=['lightcoral','orange','plum','c','pink']
    plt.pie(x=pro_list,radius=1.0,pctdistance=0.8,labels=k_list,colors=colors,startangle=90,autopct='%1.1f%%')
    x_0=[1,0,0,0]  #用于显示空心
    plt.pie(x_0,radius=0.6,colors='w')  #绘制空心圆
    plt.title('新闻来源占比图')
    plt.savefig("piechart.png")
    show=plt.show()
    return show
generate_pie()

写回答
好问题 0 提建议
关注问题
分享
邀请回答
编辑收藏删除结题
收藏举报

14条回答默认最新

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
ProfSnail 2021-02-22 22:07
关注
楼主请私信我，我可以挨个为你解答每个错误的产生原因以及如何修正。

本回答被题主选为最佳回答 , 对您是否有帮助呢?

解决无用
评论打赏
分享
举报

评论

按下Enter换行，Ctrl+Enter发表内容

查看更多回答(13条)

报告相同问题？

关注问题

python爬虫：Python 爬虫知识大全
2024-06-22 14:59

python爬虫：Python 爬虫知识大全； python爬虫：Python 爬虫知识大全； python爬虫：Python 爬虫知识大全； python爬虫：Python 爬虫知识大全； python爬虫：Python 爬虫知识大全； python爬虫：Python 爬虫知识...
python爬虫淘宝京东拼多多
2023-10-21 11:02

python爬虫淘宝京东拼多多python爬虫淘宝京东拼多多python爬虫淘宝京东拼多多python爬虫淘宝京东拼多多python爬虫淘宝京东拼多多python爬虫淘宝京东拼多多python爬虫淘宝京东拼多多python爬虫淘宝京东拼多多python...
python爬虫，爬虫破解pexels高清原图
2024-01-17 15:26

python爬虫，爬虫破解pexels高清原图python爬虫，爬虫破解pexels高清原图python爬虫，爬虫破解pexels高清原图python爬虫，爬虫破解pexels高清原图python爬虫，爬虫破解pexels高清原图python爬虫，爬虫破解pexels高清...
python爬虫，爬取贴吧
2024-01-17 15:23

python爬虫，爬取贴吧python爬虫，爬取贴吧python爬虫，爬取贴吧python爬虫，爬取贴吧python爬虫，爬取贴吧python爬虫，爬取贴吧python爬虫，爬取贴吧python爬虫，爬取贴吧python爬虫，爬取贴吧python爬虫，爬取贴吧...
Python爬虫基础知识和反爬机制（案例）
2023-12-27 14:08

Python爬虫基础知识和反爬机制（案例） Python爬虫基础知识和反爬机制（案例） Python爬虫基础知识和反爬机制（案例） Python爬虫基础知识和反爬机制（案例） Python爬虫基础知识和反爬机制（案例） Python爬虫基础...
基于python爬虫学习项目源码.zip
2023-09-23 17:51

基于python爬虫学习项目源码.zip基于python爬虫学习项目源码.zip基于python爬虫学习项目源码.zip基于python爬虫学习项目源码.zip基于python爬虫学习项目源码.zip基于python爬虫学习项目源码.zip基于python爬虫学习...
python爬虫源码爬取豆瓣内容python爬虫源码爬取豆瓣内容
2024-08-03 22:31

python爬虫案例，python爬虫源码爬取豆瓣内容python爬虫源码爬取豆瓣内容
简单的python爬虫，代码完整
2018-06-21 10:24

本资源提供了一个完整的Python2.7版本的简单网络爬虫代码，旨在帮助学习者理解和实践爬虫的基本原理。首先，我们要了解Python爬虫的基本构成。一个基础的Python爬虫通常包括以下部分： 1. **URL管理器**：负责...
Python爬虫实现自动登录、签到
2024-01-19 23:20

Python爬虫实现自动登录、签到 Python爬虫实现自动登录、签到 Python爬虫实现自动登录、签到 Python爬虫实现自动登录、签到 Python爬虫实现自动登录、签到 Python爬虫实现自动登录、签到
没有解决我的问题, 去提问

Python爬虫的问题，急求大佬解惑

14条回答 默认 最新

14条回答默认最新