芥末柚子 2019-12-27 12:28 采纳率: 0%
浏览 896

用情感词典进行中文情感分析,但算出来的分数都是零,python小白,还请大神支招!?

真的灰常小白了,代码是根据网上大神的代码做了微调,算出来分数都为零,呜呜,太难了。。。

import jieba
import numpy as np

posdict = open('C:/.../中文情感词典/正面.txt', encoding = 'utf-8').readlines()
negdict = open('C:/.../中文情感词典/负面.txt', encoding = 'utf-8').readlines()
deny_word = open('C:/.../中文情感词典/否定词.txt', encoding = 'utf-8').readlines()
mostdict = open("C:/.../中文情感词典/extreme.txt", encoding="utf-8").readlines()
verydict = open("C:/.../中文情感词典/very.txt", encoding = 'utf-8').readlines()
moredict = open("C:/.../中文情感词典/more.txt", encoding = 'utf-8').readlines()
ishdict = open("C:/.../中文情感词典/ish.txt", encoding = 'utf-8').readlines()
insufficientdict = open("C:/.../中文情感词典/insufficiently.txt", encoding = 'utf-8').readlines()
inversedict = open("C:/.../中文情感词典/over.txt", encoding = 'utf-8').readlines()
degree_word = open("C:/.../中文情感词典/程度级别词语.txt", encoding = 'utf-8').readlines()

def match(word, sentiment_value):
    if word in mostdict:
        sentiment_value *= 2.0
    elif word in verydict:
        sentiment_value *= 1.5
    elif word in moredict:
        sentiment_value *= 1.25
    elif word in ishdict:
        sentiment_value *= 1.2
    elif word in insufficientdict:
        sentiment_value *= 0.8
    elif word in inversedict:
        sentiment_value *= -1
    return sentiment_value

def sentiment_score_list(dataset):
    seg_sentence = dataset.split('。')
    count1 = []
    count2 = []
    for sen in seg_sentence: # 循环遍历每一个评论
        segtmp = jieba.lcut(sen, cut_all=False)  # 把句子进行分词,以列表的形式返回
        print(segtmp)
        i = 0 # 记录扫描到的词的位置
        a = 0 # 记录情感词的位置
        poscount = 0 # 积极词的第一次分值
        poscount2 = 0 # 积极词反转后的分值
        poscount3 = 0 # 积极词的最后分值(包括叹号的分值)
        negcount = 0 # 消极词的第一分值
        negcount2 = 0 # 消极词反转后的分值
        negcount3 = 0 # 消极词的最后分值 (包括叹号的分值)
        for word in segtmp:
            if word in posdict:  # 判断词语是否是情感词
                poscount += 1
                c = 0
                for w in segtmp[a:i]:  # 扫描情感词前的程度词
                    if w in mostdict:
                        poscount *= 2.0
                    elif w in verydict:
                        poscount *= 1.5
                    elif w in moredict:
                        poscount *= 1.25
                    elif w in ishdict:
                        poscount *= 1.2
                    elif word in insufficientdict:
                        poscount *= 0.8
                    elif word in inversedict:
                        poscount *= -1
                    elif w in deny_word:
                        c += 1

                if judgeodd(c) == 'odd':  # 扫描情感词前的否定词数
                    poscount *= -1.0
                    poscount2 += poscount
                    poscount = 0
                    poscount3 = poscount + poscount2 + poscount3
                    poscount2 = 0
                else:
                    poscount3 = poscount + poscount2 + poscount3
                    poscount = 0
                a = i + 1  # 情感词的位置变化

            elif word in negdict:  # 消极情感的分析,与上面一致
                negcount += -1
                d = 0
                for w in segtmp[a:i]:
                    if w in mostdict:
                        negcount *= -2.0
                    elif w in verydict:
                        negcount *= -1.5
                    elif w in moredict:
                        negcount *= -1.25
                    elif w in ishdict:
                        negcount *= -1.2
                    elif word in insufficientdict:
                        poscount *= -0.8
                    elif w in degree_word:
                        d += 1
                if judgeodd(d) == 'odd':
                    negcount *= -1.0
                    negcount2 += negcount
                    negcount = 0
                    negcount3 = negcount + negcount2 + negcount3
                    negcount2 = 0
                else:
                    negcount3 = negcount + negcount2 + negcount3
                    negcount = 0
                a = i + 1

            elif word == '!' or word == '!':  ## 判断句子是否有感叹号
                for w2 in segtmp[::-1]:  # 扫描感叹号前的情感词,发现后权值+2,然后退出循环
                    if w2 in posdict or negdict:
                        poscount3 += 2
                        negcount3 += 2
                        break
            i += 1 # 扫描词位置前移

            # 以下是防止出现负数的情况
            pos_count = 0
            neg_count = 0
            if poscount3 < 0 and negcount3 > 0:
                neg_count += negcount3 - poscount3
                pos_count = 0
            elif negcount3 < 0 and poscount3 > 0:
                pos_count = poscount3 - negcount3
                neg_count = 0
            elif poscount3 < 0 and negcount3 < 0:
                neg_count = -poscount3
                pos_count = -negcount3
            else:
                pos_count = poscount3
                neg_count = negcount3

            count1.append([pos_count, neg_count])
            count2.append(count1)
            count1 = []
            return count2

def sentiment_score(senti_score_list):
    score = []
    for review in senti_score_list:
        score_array = np.array(review)
        Pos = np.sum(score_array[:, 0]) # 积极
        Neg = np.sum(score_array[:, 1]) # 消极
        AvgPos = np.mean(score_array[:, 0])
        AvgPos = float('%.1f'%AvgPos)
        AvgNeg = np.mean(score_array[:, 1])
        AvgNeg = float('%.1f'%AvgNeg)
        StdPos = np.std(score_array[:, 0])
        StdPos = float('%.1f'%StdPos)
        StdNeg = np.std(score_array[:, 1])
        StdNeg = float('%.1f'%StdNeg)
        score.append([Pos, Neg, AvgPos, AvgNeg, StdPos, StdNeg])
    return score

data = '...'
data1= '...'
print(sentiment_score(sentiment_score_list(data)))
print(sentiment_score(sentiment_score_list(data1)))
  • 写回答

1条回答 默认 最新

  • 关注
    评论

报告相同问题?

悬赏问题

  • ¥15 使用C#,asp.net读取Excel文件并保存到Oracle数据库
  • ¥15 C# datagridview 单元格显示进度及值
  • ¥15 thinkphp6配合social login单点登录问题
  • ¥15 HFSS 中的 H 场图与 MATLAB 中绘制的 B1 场 部分对应不上
  • ¥15 如何在scanpy上做差异基因和通路富集?
  • ¥20 关于#硬件工程#的问题,请各位专家解答!
  • ¥15 关于#matlab#的问题:期望的系统闭环传递函数为G(s)=wn^2/s^2+2¢wn+wn^2阻尼系数¢=0.707,使系统具有较小的超调量
  • ¥15 FLUENT如何实现在堆积颗粒的上表面加载高斯热源
  • ¥30 截图中的mathematics程序转换成matlab
  • ¥15 动力学代码报错,维度不匹配