Melokiku · 2022-02-16 00:36 · acceptance rate: 50%
172 views
Closed (resolved)

Building a Gephi text co-occurrence network with Python: part of it fails with TypeError: cannot unpack non-iterable NoneType object

Problem description and background

While running the script below, which builds a text co-occurrence network for Gephi, one step fails with TypeError: cannot unpack non-iterable NoneType object.

Relevant code
# In[176]:


import jieba
import os
import re
import numpy as np
import jieba.posseg as psg
import networkx as nx
import pandas as pd
import math
os.chdir("/Users/melokiku/Desktop/LDA/data")


# In[179]:


def get_stop_dict(file):
    # Read the stop-word list, one word per line
    content = open(file, encoding="utf-8")
    word_list = []
    for c in content:
        c = re.sub('\n|\r', '', c)  # strip line endings
        word_list.append(c)
    content.close()
    return word_list


# In[180]:


def get_data(path):
    # Read the whole corpus as a single string
    t = open(path, encoding="utf-8")
    data = t.read()
    t.close()
    return data


# In[199]:


def get_wordlist(text, maxn, synonym_words, stop_words):
    synonym_origin = list(synonym_words['origin'])
    synonym_new = list(synonym_words['new'])
    flag_list = ['n','nz','vn','nto','nh','nhd','nn','nnt','gg','j','a','v','r']  # POS tags to keep (a: adjective, v: verb)
    counts = {}

    text_seg = psg.cut(text)
    for word_flag in text_seg:
        #word = re.sub("[^\u4e00-\u9fa5]","",word_flag.word)
        word = word_flag.word
        if word_flag.flag in flag_list and len(word) > 1 and word not in stop_words:
            if word in synonym_origin:  # map synonyms onto a canonical form
                index = synonym_origin.index(word)
                word = synonym_new[index]
            counts[word] = counts.get(word, 0) + 1

    words = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    words = list(dict(words).keys())[0:maxn]  # keep the maxn most frequent words

    return words


# In[200]:


def get_t_seg(topwords,text,synonym_words,stop_words):
    word_docs = {}
    synonym_origin = list(synonym_words['origin'])
    synonym_new = list(synonym_words['new'])
    flag_list = ['n','nz','vn','nto','nh','nhd','nn','nnt','gg','j','a','v','r']  # POS tags to keep (a: adjective, v: verb)

    text_lines_seg =[]
    text_lines = text.split("\n")
    for line in text_lines:
        t_seg = []
        text_seg = psg.cut(line)
    for word_flag in text_seg:
        #word = re.sub("[^\u4e00-\u9fa5]","",word_flag.word)
        word = word_flag.word
        if word_flag.flag in flag_list and len(word)>1 and word not in stop_words:
            if word in synonym_origin:
                word = synonym_new[synonym_origin.index(word)]
                if word in topwords:
                    t_seg.append(word)

    t_seg=list(set(t_seg))
    for word in t_seg:
        word_docs[word]=word_docs.get(word,0)+1
        text_lines_seg.append(t_seg)
        return text_lines_seg,word_docs


# In[201]:


def get_comatrix(text_lines_seg):
    # Note: relies on the global `topwords` defined later in the notebook
    comatrix = pd.DataFrame(np.zeros([len(topwords),len(topwords)]),columns=topwords,index=topwords)
    for t_seg in text_lines_seg:
        for i in range(len(t_seg)-1):
            for j in range(i+1,len(t_seg)):
                comatrix.loc[t_seg[i],t_seg[j]] += 1  # count within-line co-occurrences
    for k in range(len(comatrix)):
        comatrix.iloc[k,k] = 0  # zero the diagonal: a word does not co-occur with itself
    return comatrix


# In[202]:


def get_pmi(word1,word2,word_docs,co_matrix,n):
    pw1 = word_docs[word1]/n  # P(word1): fraction of lines containing word1
    pw2 = word_docs[word2]/n
    pw1w2 = (co_matrix.loc[word1][word2]+co_matrix.loc[word2][word1])/n  # joint probability, summing both triangles
    if pw1w2/(pw1*pw2) <= 0:  # the pair never co-occurs: treat PMI as 0
        return 0
    else:
        pmi = math.log2(pw1w2/(pw1*pw2))
    return pmi
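
For intuition (with made-up numbers, not from this dataset): if each of two words appears in 10 of n = 100 lines and they co-occur in 5 lines, then pw1 = pw2 = 0.1, pw1w2 = 0.05, and pmi = log2(0.05 / (0.1 * 0.1)) = log2(5) ≈ 2.32. A positive PMI means the pair co-occurs more often than independence would predict, which is what makes it a useful edge weight for the network.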


# In[203]:


def get_net(copmi,topwords):
    g = nx.Graph()
    for i in range(len(topwords)-1):
        word = topwords[i]
        for j in range(i+1,len(topwords)):
            word2 = topwords[j]
            w = copmi.loc[word][word2]+copmi.loc[word2][word]
            if w > 0:  # only connect word pairs with positive PMI
                g.add_edge(word,word2,weight=w)
    return g


# In[204]:


# File paths
dic_file = "/Users/melokiku/Desktop/LDA/stop_dic/dict.txt"
stop_file = "/Users/melokiku/Desktop/LDA/stop_dic/stopwords.txt"
data_path = "/Users/melokiku/Desktop/LDA/data/df.txt"
synonym_file = "/Users/melokiku/Desktop/LDA/stop_dic/synonym_list.xlsx"


# In[205]:


# Read the input files
data = get_data(data_path)
stop_words = get_stop_dict(stop_file)
jieba.load_userdict(dic_file)
synonym_words = pd.read_excel(synonym_file)


# In[206]:


# Data processing
n_topwords=200
topwords = get_wordlist(data,n_topwords,synonym_words,stop_words)


# In[207]:


t_segs,word_docs = get_t_seg(topwords,data,synonym_words,stop_words)
n = len(t_segs)
co_matrix = get_comatrix(t_segs)
Output and error message

TypeError                                 Traceback (most recent call last)
Input In [207], in
----> 1 t_segs,word_docs = get_t_seg(topwords,data,synonym_words,stop_words)
      2 n = len(t_segs)
      3 co_matrix = get_comatrix(t_segs)

TypeError: cannot unpack non-iterable NoneType object
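
For context, this error means the right-hand side of the unpacking assignment evaluated to None rather than a pair of values. A minimal standalone repro (broken_func is a made-up name, not part of the project):

    def broken_func():
        pass  # no return statement is reached, so Python implicitly returns None

    a, b = broken_func()  # TypeError: cannot unpack non-iterable NoneType object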

Subsequent code
# In[ ]:


copmi = pd.DataFrame(np.zeros([len(topwords),len(topwords)]),columns=topwords,index=topwords)
for i in range(len(topwords)-1):
    word1 = topwords[i]
    for j in range(i+1,len(topwords)):
        word2 = topwords[j]
        # fill one triangle only; get_net later sums both directions
        copmi[word1][word2] = get_pmi(word1,word2,word_docs,co_matrix,n)


# In[ ]:


co_net = get_net(copmi,topwords)


# In[ ]:


nx.write_gexf(co_net,"/Users/melokiku/Desktop/LDA/result/word_pmi.gexf")

I have no experience with this and am completely lost; any help would be appreciated 🙏


5 answers

  • 谛凌 2022-02-16 02:10

    Your get_t_seg function returns None, which is what causes the error. In get_t_seg, the return looks misplaced: it should be aligned with the for loop above it (a fuller sketch follows the fix below). Here is your code:

        for word in t_seg:
            word_docs[word]=word_docs.get(word,0)+1
            text_lines_seg.append(t_seg)
            return text_lines_seg,word_docs
    

    Change it to:

        for word in t_seg:
            word_docs[word]=word_docs.get(word,0)+1
            text_lines_seg.append(t_seg)
        return text_lines_seg,word_docs
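
    For reference, here is a sketch of the whole function with the return moved out of the loops. Two further changes are assumptions about the intent, not part of the accepted fix: the word-level loop is re-indented under the line loop (the posted indentation looks like it was flattened by the forum), and the `if word in topwords` check is taken out of the synonym branch so that non-synonym top words are also kept:

        def get_t_seg(topwords, text, synonym_words, stop_words):
            word_docs = {}
            synonym_origin = list(synonym_words['origin'])
            synonym_new = list(synonym_words['new'])
            flag_list = ['n','nz','vn','nto','nh','nhd','nn','nnt','gg','j','a','v','r']

            text_lines_seg = []
            for line in text.split("\n"):
                t_seg = []
                for word_flag in psg.cut(line):  # segment each line separately (assumed intent)
                    word = word_flag.word
                    if word_flag.flag in flag_list and len(word) > 1 and word not in stop_words:
                        if word in synonym_origin:
                            word = synonym_new[synonym_origin.index(word)]
                        if word in topwords:  # moved out of the synonym branch (assumption)
                            t_seg.append(word)
                t_seg = list(set(t_seg))  # deduplicate words within the line
                for word in t_seg:
                    word_docs[word] = word_docs.get(word, 0) + 1  # per-word document frequency
                text_lines_seg.append(t_seg)  # one entry per line of text
            return text_lines_seg, word_docs  # executed once, after all lines are processed

    With this version, t_segs, word_docs = get_t_seg(...) always receives a two-element tuple, so the unpacking error cannot recur.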
    
    This answer was accepted as the best answer by the asker.


Question timeline

  • Question closed on Feb 16
  • Answer accepted on Feb 16
  • Question created on Feb 16
