Melokiku 2022-02-16 00:36 采纳率: 50%
浏览 120
已结题

用python做gephi文本贡献网络时,有一部分出错,TypeError: cannot unpack non-iterable NoneType object

问题遇到的现象和发生背景

用python做gephi文本贡献网络时,有一部分出错,TypeError: cannot unpack non-iterable NoneType object

问题相关代码,请勿粘贴截图
 #In[176]:


import jieba
import os
import re
import numpy as np
import jieba.posseg as psg
import networkx as nx
import pandas as pd
import math
os.chdir("/Users/melokiku/Desktop/LDA/data")


# In[179]:


def get_stop_dict(file):
   content = open(file,encoding="utf-8")
   word_list = []
   for c in content:
       c = re.sub('\n|\r','',c)
       word_list.append(c)
   return word_list


# In[180]:


def get_data(path):
    t = open(path,encoding="utf-8")
    data = t.read()
    t.close()
    return data


# In[199]:


def get_wordlist(text,maxn,synonym_words,stop_words)
    synonym_origin = list(synonym_words['origin'])
    synonym_new = list(synonym_words['new'])
    flag_list = ['n','nz','vn','nto','nh','nhd','nn','nnt','gg','j','a','v','r']#a,形容词,v,形容词,
    counts={}
    
    
    text_seg = psg.cut(text)
    for word_flag in text_seg:
        #word = re.sub(\"[^\\u4e00-\\u9fa5]\",\"\",word_flag.word)
        word = word_flag.word
        if word_flag.flag in flag_list and len(word)>1 and word not in stop_words:
            if word in synonym_origin:
                index = synonym_origin.index(word)
                word = synonym_new[index]
            counts[word]=counts.get(word,0)+1
            
            
    words= sorted(counts.items(),key=lambda x:x[1],reverse=True)
    words= list(dict(words).keys())[0:maxn]
    
    return words


# In[200]:


def get_t_seg(topwords,text,synonym_words,stop_words):
    word_docs = {}
    synonym_origin = list(synonym_words['origin'])
    synonym_new = list(synonym_words['new'])
    flag_list = ['n','nz','vn','nto','nh','nhd','nn','nnt','gg','j','a','v','r']#a,形容词,v,形容词
    
    text_lines_seg =[]
    text_lines = text.split("\n")
    for line in text_lines:
        t_seg = []
        text_seg = psg.cut(line)
    for word_flag in text_seg:
        #word = re.sub(\"[^\\u4e00-\\u9fa5]\",\"\",word_flag.word)
        word = word_flag.word
        if word_flag.flag in flag_list and len(word)>1 and word not in stop_words:
            if word in synonym_origin:
                word = synonym_new[synonym_origin.index(word)]
                if word in topwords:
                    t_seg.append(word)
                    
                    
    t_seg=list(set(t_seg))
    for word in t_seg:
        word_docs[word]=word_docs.get(word,0)+1
        text_lines_seg.append(t_seg)
        return text_lines_seg,word_docs


# In[201]:


def get_comatrix(text_lines_seg):
    comatrix = pd.DataFrame(np.zeros([len(topwords),len(topwords)]),columns=topwords,index=topwords)
    for t_seg in text_lines_seg:
        for i in range(len(t_seg)-1):
            for j in range(i+1,len(t_seg)):
                comatrix.loc[t_seg[i],t_seg[j]]+=1
    for k in range(len(comatrix)):
        comatrix.iloc[k,k]=0
    return comatrix


# In[202]:


def get_pmi(word1,word2,word_docs,co_matrix,n):
    pw1 = word_docs[word1]/n
    pw2 = word_docs[word2]/n
    pw1w2 = (co_matrix.loc[word1][word2]+co_matrix.loc[word2][word1])/n
    if pw1w2/(pw1*pw2)<=0:
        return 0
    else:
        pmi = math.log2(pw1w2/(pw1*pw2))
    return pmi


# In[203]:


def get_net(copmi,topwords):
    g = nx.Graph()
    for i in range(len(topwords)-1):
        word = topwords[i]
        for j in range(i+1,len(topwords)):
            w=0
            word2 = topwords[j]
            w = copmi.loc[word][word2]+copmi.loc[word2][word]
            if w>0:
                g.add_edge(word,word2,weight=w)
    return g


# In[204]:


#文件路径
dic_file = "/Users/melokiku/Desktop/LDA/stop_dic/dict.txt"
stop_file = "/Users/melokiku/Desktop/LDA/stop_dic/stopwords.txt"
data_path = "/Users/melokiku/Desktop/LDA/data/df.txt"
synonym_file = "/Users/melokiku/Desktop/LDA/stop_dic/synonym_list.xlsx"


# In[205]:


#读取文件
data = get_data(data_path)
stop_words = get_stop_dict(stop_file)
jieba.load_userdict(dic_file)
synonym_words = pd.read_excel(synonym_file)


# In[206]:


#数据处理
n_topwords=200
topwords = get_wordlist(data,n_topwords,synonym_words,stop_words)
# In[207]:


t_segs,word_docs = get_t_seg(topwords,data,synonym_words,stop_words)
n = len(t_segs)
co_matrix = get_comatrix(t_segs)
运行结果及报错内容

TypeError Traceback (most recent call last)
Input In [207], in
----> 1 t_segs,word_docs = get_t_seg(topwords,data,synonym_words,stop_words)
2 n = len(t_segs)
3 co_matrix = get_comatrix(t_segs)

TypeError: cannot unpack non-iterable NoneType object

后续代码
# In[ ]:


copmi = pd.DataFrame(np.zeros([len(topwords),len(topwords)]),columns=topwords,index=topwords)
for i in range(len(topwords)-1):
    word1 = topwords[i]
    for j in range(i+1,len(topwords)):
        word2 = topwords[j]
        copmi[word1][word2] = get_pmi(word1,word2,word_docs,co_matrix,n)


# In[ ]:


co_net =get_net(copmi,topwords)


# In[ ]:


nx.write_gexf(co_net,"/Users/melokiku/Desktop/LDA/result/word_pmi.gexf")

没有经验一头雾水,求帮忙解决🙏

  • 写回答

5条回答

      报告相同问题?

      相关推荐 更多相似问题

      问题事件

      • 已结题 2月16日
      • 已采纳回答 2月16日
      • 创建了问题 2月16日

      悬赏问题

      • ¥15 使用DWY100k数据集对UEA进行测试,出现报错:IndexError: index 125000 is out of bounds for axis 0 with size 95500
      • ¥15 前端vue实现根据图片url生成pdf文件
      • ¥15 RfidReader资源Q个
      • ¥20 user-agent是否是唯一的,有没有可能相同
      • ¥15 关于#开会#的问题,如何解决?(语言-c++)
      • ¥15 关于#二十四点问题#的问题
      • ¥15 运行kitex的demon出错(求大家解决)
      • ¥15 开发一个类似 typora 这样的软件需要多少钱?
      • ¥15 clearcase7.0安装包
      • ¥15 断点回归模型月度核密度检验不连续