Problem description and background
When using Python to build a Gephi text co-occurrence network, part of the code fails with: TypeError: cannot unpack non-iterable NoneType object
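From what I understand, this TypeError appears when a call that is expected to return two values actually returns None, so the tuple on the left-hand side has nothing to unpack. A minimal sketch (a hypothetical function, only to illustrate the shape of the failure):

def make_pair():
    pair = (1, 2)
    # no return statement is ever executed, so the function implicitly returns None

a, b = make_pair()  # TypeError: cannot unpack non-iterable NoneType object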
Relevant code (pasted as text, not a screenshot)
# In[176]:
import jieba
import os
import re
import numpy as np
import jieba.posseg as psg
import networkx as nx
import pandas as pd
import math
os.chdir("/Users/melokiku/Desktop/LDA/data")
# In[179]:
def get_stop_dict(file):
    # load stopwords, one word per line
    content = open(file, encoding="utf-8")
    word_list = []
    for c in content:
        c = re.sub('\n|\r', '', c)
        word_list.append(c)
    content.close()
    return word_list
# In[180]:
def get_data(path):
    t = open(path, encoding="utf-8")
    data = t.read()
    t.close()
    return data
# In[199]:
def get_wordlist(text, maxn, synonym_words, stop_words):
    # count candidate words and return the maxn most frequent ones
    synonym_origin = list(synonym_words['origin'])
    synonym_new = list(synonym_words['new'])
    flag_list = ['n','nz','vn','nto','nh','nhd','nn','nnt','gg','j','a','v','r']  # POS tags to keep (a = adjective, v = verb, etc.)
    counts = {}
    text_seg = psg.cut(text)
    for word_flag in text_seg:
        #word = re.sub("[^\u4e00-\u9fa5]", "", word_flag.word)
        word = word_flag.word
        if word_flag.flag in flag_list and len(word) > 1 and word not in stop_words:
            if word in synonym_origin:
                index = synonym_origin.index(word)
                word = synonym_new[index]
            counts[word] = counts.get(word, 0) + 1
    words = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    words = list(dict(words).keys())[0:maxn]
    return words
# In[200]:
def get_t_seg(topwords, text, synonym_words, stop_words):
    # segment each line, keep only words in topwords, and count document frequency per word
    word_docs = {}
    synonym_origin = list(synonym_words['origin'])
    synonym_new = list(synonym_words['new'])
    flag_list = ['n','nz','vn','nto','nh','nhd','nn','nnt','gg','j','a','v','r']  # POS tags to keep (a = adjective, v = verb, etc.)
    text_lines_seg = []
    text_lines = text.split("\n")
    for line in text_lines:
        t_seg = []
        text_seg = psg.cut(line)
        for word_flag in text_seg:
            #word = re.sub("[^\u4e00-\u9fa5]", "", word_flag.word)
            word = word_flag.word
            if word_flag.flag in flag_list and len(word) > 1 and word not in stop_words:
                if word in synonym_origin:
                    word = synonym_new[synonym_origin.index(word)]
                if word in topwords:
                    t_seg.append(word)
        t_seg = list(set(t_seg))
        for word in t_seg:
            word_docs[word] = word_docs.get(word, 0) + 1
        text_lines_seg.append(t_seg)
    # the return must sit at function-body level; otherwise the function returns None
    return text_lines_seg, word_docs
# In[201]:
def get_comatrix(text_lines_seg):
    # note: relies on the global topwords defined later in the script
    comatrix = pd.DataFrame(np.zeros([len(topwords), len(topwords)]), columns=topwords, index=topwords)
    for t_seg in text_lines_seg:
        for i in range(len(t_seg)-1):
            for j in range(i+1, len(t_seg)):
                comatrix.loc[t_seg[i], t_seg[j]] += 1
    for k in range(len(comatrix)):
        comatrix.iloc[k, k] = 0
    return comatrix
# In[202]:
def get_pmi(word1, word2, word_docs, co_matrix, n):
    # PMI = log2( p(w1, w2) / (p(w1) * p(w2)) ), with counts normalized by n
    pw1 = word_docs[word1] / n
    pw2 = word_docs[word2] / n
    pw1w2 = (co_matrix.loc[word1][word2] + co_matrix.loc[word2][word1]) / n
    if pw1w2 / (pw1 * pw2) <= 0:
        return 0
    else:
        pmi = math.log2(pw1w2 / (pw1 * pw2))
        return pmi
# In[203]:
def get_net(copmi, topwords):
    g = nx.Graph()
    for i in range(len(topwords)-1):
        word = topwords[i]
        for j in range(i+1, len(topwords)):
            word2 = topwords[j]
            w = copmi.loc[word][word2] + copmi.loc[word2][word]
            if w > 0:
                g.add_edge(word, word2, weight=w)
    return g
# In[204]:
# file paths
dic_file = "/Users/melokiku/Desktop/LDA/stop_dic/dict.txt"
stop_file = "/Users/melokiku/Desktop/LDA/stop_dic/stopwords.txt"
data_path = "/Users/melokiku/Desktop/LDA/data/df.txt"
synonym_file = "/Users/melokiku/Desktop/LDA/stop_dic/synonym_list.xlsx"
# In[205]:
# read the input files
data = get_data(data_path)
stop_words = get_stop_dict(stop_file)
jieba.load_userdict(dic_file)
synonym_words = pd.read_excel(synonym_file)
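(For context: the functions above assume synonym_list.xlsx has two columns named 'origin' and 'new', mapping each original term to its replacement. A hypothetical table with the same layout, only to show the structure I am using:)

# hypothetical rows, only to illustrate the expected column layout of synonym_list.xlsx
synonym_example = pd.DataFrame({"origin": ["电脑", "笔电"], "new": ["计算机", "笔记本电脑"]})
synonym_example.to_excel("synonym_list_example.xlsx", index=False)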
# In[206]:
# data processing
n_topwords=200
topwords = get_wordlist(data,n_topwords,synonym_words,stop_words)
# In[207]:
t_segs,word_docs = get_t_seg(topwords,data,synonym_words,stop_words)
n = len(t_segs)
co_matrix = get_comatrix(t_segs)
Output and error message
TypeError                                 Traceback (most recent call last)
Input In [207], in
----> 1 t_segs,word_docs = get_t_seg(topwords,data,synonym_words,stop_words)
      2 n = len(t_segs)
      3 co_matrix = get_comatrix(t_segs)

TypeError: cannot unpack non-iterable NoneType object
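From what I've read, this error means get_t_seg returned None instead of the (text_lines_seg, word_docs) tuple, which usually happens when the final return statement in the version of the function the kernel actually executed is missing or mis-indented into a branch that never runs. Checking that return text_lines_seg, word_docs sits at the function-body indentation level, re-running the cell that defines get_t_seg, and then calling it directly should confirm whether that is the cause:

# quick sanity check after re-running the cell that defines get_t_seg
result = get_t_seg(topwords, data, synonym_words, stop_words)
print(type(result))  # should be <class 'tuple'>, not <class 'NoneType'>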
Subsequent code (not reached because of the error above)
# In[ ]:
copmi = pd.DataFrame(np.zeros([len(topwords), len(topwords)]), columns=topwords, index=topwords)
for i in range(len(topwords)-1):
    word1 = topwords[i]
    for j in range(i+1, len(topwords)):
        word2 = topwords[j]
        # use .loc for the assignment to avoid chained-indexing issues
        copmi.loc[word1, word2] = get_pmi(word1, word2, word_docs, co_matrix, n)
# In[ ]:
co_net = get_net(copmi, topwords)
# In[ ]:
nx.write_gexf(co_net, "/Users/melokiku/Desktop/LDA/result/word_pmi.gexf")
I have little experience with this and am completely lost; any help would be appreciated 🙏