共现矩阵一直说有错误
import xlrd
import time
# Wall-clock start time; the last lines of the script subtract it from a
# second time.time() reading to print the total run time in seconds.
time_start=time.time()
# Absolute path to a local .xls workbook.
# NOTE(review): nothing visible in this file reads this module-level name --
# readxls_bycol() takes its own `path` parameter and main() defines its own
# paths -- so confirm whether it is still needed.
path=r'D:\大学\语言处理\学python\XXX_任务四\材料1.xls'
def readxls_bycol(path, colnum):
    """Return the values of one column from the first sheet of a workbook.

    Args:
        path: Location of the workbook, handed to ``xlrd.open_workbook``.
        colnum: Column index handed to ``Sheet.col_values``.

    Returns:
        A new list with the cell values of that column. The list is also
        printed to stdout before being returned.
    """
    workbook = xlrd.open_workbook(path)
    # Only the first worksheet is ever consulted.
    first_sheet = workbook.sheets()[0]
    column_values = list(first_sheet.col_values(colnum))
    print(column_values)
    return column_values
# Turn the spreadsheet column data into a set of terms
def intoset(data):
    """Collect the distinct '/'-separated tokens found in ``data``.

    Args:
        data: Iterable of strings; every entry is split on '/'.

    Returns:
        A set containing each token that occurs in any entry. The set is
        also printed to stdout before being returned.
    """
    # Flatten all entries into one sorted token list first, so the set is
    # always populated in the same (sorted) insertion order.
    tokens = sorted(
        token for entry in data for token in entry.split('/')
    )
    unique_tokens = set(tokens)
    print(unique_tokens)
    return unique_tokens
# Build the two-dimensional list (matrix skeleton) from the set
def create_list2(data_set):
    """Build a zero-filled square table labelled with the items of ``data_set``.

    The table has ``len(data_set) + 1`` rows and columns. Row 0 and column 0
    hold the labels, in the iteration order of ``data_set``; cell [0][0] and
    every other cell are 0.

    Args:
        data_set: Collection of labels (iterated twice, once per axis).

    Returns:
        The table as a list of independent row lists. Intermediate states
        are printed to stdout along the way.
    """
    size = len(data_set) + 1
    table = [[0] * size for _ in range(size)]

    # Header row: labels go into columns 1..len(data_set).
    for position, label in enumerate(data_set, start=1):
        table[0][position] = label
    print(table)

    print(data_set)
    # Header column: the same labels go into rows 1..len(data_set).
    for position, label in enumerate(data_set, start=1):
        table[position][0] = label
    print(table)

    return table
# Count co-occurrences and fill in the table
def count_frequency(list2, data, data_set):
    """Fill ``list2`` in place with pairwise co-occurrence counts.

    For every pair of distinct labels, the matching cell receives the number
    of entries in ``data`` that contain both labels after splitting the entry
    on '/'. Diagonal cells (a label paired with itself) are left untouched.

    Args:
        list2: Square table whose row 0 and column 0 hold the labels, with
            at least ``len(data_set) + 1`` rows and columns.
        data: Iterable of strings, each a '/'-separated list of terms.
        data_set: Collection of labels; only its length is used, to decide
            how many labelled rows and columns to fill.

    Returns:
        The same ``list2`` object, now holding the counts. The split records
        and the finished table are printed to stdout.
    """
    # Split every record on '/' into its list of terms.
    data_formted = [record.split('/') for record in data]
    print(data_formted)

    # Only presence of a term in a record matters, so sets give the same
    # answer as list membership while making each lookup O(1).
    record_terms = [set(terms) for terms in data_formted]

    # Labelled cells occupy indexes 1..len(data_set) inclusive; stopping at
    # len(data_set) - 1 would leave the last row and column unfilled.
    size = len(data_set) + 1
    for row in range(1, size):
        row_term = list2[0][row]  # invariant for the whole inner loop
        for col in range(1, size):
            if row == col:
                continue  # diagonal: a term paired with itself is skipped
            col_term = list2[col][0]
            list2[row][col] = sum(
                1
                for terms in record_terms
                if col_term in terms and row_term in terms
            )
    print(list2)
    return list2
def switch_totxt(path, matrix, encoding='utf-8'):
    """Write ``matrix`` to a text file as tab-separated values.

    Each row becomes one line. Every cell is converted with ``str`` and
    followed by a tab, so each line ends with a trailing tab before the
    newline. Rows are written at their own length, so the matrix does not
    have to be square.

    Args:
        path: Destination file; it is created or overwritten.
        matrix: Iterable of rows, each an iterable of cells.
        encoding: Text encoding of the output file. Defaults to UTF-8 so
            that non-ASCII labels are written the same way on every
            platform instead of depending on the locale default.
    """
    with open(path, 'w', encoding=encoding) as f:
        for row in matrix:
            # Iterate the row itself rather than range(len(matrix)), which
            # used the row count as the column count.
            f.write(''.join(str(cell) + '\t' for cell in row) + '\n')
def main(path_xls=r'材料1.xlsx', path_txt=r'共现矩阵.txt', colnum=2):
    """Build the co-occurrence matrix from a workbook column and save it.

    Reads column ``colnum`` of the workbook, derives the set of distinct
    terms, builds the labelled table, fills in the co-occurrence counts,
    prints the result and writes it to ``path_txt``.

    Args:
        path_xls: Workbook to read the '/'-separated term lists from.
        path_txt: Text file that receives the finished matrix.
        colnum: Index of the workbook column to read.
    """
    # Read from the workbook, not from the output text file.
    # NOTE(review): xlrd 2.0+ only opens .xls files; confirm the installed
    # xlrd version can read this .xlsx path, or point it at an .xls file.
    data = readxls_bycol(path_xls, colnum)
    data_set = intoset(data)
    list2 = create_list2(data_set)
    matrix = count_frequency(list2, data, data_set)
    print(matrix)
    switch_totxt(path_txt, matrix)
# Run the whole pipeline as soon as the file is executed or imported
# (there is no `if __name__ == "__main__"` guard).
main()
# Print the seconds elapsed since time_start was taken at the top of the script.
time_end = time.time()
print(time_end - time_start)
# NOTE(review): 'sucess!' is a misspelling of "success"; it is left as-is
# because it is runtime output.
print('sucess!')