Can anyone help me figure out why, after the text has been segmented, the word-frequency count comes out per individual character instead of per word?
The output is shown in the screenshot below.

# 数据资产管理实践白皮书(6.0版) (Data Asset Management Practice White Paper, v6.0)
import jieba  # use jieba for Chinese word segmentation

fileContent = open(r'D:/anacondadata/dataassetguanjiancicidian/数据资产管理实践白皮书(6.0版).txt', 'r', encoding="utf-8").read()
words = jieba.lcut_for_search(fileContent)  # segmentation returns a list of words
# print(words)

with open('D:\\anacondadata\\dataassetguanjiancicidian\\stopword\\hit_stopwords.txt', 'r', encoding='UTF-8') as files:
    stopwords = [line.strip() for line in files.readlines()]  # load the stopword list

result = ""
for w in words:
    if w not in stopwords and len(w) > 1:  # filter out stopwords and single-character tokens
        if w not in '\n \t \r \f':  # drop whitespace / special characters
            result += w + " "
print(result)

counts = {}
for word in result:
    counts[word] = counts.get(word, 0) + 1
for word, count in counts.items():
    print(f"{word}: {count}")
