# 导入所需库
from sklearn.feature_extraction.text import TfidfVectorizer
# Read the corpus: every line of the input file becomes one document.
documents = []
with open('D:\\用户-\\Downloads\\分词结果.txt', 'r', encoding='utf-8') as file:
    # Strip surrounding whitespace (including the trailing newline) from
    # each line before storing it as a document.
    for line in file:
        documents.append(line.strip())

# Create the TF-IDF vectorizer.
# NOTE(review): TfidfVectorizer's default token_pattern only keeps tokens of
# two or more word characters, so single-character words are silently
# dropped from the vocabulary. If single-character words matter for this
# corpus, pass a custom token_pattern -- confirm the intended behaviour.
vectorizer = TfidfVectorizer()

# Learn the vocabulary and compute the (documents x terms) TF-IDF matrix.
tfidf_matrix = vectorizer.fit_transform(documents)

# Fetch the vocabulary terms in column order. get_feature_names() was
# deprecated in scikit-learn 1.0 and removed in 1.2, so prefer
# get_feature_names_out() and only fall back on older installations.
if hasattr(vectorizer, "get_feature_names_out"):
    feature_names = vectorizer.get_feature_names_out()
else:
    feature_names = vectorizer.get_feature_names()

# Print the TF-IDF value of every vocabulary term for every document.
for doc_index in range(tfidf_matrix.shape[0]):
    print("Document", doc_index + 1)
    # Densify one row at a time: cheaper than indexing the sparse matrix
    # once per cell, while keeping memory bounded to a single row.
    row_scores = tfidf_matrix[doc_index].toarray().ravel()
    for term, score in zip(feature_names, row_scores):
        print(term, ":", score)
# Note: the word-segmentation result file contains one single word per line.