第一次使用 BERT_CCPoem,能不能帮忙看看下面的代码有什么问题?
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
import torch
# Load the poem table from the Excel workbook.
excel_file = 'C:\\Users\\86166\\Desktop\\tangshi_Cdata.xlsx'
df = pd.read_excel(io=excel_file)

# The tokenizer and the classifier are both loaded from the same local
# BERT_CCPoem_v1 directory.
model_dir = "C:\\Users\\86166\\Desktop\\BERT_CCPoem_v1"
tokenizer = BertTokenizer.from_pretrained(model_dir)

# num_labels=2 requests a two-class (binary sentiment) classification head.
# NOTE(review): if this checkpoint holds only the pretrained encoder (no
# fine-tuned classifier weights), transformers initializes the classification
# head randomly and the resulting scores carry no sentiment meaning until the
# model is fine-tuned on labelled data -- confirm what the checkpoint contains.
model = BertForSequenceClassification.from_pretrained(model_dir, num_labels=2)
def analyze_sentiments(poetry_texts):
    """Return the class-1 (positive) probability for each poem text.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        poetry_texts: A single string, or an iterable of strings, to score.

    Returns:
        A list of floats, one per input text: the softmax probability of
        class index 1 computed from the model's logits. An empty iterable
        yields an empty list.
    """
    if not isinstance(poetry_texts, str):
        # Materialize so generators and other iterables can be checked for
        # emptiness and handed to the tokenizer as a plain list.
        poetry_texts = list(poetry_texts)
        if not poetry_texts:
            # Nothing to score; do not call the tokenizer/model on an empty batch.
            return []

    # Pin the truncation length to the model's position-embedding size.
    # With truncation=True alone the tokenizer falls back to its own
    # model_max_length, which may be unset for a locally saved tokenizer,
    # letting over-long poems through to the model.
    inputs = tokenizer(
        poetry_texts,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )

    # Inference only: skip autograd bookkeeping so no graph is kept per batch.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Column 1 of the softmax output is treated as the positive-class score.
    probabilities = torch.softmax(logits, dim=1)
    return probabilities[:, 1].tolist()
# Score every poem in fixed-size batches.
batch_size = 8

# Empty Excel cells are read by pandas as NaN (a float), which is not valid
# text for the tokenizer. Score only the rows that have content and coerce
# the remaining cell values to str.
poem_column = df['古诗内容']
has_text = poem_column.notna().tolist()
poems = [str(text) for text, ok in zip(poem_column.tolist(), has_text) if ok]

sentiment_scores = []
for start in range(0, len(poems), batch_size):
    batch_scores = analyze_sentiments(poems[start:start + batch_size])
    sentiment_scores.extend(batch_scores)

# Store the scores in the DataFrame. Rows without text get NaN so the new
# column stays positionally aligned with the original rows.
score_iter = iter(sentiment_scores)
df['情感分数'] = [next(score_iter) if ok else float('nan') for ok in has_text]

# Write the DataFrame, including the new score column, to a new Excel file.
output_file = 'C:\\Users\\86166\\Desktop\\tangshi_sentiment.xlsx'
df.to_excel(output_file, index=False)
print(f"情感分数已保存到 {output_file}")
以上是部分运行结果