我在位bert添加层时报错:
ValueError: Shapes (None,) and (None, 309, 3) are incompatible
这是我的代码:
def load_texts(file_path):
with open(file_path, 'r', encoding='utf-8') as f:
texts = [line.strip() for line in f]
return texts
# 加载数据
AD_texts = load_texts('data/0.txt')
HC_texts = load_texts('data/1.txt')
MCI_texts = load_texts('data/2.txt')
# 分词
AD_texts_tokenized = [' '.join(jieba.cut(text)) for text in AD_texts]
HC_texts_tokenized = [' '.join(jieba.cut(text)) for text in HC_texts]
MCI_texts_tokenized = [' '.join(jieba.cut(text)) for text in MCI_texts]
# 构建数据列表
texts = AD_texts_tokenized + HC_texts_tokenized + MCI_texts_tokenized
labels = [0] * len(AD_texts) + [1] * len(HC_texts) + [2] * len(MCI_texts)
# 对数据进行切分
train_texts, val_texts, train_labels, val_labels = train_test_split(texts, labels, test_size=0.2, random_state=42)
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=512)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=512)
# 创建tf.data.Dataset
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), train_labels))
val_dataset = tf.data.Dataset.from_tensor_slices((dict(val_encodings), val_labels))
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=512)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=512)
# 创建tf.data.Dataset
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), train_labels))
val_dataset = tf.data.Dataset.from_tensor_slices((dict(val_encodings), val_labels))
if 1:
# 定义BERT模型并抽取特征
bert_model = TFBertModel.from_pretrained('bert-base-chinese')
input_ids = Input(shape=(309,), dtype=tf.int32, name='input_ids') # 假设最大长度为512
attention_mask = Input(shape=(309,), dtype=tf.int32, name='attention_mask')
sequence_output = bert_model([input_ids, attention_mask])[0]
# 添加双向LSTM层
lstm_layer = Bidirectional(LSTM(units=32, return_sequences=True)) # LSTM单元数可自定义
lstm_out = lstm_layer(sequence_output)
# 添加全连接层(线性层)
dense_layer = Dense(units=16, activation='relu')###################
dense_out = dense_layer(lstm_out)
# 添加输出层与softmax激活函数
output_layer = Dense(units=3, activation='softmax') # 输出类别数为3
output = output_layer(dense_out)
# 构建完整模型
model = Model(inputs=[input_ids, attention_mask], outputs=output)
# 编译模型
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
loss='categorical_crossentropy',
metrics=['accuracy'])
# 训练模型
history = model.fit(train_dataset.shuffle(100).batch(1), epochs=2, validation_data=val_dataset.batch(1))
# 保存模型
model.save('my_model_add.h5')
我打印的模型结构为:
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_ids (InputLayer) [(None, 309)] 0 []
attention_mask (InputLayer) [(None, 309)] 0 []
tf_bert_model (TFBertModel) TFBaseModelOutputWi 102267648 ['input_ids[0][0]',
thPoolingAndCrossAt 'attention_mask[0][0]']
tentions(last_hidde
n_state=(None, 309,
768),
pooler_output=(Non
e, 768),
past_key_values=No
ne, hidden_states=N
one, attentions=Non
e, cross_attentions
=None)
bidirectional (Bidirectional) (None, 309, 64) 205056 ['tf_bert_model[0][0]']
dense (Dense) (None, 309, 16) 1040 ['bidirectional[0][0]']
dense_1 (Dense) (None, 309, 3) 51 ['dense[0][0]']
==================================================================================================
Total params: 102,473,795
Trainable params: 102,473,795
Non-trainable params: 0
__________________________________________________________________________________________________