我在用Tensorflow的自定义层构建模型时报错:
File "seq2seq.py", line 414, in <module>
_, state_h, state_c = encoder_lstm(encoder_embedding)
File "C:\Users\sjq\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\keras\engine\keras_tensor.py", line 415, in __iter__
raise TypeError(
TypeError: Cannot iterate over a Tensor with unknown first dimension.
这是我的层定义和模型定义代码:
class EnhancedAttentionGRUCell_41(AttentionGRUCell):
def __init__(self, units, att_units, temp=1.0, **kwargs):
super().__init__(units, att_units, **kwargs)
self.en = En(att_units)
self.aaaaa = Aa(temp)
def call(self, inputs, states, training=None):
tf.compat.v1.enable_eager_execution()
print('ss',inputs.shape)
h_prev, seq_embed = states
h_gru, _ = self.gru_cell(inputs, states=[h_prev])
attended_seq = self.en(seq_embed)
temp_pooled_attended_seq = self.aaaaa(attended_seq)
output = tf.concat([h_gru, temp_pooled_attended_seq], axis=-1)
return output, [output, seq_embed]
class AttentionGRULayerEn_41(Layer):
def __init__(self, units, att_units, **kwargs):
super(AttentionGRULayerEn_41, self).__init__(**kwargs)
self.cell = EnhancedAttentionGRUCell_41(units, att_units)
self.units = units
self.att_units = att_units
self.input_dim=2048*20
'''
def get_initial_state(self, inputs):
batch_size = tf.shape(inputs)[0]
initial_hidden = tf.zeros(shape=(batch_size, self.units))
return initial_hidden, inputs
'''
def get_initial_state(self, inputs):
batch_size = batchsize#tf.shape(inputs)[0] # 获取动态批次大小
initial_hidden = tf.zeros(shape=(batch_size, self.units)) # 根据批次大小创建隐藏状态
initial_seq_embed = tf.zeros(shape=(batch_size, tf.shape(inputs)[1], self.input_dim))
return initial_hidden, initial_seq_embed
def call(self, inputs, states=None, training=None):
if not tf.executing_eagerly():
return inputs
if states is None:
states = self.get_initial_state(inputs)
#tf.assert_rank(x, 2)
#print(inputs.shape)
outputs, new_states = tf.keras.backend.rnn(self.cell, inputs, states,
constants=None, unroll=False, time_major=False,
go_backwards=False, mask=None)
final_output = outputs if isinstance(outputs, tf.Tensor) else outputs[-1]
return final_output, new_states
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(input_dim=input_vocab_size,
output_dim=embedding_dim)(encoder_inputs)
encoder_lstm = LSTM(units)
_, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]
model_encoder = Model(encoder_inputs, encoder_states)
# 解码器模型
decoder_inputs = Input(shape=(None, ))
decoder_embedding = Embedding(input_dim=output_vocab_size, output_dim=embedding_dim,
batch_input_shape=[batchsize, None])(decoder_inputs)
decoder_lstm = AttentionGRULayerEn_41(units, units)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(output_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
model_decoder = Model([decoder_inputs] + encoder_states, [decoder_outputs])
# 训练模型
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])