Problem: tape.gradient(...) keeps returning [None, ...] for every trainable variable.
def net_init(self, state, modelIndex):
    """Build the predict/target networks (once) and run a forward pass.

    The original version rebuilt both ``Sequential`` models on *every*
    call, which re-initializes all weights on each forward pass and makes
    training impossible.  The networks are now built lazily, exactly once.

    Args:
        state: Input batch for the network, expected channels-first
            ``(batch, c, h, w)`` to match ``data_format="channels_first"``
            — TODO confirm against the caller.
        modelIndex: 1 to run the online ("predict") network,
            2 to run the target network.

    Returns:
        The selected network's output for ``state``; ``None`` if
        ``modelIndex`` is neither 1 nor 2 (preserved from the original).
    """
    c, h, w = self.input_dim
    # Build both networks only on the first call; rebuilding them later
    # would silently reset every trained weight.
    if getattr(self, "predict", None) is None:
        self.predict = Sequential([
            tf.keras.layers.Conv2D(32, 8, 4, activation='relu',
                                   input_shape=(c, h, w),
                                   data_format="channels_first"),
            tf.keras.layers.Conv2D(64, 4, 2, activation='relu', padding="VALID"),
            tf.keras.layers.Conv2D(64, 3, 1, activation='relu'),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(512, activation='relu'),
            tf.keras.layers.Dense(self.output_dim, activation=None),
        ])
        # Target net mirrors the predict net.  The original hard-coded
        # input_shape=(4, 84, 84) here; use (c, h, w) so both networks
        # stay consistent with self.input_dim.
        self.target = Sequential([
            tf.keras.layers.Conv2D(32, 8, 4, activation='relu',
                                   input_shape=(c, h, w),
                                   data_format="channels_first"),
            tf.keras.layers.Conv2D(64, 4, 2, activation='relu', padding="VALID"),
            tf.keras.layers.Conv2D(64, 3, 1, activation='relu'),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(512, activation='relu'),
            tf.keras.layers.Dense(self.output_dim, activation=None),
        ])
    if modelIndex == 1:
        return self.predict(state)
    if modelIndex == 2:
        # NOTE(review): printing the summary on every target forward pass
        # is noisy — kept from the original; consider moving it into the
        # one-time build above.
        self.target.summary()
        return self.target(state)
@tf.function
def train_step(self, states, actions):
    """Run one optimization step on the online ("predict") network.

    Root cause of the reported ``gradients == [None, ...]``: the original
    loss was ``huber(states, actions)``, computed directly from the tape
    inputs — it never involved ``self.predict``, so the loss had no
    dependency on the network's trainable variables and every gradient
    was ``None``.  The forward pass must happen *inside* the
    ``GradientTape`` for gradients to flow.

    Args:
        states: Input batch fed to ``self.predict``.
        actions: Regression targets for the huber loss — NOTE(review):
            the original used this argument as the second operand of
            ``huber``; confirm these are the intended Bellman targets.

    Returns:
        The scalar loss for this step.
    """
    with tf.GradientTape() as tape:
        # Forward pass inside the tape connects the loss to the
        # network's trainable variables.  (tape.watch(states) was
        # removed: watching the *inputs* does nothing for gradients
        # w.r.t. the model's variables.)
        predictions = self.predict(states, training=True)
        # Reduce the per-sample huber losses to a scalar before
        # differentiating.
        loss = tf.reduce_mean(tf.keras.losses.huber(actions, predictions))
    gradients = tape.gradient(loss, self.predict.trainable_variables)
    # Optional gradient clipping, kept from the original (disabled):
    # gradients = [tf.clip_by_norm(g, 10) for g in gradients]
    self.optimizer.apply_gradients(
        zip(gradients, self.predict.trainable_variables))
    return loss
Looking for an explanation and a fix — why are the gradients always None?