# Critic loss: mean squared error between the observed reward and the critic's prediction
self.loss2 = tf.losses.mean_squared_error(reward, pred)
# Exponentially decayed learning rate and RMSProp optimizer for the critic
self.lr2 = tf.train.exponential_decay(self.lr2_start, self.global_step2, self.lr2_decay_step,
                                      self.lr2_decay_rate, staircase=False, name="learning_rate2")
self.opt2 = tf.train.RMSPropOptimizer(learning_rate=self.lr2, name='opt2')
# Compute gradients over the "critic" variables only, clip them to [-1, 1], then apply
self.grads2 = self.opt2.compute_gradients(
    self.loss2, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="critic"))
self.grads2 = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in self.grads2 if grad is not None]
self.train_step2 = self.opt2.apply_gradients(self.grads2, global_step=self.global_step2)
The value of loss2 keeps moving toward negative infinity during training. Did I write something wrong somewhere?
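For reference, here is a standalone toy sketch of the same update pattern (MSE loss, RMSProp, clipped gradients applied to a "critic" scope). The dummy data, placeholder shapes, and layer sizes below are made up for illustration and are not my real critic network:

import numpy as np
import tensorflow as tf  # TensorFlow 1.x graph-mode API

tf.reset_default_graph()

# Placeholder inputs (assumed shapes, not the original model)
state = tf.placeholder(tf.float32, [None, 4], name="state")
reward = tf.placeholder(tf.float32, [None], name="reward")

# Tiny stand-in for the critic network
with tf.variable_scope("critic"):
    hidden = tf.layers.dense(state, 16, activation=tf.nn.relu)
    pred = tf.squeeze(tf.layers.dense(hidden, 1), axis=-1)

# Same loss / optimizer / clipping pattern as above
loss2 = tf.losses.mean_squared_error(reward, pred)
opt2 = tf.train.RMSPropOptimizer(learning_rate=1e-3)
grads2 = opt2.compute_gradients(
    loss2, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="critic"))
grads2 = [(tf.clip_by_value(g, -1., 1.), v) for g, v in grads2 if g is not None]
train_step2 = opt2.apply_gradients(grads2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(200):
        s = np.random.randn(32, 4).astype(np.float32)
        r = s.sum(axis=1).astype(np.float32)  # toy regression target
        _, l2 = sess.run([train_step2, loss2], feed_dict={state: s, reward: r})
        if step % 50 == 0:
            print(step, l2)  # printed each 50 steps to watch how loss2 evolves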