问题遇到的现象和发生背景
第一次使用Tensorflow, 这是我在GitHub上找的一个项目,这个项目原本用TensorFlow 1.x的写法来写的,我用TensorFlow 2.x的写法仿造一些示例代码来改写,训练过程中遇到所提到的这个错误。
运行结果及报错内容
Traceback (most recent call last):
File "train.py", line 78, in <module>
train(model)
File "train.py", line 24, in train
model.optimize()
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 889, in __call__
result = self._call(*args, **kwds)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 933, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 763, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3050, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3444, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3279, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 999, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 672, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3971, in bound_method_wrapper
return wrapped_fn(*args, **kwargs)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 986, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
/root/autodl-tmp/HyperBox-main/script/model_v2/box_model.py:328 optimize *
self.create_optimizer()
/root/autodl-tmp/HyperBox-main/script/model_v2/box_model.py:139 create_optimizer *
self.optimizer = tf.keras.optimizers.Adam(
/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:530 minimize **
return self.apply_gradients(grads_and_vars, name=name)
/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:630 apply_gradients
grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars)
/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/utils.py:75 filter_empty_gradients
raise ValueError("No gradients provided for any variable: %s." %
ValueError: No gradients provided for any variable: ['rel_bases:0', 'rel_shapes:0', 'rel_multiples:0', 'base_weight_ent_emb:0', 'bump_weight_ent_emb:0', 'ent_emb:0', 'ent_emb_bmp:0'].
问题相关代码,请勿粘贴截图
class BoxE:
def __init__(self, params, corpus_type, work_dir):
self.params = params
self.alpha = params.alpha
self.num_rel = 1
self.bounded_norm = params.bounded_norm
self.normed_bumps = params.normed_bumps
self.fixed_width = params.fixed_width
self.hard_size = params.hard_size
self.total_size = params.total_size
self.learnable_shape = params.learnable_shape
self.corpus_type = corpus_type
self.cwd = work_dir
self.word_vectors = np.load(
f"{self.cwd}/../../word_vectors_processed/{self.corpus_type}_word_vectors_processed.npy"
)
def create_optimizer(self):
self.loss = -1 * tf.math.reduce_mean(
input_tensor=tf.math.log_sigmoid(self.params.gamma - self.pos_dissims)
) - tf.math.reduce_mean(
input_tensor=tf.math.log_sigmoid(self.neg_dissims - self.params.gamma)
)
pprint("type of loss:{}".format(type(self.loss)))
self.optimizer = tf.keras.optimizers.Adam(
self.params.learning_rate
).minimize(self.loss, self.var_list, tape=tf.GradientTape()) #报错的代码
def create_train_model(self):
self.pos_h_points = tf.expand_dims(self.ph_base_emb + self.pt_bump_emb, 1)
self.pos_t_points = tf.expand_dims(self.pt_base_emb + self.ph_bump_emb, 1)
self.neg_h_points = tf.expand_dims(self.nh_base_emb + self.nt_bump_emb, 1)
self.neg_t_points = tf.expand_dims(self.nt_base_emb + self.nh_bump_emb, 1)
self.pos_points = tf.math.tanh(
tf.concat([self.pos_h_points, self.pos_t_points], 1)
)
self.neg_points = tf.math.tanh(
tf.concat([self.neg_h_points, self.neg_t_points], 1)
)
#### concat dimension is batch*2*100 ####
self.pos_dissims = self.distance_function(self.pos_points)
self.neg_dissims = self.distance_function(self.neg_points)
def gather_train_embeddings(self, ph:tf.int32, pt:tf.int32, nh:tf.int32, nt:tf.int32, r:tf.int32):
temp = tf.matmul(self.ent_emb, self.base_weight_ent_emb)
self.ph_base_emb = tf.gather(temp, ph)
self.pt_base_emb = tf.gather(temp, pt)
self.nh_base_emb = tf.gather(temp, nh)
self.nt_base_emb = tf.gather(temp, nt)
temp1 = tf.matmul(self.ent_emb_bmp, self.bump_weight_ent_emb)
if self.normed_bumps: # Normalization of bumps option
temp1 = tf.math.l2_normalize(temp1, axis=1)
self.ph_bump_emb = tf.gather(temp1, ph)
self.pt_bump_emb = tf.gather(temp1, pt)
self.nh_bump_emb = tf.gather(temp1, nh)
self.nt_bump_emb = tf.gather(temp1, nt)
self.rel_bases_emb = tf.math.tanh(tf.gather(self.rel_bases, r))
self.rel_deltas_emb = tf.math.tanh(tf.gather(self.rel_deltas, r))
@tf.function
def optimize(self):
for itr in range(0, self.params.max_iterate + 1):
total_loss = 0.0
for b in range(self.num_batch):
ph, pt, nh, nt, r = self.reader.next_batch()
self.gather_train_embeddings(ph, pt, nh, nt, r)
self.create_train_model()
self.create_optimizer()
total_loss += self.loss
if math.isnan(total_loss):
break
print("Loss in iteration", itr, "=", total_loss)
if itr % self.params.save_each == 0 and itr >= self.params.save_after:
self.save_model(itr)
print("done saving model")
def setup_weights(self):
sqrt_size = 6.0 / math.sqrt(self.params.emb_size)
self.ent_emb = tf.Variable(self.word_vectors, dtype=tf.float32, name="ent_emb")
self.ent_emb_bmp = tf.Variable(
self.word_vectors, dtype=tf.float32, name="ent_emb_bmp"
)
self.base_weight_ent_emb = tf.Variable(
name="base_weight_ent_emb",
initial_value=tf.random.uniform(
# shape=[300, self.params.emb_size], minval=-sqrt_size, maxval=sqrt_size
shape=[400, self.params.emb_size], minval=-sqrt_size, maxval=sqrt_size
),
)
self.bump_weight_ent_emb = tf.Variable(
name="bump_weight_ent_emb",
initial_value=tf.random.uniform(
# shape=[300, self.params.emb_size], minval=-sqrt_size, maxval=sqrt_size
shape=[400, self.params.emb_size], minval=-sqrt_size, maxval=sqrt_size
),
)
if self.learnable_shape: # If shape is learnable, define variables accordingly
self.rel_shapes = tf.Variable(
name="rel_shapes",
initial_value=tf.random.uniform(
shape=[self.num_rel, 2, self.params.emb_size],
minval=-sqrt_size,
maxval=sqrt_size,
),
)
self.norm_rel_shapes = self.product_normalise(
self.rel_shapes, self.bounded_norm
)
else:
self.norm_rel_shapes = tf.ones(
[self.num_rel, 2, self.params.emb_size], name="norm_rel_shapes"
)
self.rel_bases = tf.Variable(
name="rel_bases",
initial_value=tf.random.uniform(
shape=[self.num_rel, 2, self.params.emb_size],
minval=-sqrt_size,
maxval=sqrt_size,
),
)
if self.fixed_width:
self.rel_multiples1 = tf.zeros([self.num_rel, 2, 1])
else:
self.rel_multiples1 = tf.Variable(
name="rel_multiples",
initial_value=tf.random.uniform(
shape=[self.num_rel, 2, 1], minval=-sqrt_size, maxval=sqrt_size
),
)
if self.hard_size:
self.rel_multiples = self.total_size * tf.nn.softmax(
self.rel_multiples1, axis=0
)
else:
self.rel_multiples = tf.nn.elu(self.rel_multiples1) + tf.constant(1.0)
self.rel_deltas = tf.multiply(
self.rel_multiples, self.norm_rel_shapes, name="rel_deltas"
)
self.var_list = [
self.rel_bases,
self.rel_shapes,
self.rel_multiples1,
self.base_weight_ent_emb,
self.bump_weight_ent_emb,
self.ent_emb,
self.ent_emb_bmp,
]
def setup_reader(self):
self.reader = Reader(self.corpus_type)
self.reader.read_triples()
self.reader.set_batch_size(self.params.batch_size)
self.reader.set_neg_samples(self.params.no_neg_samples)
self.num_batch = self.reader.num_batch()
self.num_ent = self.reader.num_ent()
def product_normalise(self, input_tensor, bounded_norm=True):
step1_tensor = tf.abs(input_tensor)
step2_tensor = step1_tensor + (10 ** -8)
log_norm_tensor = tf.math.log(step2_tensor)
step3_tensor = tf.reduce_mean(input_tensor=log_norm_tensor, axis=2, keepdims=True)
norm_volume = tf.math.exp(step3_tensor)
pre_norm_out = input_tensor / norm_volume
if not bounded_norm:
return pre_norm_out
else:
minsize_tensor = tf.minimum(
tf.reduce_min(input_tensor=log_norm_tensor, axis=2, keepdims=True), -1
)
maxsize_tensor = tf.maximum(
tf.reduce_max(input_tensor=log_norm_tensor, axis=2, keepdims=True), 1
)
minsize_ratio = -1 / minsize_tensor
maxsize_ratio = 1 / maxsize_tensor
size_norm_ratio = tf.minimum(minsize_ratio, maxsize_ratio)
normed_tensor = log_norm_tensor * size_norm_ratio
return tf.exp(normed_tensor)
def distance_function(self, points):
self.rel_bx_low, self.rel_bx_high = self.compute_box(
self.rel_bases_emb, self.rel_deltas_emb
)
lower_corner = self.rel_bx_low
upper_corner = self.rel_bx_high
centres = 1 / 2 * (lower_corner + upper_corner)
widths = upper_corner - lower_corner
widths_p1 = widths + tf.constant(1.0)
width_cond = tf.compat.v1.where(
tf.logical_and(lower_corner <= points, points <= upper_corner),
tf.abs(points - centres) / widths_p1,
widths_p1 * tf.abs(points - centres)
- (widths / 2) * (widths_p1 - 1 / widths_p1),
)
distance = tf.norm(
tensor=width_cond, axis=2, ord=self.params.p_norm
) ###batch*2*1 after norm
distance = tf.reduce_sum(input_tensor=distance, axis=1)
return distance
def save_model(self, itr):
# filename = (
# f"{self.cwd}/BoxModel_"
# + self.corpus_type
# + "_weights/"
# + str(itr)
# + ".ckpt"
# )
filename = "all_trained_models"
if not os.path.exists(os.path.dirname(filename)):
os.makedirs(os.path.dirname(filename))
tf.saved_model.save(self, filename)
tf.keras.models.save_model(filename, save_format='tf')
def compute_box(self, box_base, box_delta):
box_second = box_base + tf.constant(0.5) * box_delta
box_first = box_base - tf.constant(0.5) * box_delta
box_low = tf.minimum(box_first, box_second, "box_low")
box_high = tf.maximum(box_first, box_second, "box_high")
return box_low, box_high
我的解答思路和尝试过的方法
网上搜了一下,基本上这个错误是因为loss的计算跟var_list里面的可训练参数没有关系,导致梯度不能传递:
https://blog.csdn.net/M1ddle/article/details/106387040?spm=1001.2101.3001.6650.1&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7Edefault-1.pc_relevant_default&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7Edefault-1.pc_relevant_default&utm_relevant_index=2
但是我检查了代码,就我看到的,loss的确是由var_list里的可训练参数一步步直接或间接计算得到的,不知道问题出在哪。
我想要达到的结果
正常传递梯度,正常进行Adam优化