在使用卷积神经网络+全连接神经网络计算句子相似度训练模型出现无梯度的问题。
以下是源代码
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
import os
import math
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
#构建输入的向量
sentence_x = np.random.randn(1000, 38, 300)
sentence_x = tf.cast(tf.reshape(sentence_x, [1000, 38, 300, 1]), dtype=tf.float32)
sentence_y = np.random.randn(1000, 38, 300)
sentence_y = tf.cast(tf.reshape(sentence_y, [1000, 38, 300, 1]), dtype=tf.float32)
label = np.random.randint(0, 2, (1, 1000))
label = tf.reshape(label, [1000])
train_db = tf.data.Dataset.from_tensor_slices((sentence_x, sentence_y, label))
train_db = train_db.shuffle(100).batch(20)
#卷积层
conv_layers = [ # 5 units of 2 * conv +maxpooling
# unit 1
layers.Conv2D(3, kernel_size=[2, 2], strides=[2, 2], padding='same', activation = tf.nn.relu),
layers.Conv2D(3, kernel_size=[2, 2], padding='same', activation = tf.nn.relu),
layers.MaxPool2D(pool_size=[2, 2], strides= 2, padding='same'),
# unit 2
layers.Conv2D(3, kernel_size=[2, 2], strides=[2, 2], padding='same', activation = tf.nn.relu),
layers.Conv2D(3, kernel_size=[2, 2], padding='same', activation = tf.nn.relu),
layers.MaxPool2D(pool_size=[2, 2], strides= 2, padding='same'),
]
fc_net = Sequential([
layers.Dense(150, activation = tf.nn.relu),
layers.Dense(80, activation = tf.nn.relu),
layers.Dense(20, activation = None),
])
conv_net = Sequential(conv_layers)
conv_net.build(input_shape = [None, 38, 300, 1])
fc_net.build(input_shape = [None, 171])
optimizer = tf.keras.optimizers.Adam(1e-3)
variables = conv_net.trainable_variables + fc_net.trainable_variables
def main():
for epoch in range(50):
for step, (sentence_x, sentence_y, label) in enumerate(train_db):
with tf.GradientTape() as tape:
out1 = conv_net(sentence_x)
out2 = conv_net(sentence_y)
fc_input_x = tf.reshape(out1, [-1, 171])
fc_input_y = tf.reshape(out2, [-1, 171])
vec_x = fc_net(fc_input_x)
vec_y = fc_net(fc_input_y)
#对输出的句向量进行计算相似度值
output = tf.exp(-tf.reduce_sum(tf.abs(vec_x - vec_y), axis=1))
output = tf.reshape(output, [-1])
output = tf.math.ceil(output)
output1 = tf.one_hot(tf.cast(output, dtype=tf.int32), depth=2)
label = tf.cast(label, dtype=tf.int32)
label= tf.one_hot(label, depth=2)
print("output1", output1)
print("label", label)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output1, labels=label))
#loss = tf.reduce_sum(tf.square(output1-label))
grad = tape.gradient(loss, variables)
optimizer.apply_gradients(zip(grad, variables))
if step % 10 == 0:
print("epoch={0}, step = {1}, loss={2}".format(epoch, step, loss))
if __name__ == '__main__':
main()
希望大佬们能指点一下,本人入门级小白。