我搭建了一个三层的DBN网络
结构维50-50-35-20-17
共三层RBM,提前预训练,最后一层做分类
网络定义如下,RBM的参数是用预训练的参数初始化的
def DBN_net(self):
w1 = tf.Variable(self.rbm_list[0].w, name="w1")
b1 = tf.Variable(deepcopy(self.rbm_list[0].hb), name="b1")
probs = tf.nn.sigmoid(tf.matmul(self.x, w1) + b1)
h1 = tf.nn.relu(tf.sign(probs - tf.random_uniform(tf.shape(probs))))
w2 = tf.Variable(deepcopy(self.rbm_list[1].w), name="w2")
b2 = tf.Variable(deepcopy(self.rbm_list[1].hb), name="b2")
probs = tf.nn.sigmoid(tf.matmul(h1, w2) + b2)
h2 = tf.nn.relu(tf.sign(probs - tf.random_uniform(tf.shape(probs))))
w3 = tf.Variable(deepcopy(self.rbm_list[2].w), name="w3")
b3 = tf.Variable(deepcopy(self.rbm_list[2].hb), name="b3")
probs = tf.nn.sigmoid(tf.matmul(h2, w3) + b3)
h3 = tf.nn.relu(tf.sign(probs - tf.random_uniform(tf.shape(probs))))
input_size = tf.cast(h3.shape[1], tf.int32)
w4 = tf.Variable(tf.truncated_normal([input_size, self.faults]), name="w4")
b4 = tf.Variable(tf.zeros([self.faults]), dtype=tf.float32, name="b4")
out = tf.matmul(h3, w4) + b4
return out
def train(self):
with tf.Session() as sess:
out = self.DBN_net()
logits = tf.nn.softmax(out)
loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=self.y))
optimizer = tf.train.AdamOptimizer(self._opts._learning_rate).minimize(loss)
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(self.y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
sess.run(tf.global_variables_initializer())
for v in tf.trainable_variables():
print(v)
print(sess.run(v))
for i in range(self._opts._epoches):
for start, end in zip(range(0, len(self.x_train), self._opts._batchsize),
range(self._opts._batchsize,
len(self.x_train), self._opts._batchsize)):
batch_x = self.x_train[start:end]
batch_y = self.y_train[start:end]
train_op, train_loss, train_acc = sess.run([optimizer, loss, accuracy],
feed_dict={self.x:batch_x, self.y:batch_y})
# print("epoch {}, loss is {}, acc is {}".format(i, train_loss, train_acc))
print("--------------------------")
for v in tf.trainable_variables():
print(v)
print(sess.run(v))
打印了训练前后的权重,发现只有最后一层的权重和偏置会变,RBM的预训练的权重不会改变
请问是什么原因呢