在tensorflow训练完模型,我直接采用同一个session进行测试,得到结果较好,但是采用训练完保存的模型,进行重新载入进行测试,结果较差,不懂是为什么会出现这样的结果。注:测试数据是一样的。以下是模型结果:
训练集:loss:0.384,acc:0.931. 验证集:loss:0.212,acc:0.968. 训练完在同一session内的测试集:acc:0.96。导入保存的模型进行测试:acc:0.29
def create_model(hps):
global_step = tf.Variable(tf.zeros([], tf.float64), name = 'global_step', trainable = False)
scale = 1.0 / math.sqrt(hps.num_embedding_size + hps.num_lstm_nodes[-1]) / 3.0
print(type(scale))
gru_init = tf.random_normal_initializer(-scale, scale)
with tf.variable_scope('Bi_GRU_nn', initializer = gru_init):
for i in range(hps.num_lstm_layers):
cell_bw = tf.contrib.rnn.GRUCell(hps.num_lstm_nodes[i], activation = tf.nn.relu, name = 'cell-bw')
cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob = dropout_keep_prob)
cell_fw = tf.contrib.rnn.GRUCell(hps.num_lstm_nodes[i], activation = tf.nn.relu, name = 'cell-fw')
cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob = dropout_keep_prob)
rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_bw, cell_fw, inputs, dtype=tf.float32)
embeddedWords = tf.concat(rnn_outputs, 2)
finalOutput = embeddedWords[:, -1, :]
outputSize = hps.num_lstm_nodes[-1] * 2 # 因为是双向LSTM,最终的输出值是fw和bw的拼接,因此要乘以2
last = tf.reshape(finalOutput, [-1, outputSize]) # reshape成全连接层的输入维度
last = tf.layers.batch_normalization(last, training = is_training)
fc_init = tf.uniform_unit_scaling_initializer(factor = 1.0)
with tf.variable_scope('fc', initializer = fc_init):
fc1 = tf.layers.dense(last, hps.num_fc_nodes, name = 'fc1')
fc1_batch_normalization = tf.layers.batch_normalization(fc1, training = is_training)
fc_activation = tf.nn.relu(fc1_batch_normalization)
logits = tf.layers.dense(fc_activation, hps.num_classes, name = 'fc2')
with tf.name_scope('metrics'):
softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logits, labels = tf.argmax(outputs, 1))
loss = tf.reduce_mean(softmax_loss)
# [0, 1, 5, 4, 2] ->argmax:2 因为在第二个位置上是最大的
y_pred = tf.argmax(tf.nn.softmax(logits), 1, output_type = tf.int64, name = 'y_pred')
# 计算准确率,看看算对多少个
correct_pred = tf.equal(tf.argmax(outputs, 1), y_pred)
# tf.cast 将数据转换成 tf.float32 类型
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
with tf.name_scope('train_op'):
tvar = tf.trainable_variables()
for var in tvar:
print('variable name: %s' % (var.name))
grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvar), hps.clip_lstm_grads)
optimizer = tf.train.AdamOptimizer(hps.learning_rate)
train_op = optimizer.apply_gradients(zip(grads, tvar), global_step)
# return((inputs, outputs, is_training), (loss, accuracy, y_pred), (train_op, global_step))
return((inputs, outputs), (loss, accuracy, y_pred), (train_op, global_step))
placeholders, metrics, others = create_model(hps)
content, labels = placeholders
loss, accuracy, y_pred = metrics
train_op, global_step = others
def val_steps(sess, x_batch, y_batch, writer = None):
loss_val, accuracy_val = sess.run([loss,accuracy], feed_dict = {inputs: x_batch, outputs: y_batch, is_training: hps.val_is_training, dropout_keep_prob: 1.0})
return loss_val, accuracy_val
loss_summary = tf.summary.scalar('loss', loss)
accuracy_summary = tf.summary.scalar('accuracy', accuracy)
# 将所有的变量都集合起来
merged_summary = tf.summary.merge_all()
# 用于test测试的summary
merged_summary_test = tf.summary.merge([loss_summary, accuracy_summary])
LOG_DIR = '.'
run_label = 'run_Bi-GRU_Dropout_tensorboard'
run_dir = os.path.join(LOG_DIR, run_label)
if not os.path.exists(run_dir):
os.makedirs(run_dir)
train_log_dir = os.path.join(run_dir, timestamp, 'train')
test_los_dir = os.path.join(run_dir, timestamp, 'test')
if not os.path.exists(train_log_dir):
os.makedirs(train_log_dir)
if not os.path.join(test_los_dir):
os.makedirs(test_los_dir)
# saver得到的文件句柄,可以将文件训练的快照保存到文件夹中去
saver = tf.train.Saver(tf.global_variables(), max_to_keep = 5)
# train 代码
init_op = tf.global_variables_initializer()
train_keep_prob_value = 0.2
test_keep_prob_value = 1.0
# 由于如果按照每一步都去计算的话,会很慢,所以我们规定每100次存储一次
output_summary_every_steps = 100
num_train_steps = 1000
# 每隔多少次保存一次
output_model_every_steps = 500
# 测试集测试
test_model_all_steps = 4000
i = 0
session_conf = tf.ConfigProto(
gpu_options = tf.GPUOptions(allow_growth=True),
allow_soft_placement = True,
log_device_placement = False)
with tf.Session(config = session_conf) as sess:
sess.run(init_op)
# 将训练过程中,将loss,accuracy写入文件里,后面是目录和计算图,如果想要在tensorboard中显示计算图,就想sess.graph加上
train_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
# 同样将测试的结果保存到tensorboard中,没有计算图
test_writer = tf.summary.FileWriter(test_los_dir)
batches = batch_iter(list(zip(x_train, y_train)), hps.batch_size, hps.num_epochs)
for batch in batches:
train_x, train_y = zip(*batch)
eval_ops = [loss, accuracy, train_op, global_step]
should_out_summary = ((i + 1) % output_summary_every_steps == 0)
if should_out_summary:
eval_ops.append(merged_summary)
# 那三个占位符输进去
# 计算loss, accuracy, train_op, global_step的图
eval_ops.append(merged_summary)
outputs_train = sess.run(eval_ops,
feed_dict={
inputs: train_x,
outputs: train_y,
dropout_keep_prob: train_keep_prob_value,
is_training: hps.train_is_training
})
loss_train, accuracy_train = outputs_train[0:2]
if should_out_summary:
# 由于我们想在100steps之后计算summary,所以上面 should_out_summary = ((i + 1) % output_summary_every_steps == 0)成立,
# 即为真True,那么我们将训练的内容放入eval_ops的最后面了,因此,我们想获得summary的结果得在eval_ops_results的最后一个
train_summary_str = outputs_train[-1]
# 将获得的结果写训练tensorboard文件夹中,由于训练从0开始,所以这里加上1,表示第几步的训练
train_writer.add_summary(train_summary_str, i + 1)
test_summary_str = sess.run([merged_summary_test],
feed_dict = {inputs: x_dev,
outputs: y_dev,
dropout_keep_prob: 1.0,
is_training: hps.val_is_training
})[0]
test_writer.add_summary(test_summary_str, i + 1)
current_step = tf.train.global_step(sess, global_step)
if (i + 1) % 100 == 0:
print("Step: %5d, loss: %3.3f, accuracy: %3.3f" % (i + 1, loss_train, accuracy_train))
# 500个batch校验一次
if (i + 1) % 500 == 0:
loss_eval, accuracy_eval = val_steps(sess, x_dev, y_dev)
print("Step: %5d, val_loss: %3.3f, val_accuracy: %3.3f" % (i + 1, loss_eval, accuracy_eval))
if (i + 1) % output_model_every_steps == 0:
path = saver.save(sess,os.path.join(out_dir, 'ckp-%05d' % (i + 1)))
print("Saved model checkpoint to {}\n".format(path))
print('model saved to ckp-%05d' % (i + 1))
if (i + 1) % test_model_all_steps == 0:
# test_loss, test_acc, all_predictions= sess.run([loss, accuracy, y_pred], feed_dict = {inputs: x_test, outputs: y_test, dropout_keep_prob: 1.0})
test_loss, test_acc, all_predictions= sess.run([loss, accuracy, y_pred], feed_dict = {inputs: x_test, outputs: y_test, is_training: hps.val_is_training, dropout_keep_prob: 1.0})
print("test_loss: %3.3f, test_acc: %3.3d" % (test_loss, test_acc))
batches = batch_iter(list(x_test), 128, 1, shuffle=False)
# Collect the predictions here
all_predictions = []
for x_test_batch in batches:
batch_predictions = sess.run(y_pred, {inputs: x_test_batch, is_training: hps.val_is_training, dropout_keep_prob: 1.0})
all_predictions = np.concatenate([all_predictions, batch_predictions])
correct_predictions = float(sum(all_predictions == y.flatten()))
print("Total number of test examples: {}".format(len(y_test)))
print("Accuracy: {:g}".format(correct_predictions/float(len(y_test))))
test_y = y_test.argmax(axis = 1)
#生成混淆矩阵
conf_mat = confusion_matrix(test_y, all_predictions)
fig, ax = plt.subplots(figsize = (4,2))
sns.heatmap(conf_mat, annot=True, fmt = 'd', xticklabels = cat_id_df.category_id.values, yticklabels = cat_id_df.category_id.values)
font_set = FontProperties(fname = r"/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", size=15)
plt.ylabel(u'实际结果',fontsize = 18,fontproperties = font_set)
plt.xlabel(u'预测结果',fontsize = 18,fontproperties = font_set)
plt.savefig('./test.png')
print('accuracy %s' % accuracy_score(all_predictions, test_y))
print(classification_report(test_y, all_predictions,target_names = cat_id_df['category_name'].values))
print(classification_report(test_y, all_predictions))
i += 1
以上的模型代码,请求各位大神帮我看看,为什么出现这样的结果?