小洛儿 2019-12-19 11:09

TensorFlow: testing a model right after training vs. testing a reloaded saved model gives different results — one good, one much worse. Why?

After training the model in TensorFlow, if I evaluate it directly in the same session, the results are good; but if I save the model, reload it, and then evaluate, the results are much worse. I don't understand why this happens. Note: the test data is identical in both cases. The results are as follows:

Training set: loss 0.384, acc 0.931. Validation set: loss 0.212, acc 0.968. Test set, evaluated in the same session right after training: acc 0.96. Test set, evaluated after reloading the saved model: acc 0.29.
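For reference, a typical TF 1.x reload-and-test pattern looks like the sketch below. The actual restore code is not shown in this question, so this is only a hypothetical sketch: it assumes the placeholder definitions, create_model (defined further down), hps, out_dir, x_test, and y_test are re-run/available exactly as in the training script.

# hypothetical reload-and-test sketch (not the asker's actual restore code)
# the graph-building code (placeholders and create_model) must be re-run first,
# exactly as in the training script, before restoring the checkpoint
placeholders, metrics, others = create_model(hps)
loss, accuracy, y_pred = metrics

saver = tf.train.Saver()
with tf.Session() as sess:
    ckpt = tf.train.latest_checkpoint(out_dir)  # picks the newest 'ckp-*' checkpoint
    saver.restore(sess, ckpt)                   # load the saved weights
    test_acc = sess.run(accuracy, feed_dict = {inputs: x_test,
                                               outputs: y_test,
                                               is_training: False,   # BN/dropout in inference mode
                                               dropout_keep_prob: 1.0})
    print('restored-model test acc: %.3f' % test_acc)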

def create_model(hps):


    # integer global step (apply_gradients increments it once per training step)
    global_step = tf.Variable(0, dtype = tf.int64, name = 'global_step', trainable = False)
    scale = 1.0 / math.sqrt(hps.num_embedding_size + hps.num_lstm_nodes[-1]) / 3.0
    gru_init = tf.random_normal_initializer(-scale, scale)

    with tf.variable_scope('Bi_GRU_nn', initializer = gru_init):
        # note: only the cells created in the last loop iteration are used below,
        # so this loop does not actually stack num_lstm_layers layers
        for i in range(hps.num_lstm_layers):
            cell_fw = tf.contrib.rnn.GRUCell(hps.num_lstm_nodes[i], activation = tf.nn.relu, name = 'cell-fw')
            cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob = dropout_keep_prob)
            cell_bw = tf.contrib.rnn.GRUCell(hps.num_lstm_nodes[i], activation = tf.nn.relu, name = 'cell-bw')
            cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob = dropout_keep_prob)

        # bidirectional_dynamic_rnn expects the forward cell first, then the backward cell
        rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, dtype=tf.float32)
        embeddedWords = tf.concat(rnn_outputs, 2)
        finalOutput = embeddedWords[:, -1, :]
        outputSize = hps.num_lstm_nodes[-1] * 2  # bidirectional GRU: the output concatenates the fw and bw outputs, hence the factor of 2
        last = tf.reshape(finalOutput, [-1, outputSize])  # reshape to the fully-connected layer's input shape
        last = tf.layers.batch_normalization(last, training = is_training)
    fc_init = tf.uniform_unit_scaling_initializer(factor = 1.0)

    with tf.variable_scope('fc', initializer = fc_init):
        fc1 = tf.layers.dense(last, hps.num_fc_nodes, name = 'fc1')
        fc1_batch_normalization = tf.layers.batch_normalization(fc1, training = is_training)
        fc_activation = tf.nn.relu(fc1_batch_normalization)
        logits = tf.layers.dense(fc_activation, hps.num_classes, name = 'fc2')

    with tf.name_scope('metrics'):
        softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logits, labels = tf.argmax(outputs, 1))

        loss = tf.reduce_mean(softmax_loss)
        # e.g. [0, 1, 5, 4, 2] -> argmax is 2, because the largest value sits at index 2
        y_pred = tf.argmax(tf.nn.softmax(logits), 1, output_type = tf.int64, name = 'y_pred')
        # compute the accuracy: how many predictions were correct
        correct_pred = tf.equal(tf.argmax(outputs, 1), y_pred)
        # tf.cast converts the data to tf.float32
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))  


    with tf.name_scope('train_op'):
        tvar = tf.trainable_variables()
        for var in tvar:
            print('variable name: %s' % (var.name))
        grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvar), hps.clip_lstm_grads)
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvar), global_step)
#    return((inputs, outputs, is_training), (loss, accuracy, y_pred), (train_op, global_step))
    return((inputs, outputs), (loss, accuracy, y_pred), (train_op, global_step))

placeholders, metrics, others = create_model(hps)
content, labels = placeholders
loss, accuracy, y_pred = metrics
train_op, global_step = others  

def val_steps(sess, x_batch, y_batch, writer = None):

    loss_val, accuracy_val = sess.run([loss,accuracy], feed_dict = {inputs: x_batch, outputs: y_batch, is_training: hps.val_is_training,  dropout_keep_prob: 1.0})
    return loss_val, accuracy_val

loss_summary = tf.summary.scalar('loss', loss)
accuracy_summary = tf.summary.scalar('accuracy', accuracy)

# merge all the summaries together
merged_summary = tf.summary.merge_all()
# summaries used for test evaluation
merged_summary_test = tf.summary.merge([loss_summary, accuracy_summary])

LOG_DIR = '.'
run_label = 'run_Bi-GRU_Dropout_tensorboard'
run_dir = os.path.join(LOG_DIR, run_label)
if not os.path.exists(run_dir):
    os.makedirs(run_dir)
train_log_dir = os.path.join(run_dir, timestamp, 'train')
test_log_dir = os.path.join(run_dir, timestamp, 'test')
if not os.path.exists(train_log_dir):
    os.makedirs(train_log_dir)
if not os.path.exists(test_log_dir):
    os.makedirs(test_log_dir)

# the saver handle is used to write training snapshots (checkpoints) to the folder
saver = tf.train.Saver(tf.global_variables(), max_to_keep = 5)

# training code
init_op = tf.global_variables_initializer()
train_keep_prob_value = 0.2
test_keep_prob_value = 1.0
# computing a summary at every step would be slow, so only write one every 100 steps
output_summary_every_steps = 100
num_train_steps = 1000
# how often to save a checkpoint
output_model_every_steps = 500
# how often to evaluate on the test set
test_model_all_steps = 4000
i = 0

session_conf = tf.ConfigProto(
    gpu_options = tf.GPUOptions(allow_growth=True),
    allow_soft_placement = True,
    log_device_placement = False)

with tf.Session(config = session_conf) as sess:
    sess.run(init_op)
    # during training, write loss/accuracy to the log dir; pass sess.graph so the computation graph shows up in TensorBoard
    train_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    # likewise write the test results to TensorBoard, without the graph
    test_writer = tf.summary.FileWriter(test_log_dir)
    batches = batch_iter(list(zip(x_train, y_train)), hps.batch_size, hps.num_epochs)

    for batch in batches:
        train_x, train_y = zip(*batch)
        eval_ops = [loss, accuracy, train_op, global_step]
        should_out_summary = ((i + 1) % output_summary_every_steps == 0)

        if should_out_summary:
            eval_ops.append(merged_summary)
        # feed the placeholders in and run the graph for loss, accuracy, train_op, global_step
        outputs_train = sess.run(eval_ops,
                               feed_dict={
                                   inputs: train_x,
                                   outputs: train_y,
                                   dropout_keep_prob: train_keep_prob_value,
                                   is_training: hps.train_is_training
                               })
        loss_train, accuracy_train = outputs_train[0:2]
        if should_out_summary:
            # merged_summary was appended to eval_ops when should_out_summary is True,
            # so the summary string is the last element of outputs_train
            train_summary_str = outputs_train[-1]
            # write it to the train TensorBoard folder; steps are 0-based, so add 1 to report the step number
            train_writer.add_summary(train_summary_str, i + 1)

            test_summary_str = sess.run([merged_summary_test], 
                                        feed_dict = {inputs: x_dev, 
                                                     outputs: y_dev, 
                                                     dropout_keep_prob: 1.0,
                                                     is_training: hps.val_is_training
                                                     })[0]
            test_writer.add_summary(test_summary_str, i + 1)
        current_step = tf.train.global_step(sess, global_step)
        if (i + 1) % 100 == 0:
            print("Step: %5d, loss: %3.3f, accuracy: %3.3f"  % (i + 1, loss_train, accuracy_train))

        # run validation every 500 batches
        if (i + 1) % 500 == 0:
            loss_eval, accuracy_eval = val_steps(sess, x_dev, y_dev)
            print("Step: %5d, val_loss: %3.3f, val_accuracy: %3.3f"  % (i + 1, loss_eval, accuracy_eval))
        if (i + 1) % output_model_every_steps == 0:
            path = saver.save(sess,os.path.join(out_dir, 'ckp-%05d' % (i + 1)))
            print("Saved model checkpoint to {}\n".format(path))
            print('model saved to ckp-%05d' % (i + 1))
        if (i + 1) % test_model_all_steps == 0:
#            test_loss, test_acc, all_predictions= sess.run([loss, accuracy, y_pred], feed_dict = {inputs: x_test, outputs: y_test, dropout_keep_prob: 1.0})
            test_loss, test_acc, all_predictions= sess.run([loss, accuracy, y_pred], feed_dict = {inputs: x_test, outputs: y_test, is_training: hps.val_is_training, dropout_keep_prob: 1.0})
            print("test_loss: %3.3f, test_acc: %3.3d" % (test_loss, test_acc))
            batches = batch_iter(list(x_test), 128, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            for x_test_batch in batches:
                batch_predictions = sess.run(y_pred, {inputs: x_test_batch, is_training: hps.val_is_training, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])
            correct_predictions = float(sum(all_predictions == y_test.argmax(axis = 1)))  # y_test is one-hot, so compare against its class indices

            print("Total number of test examples: {}".format(len(y_test)))
            print("Accuracy: {:g}".format(correct_predictions/float(len(y_test))))

            test_y = y_test.argmax(axis = 1)
            # build the confusion matrix
            conf_mat = confusion_matrix(test_y, all_predictions)

            fig, ax = plt.subplots(figsize = (4,2))

            sns.heatmap(conf_mat, annot=True, fmt = 'd', xticklabels = cat_id_df.category_id.values, yticklabels = cat_id_df.category_id.values)
            font_set = FontProperties(fname = r"/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", size=15)
            plt.ylabel(u'实际结果', fontsize = 18, fontproperties = font_set)  # y label: 'actual class'
            plt.xlabel(u'预测结果', fontsize = 18, fontproperties = font_set)  # x label: 'predicted class'
            plt.savefig('./test.png')
            print('accuracy %s' % accuracy_score(all_predictions, test_y))
            print(classification_report(test_y, all_predictions,target_names = cat_id_df['category_name'].values))
            print(classification_report(test_y, all_predictions))
        i += 1

That's the model code above — could someone please take a look and tell me why I get this result?


2 replies

  • AIShark 2019-12-31 09:00

    Check whether the model contains batch-normalization (BN) ops. If it does, check whether the BN parameters (the moving mean and variance) are actually updated during training and saved into the checkpoint. A quick search turns up many examples of how to wire up the BN update ops correctly.
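    To make this concrete: with tf.layers.batch_normalization in TF 1.x, the moving-mean/variance update ops are collected under tf.GraphKeys.UPDATE_OPS and are not run unless train_op depends on them. The train_op scope in the question never adds that dependency, so the moving statistics keep their initial values; the in-session evaluation can still look fine if it effectively runs with the BN training flag on (batch statistics are used), but a restored model evaluated with is_training=False normalizes with the stale statistics and accuracy collapses. A minimal sketch of the usual fix, reusing the names from the question's train_op scope:

    with tf.name_scope('train_op'):
        tvar = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvar), hps.clip_lstm_grads)
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)
        # run the BN moving-average updates together with every training step
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.apply_gradients(zip(grads, tvar), global_step)

    The moving averages are ordinary global variables, so the tf.train.Saver above already writes them to the checkpoint; the problem is only that they are never updated. After retraining with this dependency in place, evaluate the restored model with is_training=False and dropout_keep_prob=1.0.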

