训练集:loss:0.384,acc:0.931. 验证集:loss:0.212,acc:0.968. 训练完在同一session内的测试集:acc:0.96。导入保存的模型进行测试:acc:0.29

def create_model(hps):

    global_step = tf.Variable(tf.zeros([], tf.float64), name = 'global_step', trainable = False)
    scale = 1.0 / math.sqrt(hps.num_embedding_size + hps.num_lstm_nodes[-1]) / 3.0
    gru_init = tf.random_normal_initializer(-scale, scale)

    with tf.variable_scope('Bi_GRU_nn', initializer = gru_init):
        for i in range(hps.num_lstm_layers):
            cell_bw = tf.contrib.rnn.GRUCell(hps.num_lstm_nodes[i], activation = tf.nn.relu, name = 'cell-bw')

            cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob = dropout_keep_prob)
            cell_fw = tf.contrib.rnn.GRUCell(hps.num_lstm_nodes[i], activation = tf.nn.relu, name = 'cell-fw')
            cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob = dropout_keep_prob)

        rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_bw, cell_fw, inputs, dtype=tf.float32)
        embeddedWords = tf.concat(rnn_outputs, 2)
        finalOutput = embeddedWords[:, -1, :]
        outputSize = hps.num_lstm_nodes[-1] * 2  # 因为是双向LSTM,最终的输出值是fw和bw的拼接,因此要乘以2
        last = tf.reshape(finalOutput, [-1, outputSize])  # reshape成全连接层的输入维度
        last = tf.layers.batch_normalization(last, training = is_training) 
    fc_init = tf.uniform_unit_scaling_initializer(factor = 1.0)

    with tf.variable_scope('fc', initializer = fc_init):
        fc1 = tf.layers.dense(last, hps.num_fc_nodes, name = 'fc1')
        fc1_batch_normalization = tf.layers.batch_normalization(fc1, training = is_training)
        fc_activation = tf.nn.relu(fc1_batch_normalization)
        logits = tf.layers.dense(fc_activation, hps.num_classes, name = 'fc2')

    with tf.name_scope('metrics'):
        softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logits, labels = tf.argmax(outputs, 1))

        loss = tf.reduce_mean(softmax_loss)
        # [0, 1, 5, 4, 2] ->argmax:2 因为在第二个位置上是最大的
        y_pred = tf.argmax(tf.nn.softmax(logits), 1, output_type = tf.int64, name = 'y_pred')
        # 计算准确率,看看算对多少个
        correct_pred = tf.equal(tf.argmax(outputs, 1), y_pred)
        # tf.cast  将数据转换成 tf.float32 类型
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))  

    with tf.name_scope('train_op'):
        tvar = tf.trainable_variables()
        for var in tvar:
            print('variable name: %s' % (var.name))
        grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvar), hps.clip_lstm_grads)
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvar), global_step)
#    return((inputs, outputs, is_training), (loss, accuracy, y_pred), (train_op, global_step))
    return((inputs, outputs), (loss, accuracy, y_pred), (train_op, global_step))

placeholders, metrics, others = create_model(hps)
content, labels = placeholders
loss, accuracy, y_pred = metrics
train_op, global_step = others  

def val_steps(sess, x_batch, y_batch, writer = None):

    loss_val, accuracy_val = sess.run([loss,accuracy], feed_dict = {inputs: x_batch, outputs: y_batch, is_training: hps.val_is_training,  dropout_keep_prob: 1.0})
    return loss_val, accuracy_val

loss_summary = tf.summary.scalar('loss', loss)
accuracy_summary = tf.summary.scalar('accuracy', accuracy)

# 将所有的变量都集合起来
merged_summary = tf.summary.merge_all()
# 用于test测试的summary
merged_summary_test = tf.summary.merge([loss_summary, accuracy_summary])

LOG_DIR = '.'
run_label = 'run_Bi-GRU_Dropout_tensorboard'
run_dir = os.path.join(LOG_DIR, run_label)
if not os.path.exists(run_dir):
train_log_dir = os.path.join(run_dir, timestamp, 'train')
test_los_dir = os.path.join(run_dir, timestamp, 'test')
if not os.path.exists(train_log_dir):
if not os.path.join(test_los_dir):

# saver得到的文件句柄,可以将文件训练的快照保存到文件夹中去
saver = tf.train.Saver(tf.global_variables(), max_to_keep = 5)

# train 代码
init_op = tf.global_variables_initializer()
train_keep_prob_value = 0.2
test_keep_prob_value = 1.0
# 由于如果按照每一步都去计算的话,会很慢,所以我们规定每100次存储一次
output_summary_every_steps = 100
num_train_steps = 1000
# 每隔多少次保存一次
output_model_every_steps = 500
# 测试集测试
test_model_all_steps = 4000
i = 0

session_conf = tf.ConfigProto(
    gpu_options = tf.GPUOptions(allow_growth=True),
    allow_soft_placement = True,
    log_device_placement = False)

with tf.Session(config = session_conf) as sess:
    # 将训练过程中,将loss,accuracy写入文件里,后面是目录和计算图,如果想要在tensorboard中显示计算图,就想sess.graph加上
    train_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    # 同样将测试的结果保存到tensorboard中,没有计算图
    test_writer = tf.summary.FileWriter(test_los_dir)
    batches = batch_iter(list(zip(x_train, y_train)), hps.batch_size, hps.num_epochs)

    for batch in batches:
        train_x, train_y = zip(*batch)
        eval_ops = [loss, accuracy, train_op, global_step]
        should_out_summary = ((i + 1) % output_summary_every_steps == 0)

        if should_out_summary:
        # 那三个占位符输进去
        # 计算loss, accuracy, train_op, global_step的图
        outputs_train = sess.run(eval_ops,
                                   inputs: train_x,
                                   outputs: train_y,
                                   dropout_keep_prob: train_keep_prob_value,
                                   is_training: hps.train_is_training
        loss_train, accuracy_train = outputs_train[0:2]
        if should_out_summary:
            # 由于我们想在100steps之后计算summary,所以上面 should_out_summary = ((i + 1) % output_summary_every_steps == 0)成立,
            # 即为真True,那么我们将训练的内容放入eval_ops的最后面了,因此,我们想获得summary的结果得在eval_ops_results的最后一个
            train_summary_str = outputs_train[-1]
            # 将获得的结果写训练tensorboard文件夹中,由于训练从0开始,所以这里加上1,表示第几步的训练
            train_writer.add_summary(train_summary_str, i + 1)

            test_summary_str = sess.run([merged_summary_test], 
                                        feed_dict = {inputs: x_dev, 
                                                     outputs: y_dev, 
                                                     dropout_keep_prob: 1.0,
                                                     is_training: hps.val_is_training
            test_writer.add_summary(test_summary_str, i + 1)
        current_step = tf.train.global_step(sess, global_step)
        if (i + 1) % 100 == 0:
            print("Step: %5d, loss: %3.3f, accuracy: %3.3f"  % (i + 1, loss_train, accuracy_train))

        # 500个batch校验一次
        if (i + 1) % 500 == 0:
            loss_eval, accuracy_eval = val_steps(sess, x_dev, y_dev)
            print("Step: %5d, val_loss: %3.3f, val_accuracy: %3.3f"  % (i + 1, loss_eval, accuracy_eval))
        if (i + 1) % output_model_every_steps == 0:
            path = saver.save(sess,os.path.join(out_dir, 'ckp-%05d' % (i + 1)))
            print("Saved model checkpoint to {}\n".format(path))
            print('model saved to ckp-%05d' % (i + 1))
        if (i + 1) % test_model_all_steps == 0:
#            test_loss, test_acc, all_predictions= sess.run([loss, accuracy, y_pred], feed_dict = {inputs: x_test, outputs: y_test, dropout_keep_prob: 1.0})
            test_loss, test_acc, all_predictions= sess.run([loss, accuracy, y_pred], feed_dict = {inputs: x_test, outputs: y_test, is_training: hps.val_is_training, dropout_keep_prob: 1.0})
            print("test_loss: %3.3f, test_acc: %3.3d" % (test_loss, test_acc))
            batches = batch_iter(list(x_test), 128, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            for x_test_batch in batches:
                batch_predictions = sess.run(y_pred, {inputs: x_test_batch, is_training: hps.val_is_training, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])
            correct_predictions = float(sum(all_predictions == y.flatten()))

            print("Total number of test examples: {}".format(len(y_test)))
            print("Accuracy: {:g}".format(correct_predictions/float(len(y_test))))

            test_y = y_test.argmax(axis = 1)
            conf_mat = confusion_matrix(test_y, all_predictions)

            fig, ax = plt.subplots(figsize = (4,2))

            sns.heatmap(conf_mat, annot=True, fmt = 'd', xticklabels = cat_id_df.category_id.values, yticklabels = cat_id_df.category_id.values)
            font_set = FontProperties(fname = r"/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", size=15)
            plt.ylabel(u'实际结果',fontsize = 18,fontproperties = font_set)
            plt.xlabel(u'预测结果',fontsize = 18,fontproperties = font_set)
            print('accuracy %s' % accuracy_score(all_predictions, test_y))
            print(classification_report(test_y, all_predictions,target_names = cat_id_df['category_name'].values))
            print(classification_report(test_y, all_predictions))
        i += 1


