import tensorflow as tf
import readtxt2 as read
import datetime
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
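
# Bi-directional LSTM text classifier with an additive attention layer.
# The script builds the graph (PbAttention), trains it in run_epoch(), and
# exports a SavedModel / GraphDef for serving (e.g. called from Java).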
class Config(object):
    # number of target classes
    numClasses = 3
    # maximum sentence length
    maxSeqLength = 64
    # word-embedding dimensionality
    numDimensions = 200
    KEEP_PROB = 0.1        # dropout rate, i.e. the fraction of units dropped (despite the name)
    HIDDEN_SIZE = 64       # number of hidden units per LSTM layer
    NUM_LAYERS = 1         # number of LSTM layers
    VOCAB_SIZE = 10000     # vocabulary size
    LEARNING_RATE = 0.002  # learning rate
    TRAIN_BATCH_SIZE = 64  # training batch size
    grad_clip = 4.0        # gradient-clipping threshold
    # at evaluation time the batch size is set to 1
    EVAL_BATCH_SIZE = 1
    EVAL_NUM_STEP = 1
    attention_size = 64    # size of the attention layer
class PbAttention(object):
def __init__(self, config, is_training, word_vectors):
self.config = config
self.batch_size = tf.compat.v1.placeholder(tf.int32, name='batch_size')
        # one-hot target classes
        self.input_class = tf.compat.v1.placeholder(tf.int32, [None, self.config.numClasses], name="input_class")
        # input text as padded word ids
        self.input_line = tf.compat.v1.placeholder(tf.int32, [None, self.config.maxSeqLength], name="input_line")
self.is_training = is_training
self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.sequence_lengths = tf.compat.v1.placeholder(tf.int32, shape=[None], name="sequence_lengths")
        # [vocabulary size, embedding dimension], initialized from the pretrained word vectors
        self.embedding = tf.compat.v1.get_variable(
            "embedding", shape=[len(word_vectors), self.config.numDimensions],
            initializer=tf.compat.v1.constant_initializer(word_vectors))
self.rnn(self.is_training)
tensor_info_x = tf.saved_model.utils.build_tensor_info(self.input_line)
tensor_info_y = tf.saved_model.utils.build_tensor_info(self.y_pred_cls)
self.tensor_info_x = tensor_info_x
self.tensor_info_y = tensor_info_y
logdir = "tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
self.logdir = logdir
merged = tf.summary.merge_all()
self.merged = merged
def rnn(self, is_training):
# Define Basic RNN Cell
def basic_rnn_cell(rnn_size):
# return tf.contrib.rnn.GRUCell(rnn_size)
return tf.contrib.rnn.LSTMCell(rnn_size, state_is_tuple=True)
# Define Forward RNN Cell
with tf.name_scope('fw_rnn'):
fw_rnn_cell = tf.contrib.rnn.MultiRNNCell([basic_rnn_cell(self.config.HIDDEN_SIZE) for _ in range(self.config.NUM_LAYERS)])
if is_training:
                # KEEP_PROB holds the dropout *rate*, so the keep probability is 1 - rate
                fw_rnn_cell = tf.contrib.rnn.DropoutWrapper(fw_rnn_cell, output_keep_prob=1.0 - self.config.KEEP_PROB)
# Define Backward RNN Cell
with tf.name_scope('bw_rnn'):
bw_rnn_cell = tf.contrib.rnn.MultiRNNCell([basic_rnn_cell(self.config.HIDDEN_SIZE) for _ in range(self.config.NUM_LAYERS)])
if is_training:
                bw_rnn_cell = tf.contrib.rnn.DropoutWrapper(bw_rnn_cell, output_keep_prob=1.0 - self.config.KEEP_PROB)
# Embedding layer
with tf.name_scope('embedding_line'):
input_line_vec = tf.nn.embedding_lookup(self.embedding, self.input_line)
tf.summary.histogram("input_line_vec", input_line_vec)
        with tf.name_scope('bi_rnn'):
            rnn_output, _ = tf.nn.bidirectional_dynamic_rnn(fw_rnn_cell, bw_rnn_cell, inputs=input_line_vec,
                                                            sequence_length=self.sequence_lengths, dtype=tf.float32)
            # bidirectional_dynamic_rnn returns a (forward, backward) tuple;
            # concatenate the two along the feature axis before summarizing
            if isinstance(rnn_output, tuple):
                rnn_output = tf.concat(rnn_output, 2)
            tf.summary.histogram("rnn_output", rnn_output)
# Attention Layer
with tf.name_scope('attention'):
            input_shape = rnn_output.shape  # (batch_size, sequence_length, 2 * HIDDEN_SIZE)
            sequence_size = input_shape.as_list()[1]  # number of time steps processed by the RNN
            hidden_size = input_shape.as_list()[2]    # concatenated forward + backward hidden size
attention_w = tf.Variable(tf.truncated_normal([hidden_size, self.config.attention_size], stddev=0.1),
name='attention_w')
attention_b = tf.Variable(tf.constant(0.1, shape=[self.config.attention_size]), name='attention_b')
attention_u = tf.Variable(tf.truncated_normal([self.config.attention_size], stddev=0.1), name='attention_u')
            # tf.summary.histogram("attention_w", attention_w)
z_list = []
for t in range(sequence_size):
u_t = tf.tanh(tf.matmul(rnn_output[:, t, :], attention_w) + tf.reshape(attention_b, [1, -1]))
z_t = tf.matmul(u_t, tf.reshape(attention_u, [-1, 1]))
z_list.append(z_t)
            # concatenate the per-step scores into shape (batch_size, sequence_size)
attention_z = tf.concat(z_list, axis=1)
self.alpha = tf.nn.softmax(attention_z)
attention_output = tf.reduce_sum(rnn_output * tf.reshape(self.alpha, [-1, sequence_size, 1]), 1)
tf.summary.histogram("alpha", self.alpha)
tf.summary.histogram("attention_output", attention_output)
# attention_output shape: (batch_size, hidden_size)
        # Add dropout (applied only during training)
        with tf.name_scope('dropout'):
            # attention_output shape: (batch_size, hidden_size)
            if is_training:
                self.final_output = tf.nn.dropout(attention_output, rate=self.config.KEEP_PROB)
            else:
                self.final_output = attention_output
            tf.summary.histogram("final_output", self.final_output)
# Fully connected layer
with tf.name_scope('output'):
fc_w = tf.Variable(tf.truncated_normal([hidden_size, self.config.numClasses], stddev=0.1), name='fc_w')
fc_b = tf.Variable(tf.zeros([self.config.numClasses]), name='fc_b')
# 目标向量
self.logits = tf.matmul(self.final_output, fc_w) + fc_b
self.y_pred_cls = tf.argmax(self.logits, 1, name='predictions')
tf.summary.histogram("fc_w", fc_w)
tf.summary.histogram("fc_b", fc_b)
tf.summary.histogram("logits", self.logits)
tf.summary.histogram("y_pred_cls", self.y_pred_cls)
# Calculate cross-entropy loss
with tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.logits, labels=tf.cast(self.input_class, tf.float32))
self.loss = tf.reduce_mean(cross_entropy)
tf.summary.scalar("loss", self.loss)
# Create optimizer
with tf.name_scope('optimization'):
optimizer = tf.train.AdamOptimizer(self.config.LEARNING_RATE)
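            # clip_by_global_norm rescales each gradient g_i by
            # grad_clip / max(||g||_2, grad_clip), so one noisy batch cannot
            # blow up the recurrent weights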
gradients, variables = zip(*optimizer.compute_gradients(self.loss))
gradients, _ = tf.clip_by_global_norm(gradients, self.config.grad_clip)
self.optim = optimizer.apply_gradients(zip(gradients, variables), global_step=self.global_step)
# Calculate accuracy
with tf.name_scope('accuracy'):
correct_pred = tf.equal(self.y_pred_cls, tf.argmax(self.input_class, 1))
self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
tf.summary.scalar("accuracy", self.acc)
def get_sequence_length(x_batch):
    """
    Args:
        x_batch: a batch of padded word-id sequences
    Returns:
        sequence_lengths: a list with the actual (unpadded) length of every
            sequence in the batch, assuming the padding id is 0
    """
    sequence_lengths = []
    for x in x_batch:
        actual_length = np.sum(np.sign(x))  # count the non-zero (non-padding) ids
        sequence_lengths.append(actual_length)
    return sequence_lengths
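
# Example with a hypothetical batch (padding id 0):
#   get_sequence_length(np.array([[4, 7, 2, 0, 0],
#                                 [9, 0, 0, 0, 0]]))  # -> [3, 1]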
def run_epoch(session, model, data, target, eval_data, eval_target):
writer = tf.summary.FileWriter(model.logdir, session.graph)
saver = tf.train.Saver()
    # state = session.run(model.initial_state)  # initial LSTM cell state
    batch_size = 128
    # run up to `steps` mini-batches, cycling over the training data
    steps = 5000
dataset_size = len(target)
dataset_size = (dataset_size // batch_size) * batch_size
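    # e.g. with 10,000 labelled examples and batch_size 128:
    # dataset_size = (10000 // 128) * 128 = 9984, so the last 16 examples are dropped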
eval_dataset_size = len(eval_target)
eval_dataset_size = (eval_dataset_size // batch_size) * batch_size
for step in range(steps):
        # take the next batch_size samples for this training step
start = (step * batch_size) % dataset_size
end = min(start + batch_size, dataset_size)
x_batch = data[start:end]
sequence_lengths = get_sequence_length(x_batch)
        _batch_size1 = end - start  # actual number of samples in this slice
        _, summary, accuracy = session.run([model.optim, model.merged, model.acc],
{model.input_line: x_batch, model.input_class: target[start:end],
model.sequence_lengths: sequence_lengths,
model.batch_size: _batch_size1
})
if step % 10 == 0:
            writer.add_summary(summary, step)
if step % 20 == 0:
print("step: %d accuracy: %g time: %s" % (step, accuracy, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
        # Evaluate on the held-out set every 100 training steps
        if step % 100 == 0 and step != 0:
eval_step = step // 100
eval_start = (eval_step * 1000) % eval_dataset_size
eval_end = min(eval_start + 1000, eval_dataset_size)
eval_batch = eval_data[eval_start:eval_end]
eval_batch_class = eval_target[eval_start:eval_end]
eval_sequence_lengths = get_sequence_length(eval_batch)
            _batch_size = eval_end - eval_start  # actual number of eval samples
            # evaluation only: run the metrics, not the optimizer, on held-out data
            summary, accuracy = session.run([model.merged, model.acc],
{model.input_line: eval_batch,
model.input_class: eval_batch_class,
model.sequence_lengths: eval_sequence_lengths,
model.batch_size: _batch_size
})
print("eval step: %d accuracy: %g time: %s" % (step, accuracy, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
if accuracy > 0.92 and step > 1000:
break
save_path = saver.save(session, "pretrained_lstm.ckpt", global_step=step)
print("saved to %s" % save_path)
writer.close()
def main():
g_2 = tf.Graph()
with g_2.as_default():
words_list, word_vectors, words_list_map = read._read_word2vec("gbn-word2vector.txt")
print("----------------------------------bg-1------------------------------")
        targets, words = read._read_train_data("padata-1.txt")
print("----------------------------------bg-2------------------------------")
config = Config()
datax = read._train_uniondata_index(words, config.maxSeqLength, words_list_map)
print("----------------------------------bg-------------------------------")
eval_targets, eval_words = read._read_train_data("padatapre-1.txt")
eval_datax = read._train_uniondata_index(eval_words, config.maxSeqLength, words_list_map)
print("----------------------------------bg-veal-------------------------------")
initializer = tf.random_uniform_initializer(-0.05, 0.05)
with tf.compat.v1.variable_scope("language_model", reuse=None, initializer=initializer):
train_model = PbAttention(config, True, word_vectors)
with tf.Session(graph=g_2) as session:
tf.global_variables_initializer().run()
for i in range(1):
print("In iteration: %d" % (i + 1))
run_epoch(session, train_model, datax, targets, eval_datax, eval_targets)
train_model.is_training = False
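            # NOTE: flipping is_training here does not rebuild the graph, so the
            # dropout ops created in rnn() remain in the exported model; exporting
            # a dropout-free graph would require building a second PbAttention
            # with is_training=False and the trained weights restored into it.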
prediction_signature = tf.saved_model.signature_def_utils.build_signature_def(
inputs={'input-x': train_model.tensor_info_x},
outputs={'out-y':train_model.tensor_info_y})
legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
            # save the trained model as a SavedModel so it can be called from Java
builder = tf.saved_model.builder.SavedModelBuilder("model/pb/"
+ datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
builder.add_meta_graph_and_variables(
session, [tf.saved_model.tag_constants.SERVING],
signature_def_map={
'predict_data': prediction_signature},
legacy_init_op=legacy_init_op)
builder.save(False)
graph_def = g_2.as_graph_def()
tf.train.write_graph(graph_def, "models/pb1/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"), 'expert-graph.pb', as_text=False)
tf.train.write_graph(graph_def, "models/pb1/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"), 'expert-graph2.pb', as_text=True)
if __name__ == "__main__":
main()