吃了糖的工藤新爵 2022-02-23 18:33 · Acceptance rate: 0%
13 views

Problems with a hands-on deep learning Chinese chatbot project

Symptoms and background of the problem
Code related to the problem (please do not paste screenshots)
Run results and error output
My approach and the methods I have tried
The result I want to achieve
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time
import numpy as np
import re
import os
from datetime import datetime
import io
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import unicodedata
from matplotlib.font_manager import FontProperties
font = FontProperties(fname="/Library/Fonts/Songti.ttc",size=8)


def process_cut(source_path,cut_path):
    # Split the corpus into conversations: lines starting with "E" separate
    # dialogues, lines starting with "M" are the utterances themselves.
    convs = []
    with open(source_path,'r',encoding='utf8') as f:
        complete_dialog = []
        for line in f:
            line = line.strip('\n')
            line = re.sub("[\s+\.\!\/_,$%?^*(+\"\']+|[+!,。?~@#.&*()""]+","",line)
            if line == "":
                continue
            if line[0] == "E":
                if complete_dialog:
                    convs.append(complete_dialog)
                    complete_dialog = []
            if line[0] == "M":
                complete_dialog.append(line[1:])
        if complete_dialog:
            # keep the last dialogue, which is not followed by a closing "E" line
            convs.append(complete_dialog)
    return convs


def question_answer(convs):
    questions = []
    answers = []
    for conv in convs:
        if len(conv) == 1:
            continue
        if len(conv) % 2 !=0 :
            conv = conv[:-1]
        for i in range(len(conv)):
            if i % 2 ==0:
                questions.append("<start> " + " ".join(conv[i]) + " <end>")
            else:
                answers.append("<start> " + " ".join(conv[i]) + " <end>")
    return questions,answers


def tokenize(datas):
    tokenizer = keras.preprocessing.text.Tokenizer(filters="")
    tokenizer.fit_on_texts(datas)
    voc_li = tokenizer.texts_to_sequences(datas)
    voc_li = keras.preprocessing.sequence.pad_sequences(voc_li,padding="post")
    return voc_li,tokenizer


# Encoder: embedding + GRU over the question sequence; returns the per-step outputs and the final hidden state.
class Encoder(tf.keras.Model):
    def __init__(self,vocab_size,embedding_dim,enc_units,batch_sz):
        super(Encoder,self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = keras.layers.Embedding(vocab_size,embedding_dim)
        self.gru = keras.layers.GRU(
            self.enc_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer="glorot_uniform"
        )
    @tf.function
    def call(self,x,hidden):
        x = self.embedding(x)
        output, state = self.gru(x,initial_state=hidden)
        return output,state
    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz,self.enc_units))


# Bahdanau (additive) attention: scores each encoder output against the decoder hidden state.
class BahdanauAttentionMechanism(tf.keras.layers.Layer):
    def __init__(self,units):
        super(BahdanauAttentionMechanism,self).__init__()
        self.W1 = layers.Dense(units)
        self.W2 = layers.Dense(units)
        self.v = layers.Dense(1)
    @tf.function
    def call(self,query,values):
        hidden_with_time_axis = tf.expand_dims(query,1)
        score = self.v(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))
        attention_weights = tf.nn.softmax(score,axis=1)
        context_vector = attention_weights * values
        context_vector = tf.math.reduce_sum(context_vector,axis=1)
        return context_vector,attention_weights


# Decoder: attends over the encoder outputs, then predicts the next token with a GRU and a dense layer.
class Decoder(tf.keras.Model):
    def __init__(self,vocab_size,embedding_dim,dec_units,batch_sz):
        super(Decoder,self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = layers.Embedding(vocab_size,embedding_dim)
        self.gru = layers.GRU(self.dec_units, return_sequences=True, return_state=True, recurrent_initializer="glorot_uniform")
        self.fc = layers.Dense(vocab_size)
        self.attention = BahdanauAttentionMechanism(self.dec_units)
    @tf.function
    def call(self,x,hidden,enc_output):
        context_vector, attention_weights = self.attention(hidden, enc_output)
        x = self.embedding(x)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        output = tf.reshape(output, (-1, output.shape[2]))
        x = self.fc(output)
        return x, state, attention_weights


# Masked loss: padding positions (token id 0) do not contribute to the cross-entropy.
def loss(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real,0))
    loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction="none")
    loss_value = loss_obj(real, pred)
    mask = tf.cast(mask, dtype=loss_value.dtype)
    loss_value *= mask
    return tf.math.reduce_mean(loss_value)


# Single training step: encode the question, then decode the answer with teacher forcing
# under a GradientTape so the gradients can be taken afterwards.
def grad_loss(q,a,q_hidden,encoder,decoder,q_index,BATCH_SIZE):
    loss_value = 0
    with tf.GradientTape() as tape:
        q_output, q_hidden = encoder(q, q_hidden)
        a_hidden = q_hidden
        a_input = tf.expand_dims(
            [q_index.word_index["<start>"]]*BATCH_SIZE,1)
        for vector in range(1, a.shape[1]):
            predictions, a_hidden, _ = decoder(a_input, a_hidden, q_output)
            loss_value += loss(a[:, vector], predictions)
            # teacher forcing: feed the ground-truth token as the next decoder input
            a_input = tf.expand_dims(a[:, vector], 1)
    batch_loss = loss_value / int(a.shape[1])
    variables = encoder.trainable_variables + decoder.trainable_variables
    return batch_loss, tape.gradient(loss_value, variables)


def optimizer_loss(q,a,q_hidden,encoder,decoder,q_index,BATCH_SIZE,optimizer):
    batch_loss, grads = grad_loss(q,a,q_hidden,encoder,decoder,q_index,BATCH_SIZE)
    variables = encoder.trainable_variables + decoder.trainable_variables
    optimizer.apply_gradients(zip(grads, variables))
    return batch_loss


def source_data(source_path):
    convs = process_cut(source_path,None)
    questions, answers = question_answer(convs)
    return questions, answers

# Training loop: reset the encoder hidden state each epoch, run teacher-forced batches,
# write TensorBoard summaries, and checkpoint periodically.
def train_model(q_hidden, encoder, decoder, q_index, BATCH_SIZE, dataset, steps_per_epoch, optimizer, checkpoint, checkpoint_prefix, summary_writer):
    i = 0
    EPOCHS = 200
    for epoch in range(EPOCHS):
        start = time.time()
        q_hidden = encoder.initialize_hidden_state()
        total_loss = 0
        for (batch, (q, a)) in enumerate(dataset.take(steps_per_epoch)):
            batch_loss = optimizer_loss(q, a, q_hidden, encoder, decoder, q_index, BATCH_SIZE, optimizer)
            total_loss += batch_loss
            with summary_writer.as_default():
                tf.summary.scalar("batch loss", batch_loss.numpy(), step=epoch)
            if batch % 100 == 0:
                print("Epoch {}, batch {}, loss: {:.4f}".format(epoch + 1, batch + 1, batch_loss.numpy()))
        with summary_writer.as_default():
            tf.summary.scalar("total loss", total_loss / steps_per_epoch, step=epoch)
        if (epoch + 1) % 100 == 0:
            i += 1
            print("==== Saving model checkpoint #{} ====".format(i))
            checkpoint.save(file_prefix=checkpoint_prefix)
        print("Epoch {}, total loss: {:.4f}".format(epoch + 1, total_loss / steps_per_epoch))
        print("Epoch time: {:.1f} s".format(time.time() - start))


def preprocess_question(question):
    question = "<start> "+" ".join(question) + " <end>"
    return question

def max_length(vectors):
    return max(len(vector) for vector in vectors)

def convert(index, vectors):
    for vector in vectors:
        if vector != 0:
            print("{}-->{}".format(vector, index.index_word[vector]))


if __name__ == "__main__":
    stamp = datetime.now().strftime("%Y%m%d-%H:%M:%S")
    source_path ="./chat/chat data.py"
    convs = process_cut(source_path,None)
    questions,answers = question_answer(convs)
    q_vec , q_index = tokenize(questions)
    a_vec, a_index = tokenize(answers)
    q_max_len = max_length(q_vec)
    a_max_len = max_length(a_vec)
    convert(q_index, q_vec[0])
    BUFFER_SIZE = len(q_vec)
    print("buffer size:",BUFFER_SIZE)
    BATCH_SIZE = 64
    step_per_epoch = len(q_vec)//BATCH_SIZE
    embedding_dim = 256
    units = 1024
    q_vocab_size = len(q_index.word_index)+1
    a_vocab_size = len(a_index.word_index)+1
    dataset = tf.data.Dataset.from_tensor_slices((q_vec, a_vec)).shuffle(BUFFER_SIZE)
    q_batch, a_batch = next(iter(dataset))
    print("question batch:", q_batch.shape)
    print("answer batch:", a_batch.shape)
    log_path = "logs3\\chat\\"
    summary_writer = tf.summary.create_file_writer(log_path)
    tf.summary.trace_on(graph=True, profiler=True)
    encoder = Encoder(
        q_vocab_size,
        embedding_dim,
        units,
        BATCH_SIZE)
    q_hidden = encoder.initialize_hidden_state()
    print(q_hidden)
    q_output, q_hidden = encoder.call(q_batch, q_hidden)
    with summary_writer.as_default():
        tf.summary.trace_export(name="chat-en", step=0, profiler_outdir=log_path)

        tf.summary.trace_on(graph=True, profiler=True)
        attention_layer =BahdanauAttentionMechanism(10)
        attention_result, attention_weights = attention_layer.call(q_hidden, q_output)
    with summary_writer.as_default():
        tf.summary.trace_export(name="chat-atten", step=0, profiler_outdir=log_path)
        tf.summary.trace_on(graph=True, profiler=True)
        decoder =Decoder(
            a_vocab_size,
            embedding_dim,
            units,
            BATCH_SIZE
        )
        a_output, _, _ = decoder.call(
            tf.random.uniform((64,1)),
            q_hidden,
            q_output
        )
    with summary_writer.as_default():
        tf.summary.trace_export(name="chat-dec", step=0, profiler_outdir=log_path)
        optimizer = tf.keras.optimizers.Adam()
        checkpoint_dir = "./models"
        checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
        checkpoint = tf.train.Checkpoint(
            optimizer=optimizer,
            encoder=encoder,
            decoder=decoder
        )

Why does it raise the error: ValueError: Input 0 of layer "gru" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (17, 256)?


1 answer

  • 三只小菜猿 (Rising Star Creator, PHP) 2022-02-23 18:42

    Look at this in your model: "expected ndim=3, found ndim=2. Full shape received: (17, 256)". The error is already telling you that the layer expects a 3-D input but the tensor it actually received is only 2-D, so check whether the input you feed the GRU has the wrong shape.
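
    For reference, the most likely cause given the posted code is that the tf.data.Dataset is never batched: next(iter(dataset)) then yields a single padded question of shape (17,), and after the Embedding layer it becomes (17, 256), which is exactly the 2-D tensor the GRU complains about. Below is a minimal sketch of the usual fix, reusing the variable names from the question (q_vec, a_vec, BUFFER_SIZE, BATCH_SIZE); treat it as an illustration, not the only possible repair.

    # Batch the dataset so each element has shape (BATCH_SIZE, seq_len);
    # the Embedding layer then produces the 3-D (batch, time, features) input the GRU expects.
    dataset = (
        tf.data.Dataset.from_tensor_slices((q_vec, a_vec))
        .shuffle(BUFFER_SIZE)
        .batch(BATCH_SIZE, drop_remainder=True)  # drop the last incomplete batch
    )
    q_batch, a_batch = next(iter(dataset))
    print(q_batch.shape)  # e.g. (64, 17) -> embedding output (64, 17, 256), ndim=3

    With the dataset batched, the hidden state from encoder.initialize_hidden_state(), which has shape (BATCH_SIZE, units), also lines up with the batch dimension of the GRU input.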


