
I ran into a problem while building this GRU model. Could you please take a look?
from jieba import lcut
from torchtext.vocab import vocab
from collections import OrderedDict, Counter
from torchtext.transforms import VocabTransform
from torch.nn.utils.rnn import pack_padded_sequence, pad_sequence
from sklearn.preprocessing import LabelEncoder
import scipy.io as io
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch
from torch.optim import Adam
import numpy as np
from utils import metrics, cost, safeCreateDir
import time
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns
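# NOTE: `metrics`, `cost`, and `safeCreateDir` come from a local utils module
# that is not shown in this post. Below is a minimal sketch of compatible
# implementations, inferred purely from how they are called in this script
# (an assumption, not the actual utils code):
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def _metrics_sketch(preds, labels):
    # Returns (acc, precision, f1, recall), matching the unpacking order used below.
    acc = accuracy_score(labels, preds)
    precision = precision_score(labels, preds, average='macro', zero_division=0)
    recall = recall_score(labels, preds, average='macro', zero_division=0)
    f1 = f1_score(labels, preds, average='macro', zero_division=0)
    return acc, precision, f1, recall

def _safeCreateDir_sketch(path):
    # Creates the directory if it does not already exist.
    os.makedirs(path, exist_ok=True)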
# --- Data preprocessing ---
# Return True if a unicode character is a Chinese character.
def is_chinese(uchar):
    return '\u4e00' <= uchar <= '\u9fa5'

# Keep only the Chinese characters in a string; drop everything else.
def reserve_chinese(content):
    content_str = ''
    for i in content:
        if is_chinese(i):
            content_str += i
    return content_str
# Load the stop-word list.
def getStopWords():
    # Use a raw string so the backslashes in the Windows path are not treated as escapes.
    with open(r'D:\zjj project\Graduation project\Emotion_analysis\database\stopwords.txt', 'r', encoding='utf8') as file:
        words = [i.strip() for i in file.readlines()]
    return words

# Clean the text, tokenize it, and remove stop words.
def dataParse(text, stop_words):
    label, content = text.split(' #### ')
    # Strip non-Chinese characters
    content = reserve_chinese(content)
    # Tokenize with jieba
    words = lcut(content)
    # Remove stop words
    words = [i for i in words if i not in stop_words]
    return words, int(label)
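# A minimal sanity check for dataParse on one hand-written line. The
# "label #### text" layout is an assumption read off the split above; the
# exact tokens returned depend on jieba's segmentation and the stop-word list.
def _demo_dataParse():
    sample = '1 #### 今天天气真好，心情愉快'
    words, label = dataParse(sample, stop_words=[])
    print(words, label)  # e.g. ['今天', '天气', '真', '好', '心情', '愉快'] 1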
def getFormatData():
    # Use a raw string so the backslashes in the Windows path are not treated as escapes.
    with open(r'D:\zjj project\Graduation project\Emotion_analysis\database\data\data_sina.txt', 'r', encoding='utf8') as file:
        texts = file.readlines()
    stop_words = getStopWords()
    all_words = []
    all_labels = []
    for text in texts:
        content, label = dataParse(text, stop_words)
        if len(content) <= 0:
            continue
        all_words.append(content)
        all_labels.append(label)
    # Build the vocabulary by hand:
    # flatten all tokenized sentences into one list
    ws = sum(all_words, [])
    # count word frequencies
    set_ws = Counter(ws)
    # sort words by frequency; sorted() is ascending by default, so pass reverse=True for descending order
    keys = sorted(set_ws, key=lambda x: set_ws[x], reverse=True)
    # map each word to an index, starting from 1
    dict_words = dict(zip(keys, list(range(1, len(set_ws) + 1))))
    ordered_dict = OrderedDict(dict_words)
    # Build the vocab from the ordered dict and prepend the special tokens
    my_vocab = vocab(ordered_dict, specials=['<UNK>', '<SEP>'])
    # Map input tokens to their indices in the vocab
    vocab_transform = VocabTransform(my_vocab)
    vector = vocab_transform(all_words)
    # Convert to tensors
    vector = [torch.tensor(i) for i in vector]
    lengths = [len(i) for i in vector]
    # Pad the tensors so the network sees fixed-length inputs
    pad_seq = pad_sequence(vector, batch_first=True)
    labelencoder = LabelEncoder()
    labels = labelencoder.fit_transform(all_labels)
    data = pad_seq.numpy()
    num_classes = max(labels) + 1
    data = {'X': data,
            'label': labels,
            'num_classes': num_classes,
            'lengths': lengths,
            'num_words': len(my_vocab)}
    print(len(my_vocab))
    io.savemat('./dataset/data/data.mat', data)
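# Caveat: the vocab above is built from this corpus only, so looking up an
# unseen word later (e.g. at inference time) raises a RuntimeError unless a
# default index is set. A minimal OOV-safe lookup sketch, assuming the same
# Vocab object is passed in:
def _demo_oov_lookup(my_vocab):
    my_vocab.set_default_index(my_vocab['<UNK>'])  # route unknown tokens to <UNK>
    transform = VocabTransform(my_vocab)
    return transform([['这是', '一个', '未登录词']])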
# --- Dataset loading ---
class Data(Dataset):
    def __init__(self, mode='train'):
        data = io.loadmat('./dataset/data/data.mat')
        self.X = data['X']
        self.y = data['label']
        self.lengths = data['lengths']
        self.num_words = data['num_words'].item()
        # 60/20/20 train/val/test split
        train_X, val_X, train_y, val_y, train_length, val_length = train_test_split(
            self.X, self.y.squeeze(), self.lengths.squeeze(),
            test_size=0.4, random_state=1)
        val_X, test_X, val_y, test_y, val_length, test_length = train_test_split(
            val_X, val_y, val_length, test_size=0.5, random_state=2)
        if mode == 'train':
            self.X = train_X
            self.y = train_y
            self.lengths = train_length
        elif mode == 'val':
            self.X = val_X
            self.y = val_y
            self.lengths = val_length
        elif mode == 'test':
            self.X = test_X
            self.y = test_y
            self.lengths = test_length

    def __getitem__(self, item):
        return self.X[item], self.y[item], self.lengths[item]

    def __len__(self):
        return self.X.shape[0]
class getDataLoader():
    def __init__(self, batch_size):
        train_data = Data('train')
        val_data = Data('val')
        test_data = Data('test')
        self.traindl = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        self.valdl = DataLoader(val_data, batch_size=batch_size, shuffle=True, num_workers=4)
        self.testdl = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=4)
        self.num_words = train_data.num_words
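# A quick way to sanity-check the loaders: pull a single batch and print its
# shapes. Call this from under the __main__ guard, since num_workers > 0 spawns
# worker processes (required on Windows). The shapes in the comment are what
# the preprocessing above should produce.
def _demo_one_batch():
    data = getDataLoader(batch_size=64)
    inputs, targets, lengths = next(iter(data.traindl))
    print(inputs.shape, targets.shape, lengths.shape)  # (64, max_len) (64,) (64,)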
# --- Model definition ---
class GRU(nn.Module):
    def __init__(self, num_words, num_classes, input_size=64, hidden_dim=32, num_layer=2):
        super(GRU, self).__init__()
        self.embedding = nn.Embedding(num_words, input_size)
        self.net = nn.GRU(input_size, hidden_dim, num_layer, batch_first=True, bidirectional=True)
        self.classification = nn.Sequential(
            # The GRU is bidirectional, so the classifier sees the forward and
            # backward final hidden states concatenated: 2 * hidden_dim features.
            nn.Linear(hidden_dim * 2, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, num_classes)
        )

    def forward(self, x, lengths):
        x = self.embedding(x)
        # Pack the padded batch so the GRU skips the padding positions.
        pd = pack_padded_sequence(x, lengths=lengths, batch_first=True, enforce_sorted=False)
        output, hn = self.net(pd)
        # hn has shape (num_layers * 2, batch, hidden_dim); hn[-2] and hn[-1] are
        # the forward and backward states of the last layer. Using hn[-1] alone,
        # as the original code did, only captures the backward direction.
        feat = torch.cat([hn[-2], hn[-1]], dim=1)
        pred = self.classification(feat)
        return pred
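# A minimal shape check for the forward pass on random data; the vocabulary
# size, sequence lengths, and class count below are arbitrary placeholders:
def _demo_gru_forward():
    net = GRU(num_words=1000, num_classes=6)
    x = torch.randint(0, 1000, (4, 10))    # batch of 4 padded sequences, max length 10
    lengths = torch.tensor([10, 8, 5, 3])  # true lengths before padding
    pred = net(x, lengths)
    print(pred.shape)  # torch.Size([4, 6])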
def plot_acc(train_acc):
    sns.set(style='darkgrid')
    plt.figure(figsize=(10, 7))
    x = list(range(len(train_acc)))
    plt.plot(x, train_acc, alpha=0.9, linewidth=2, label='train acc')
    plt.xlabel('Epoch')
    plt.ylabel('Acc')
    plt.legend(loc='best')
    plt.savefig('results/acc.png', dpi=400)

def plot_loss(train_loss):
    sns.set(style='darkgrid')
    plt.figure(figsize=(10, 7))
    x = list(range(len(train_loss)))
    plt.plot(x, train_loss, alpha=0.9, linewidth=2, label='train loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig('results/loss.png', dpi=400)
# --- Training loop ---
class Trainer():
    def __init__(self):
        safeCreateDir('results/')
        safeCreateDir('saved_dict/')  # save_model() writes here; create it up front
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        self._init_data()
        self._init_model()

    def _init_data(self):
        data = getDataLoader(batch_size=64)
        self.traindl = data.traindl
        self.valdl = data.valdl
        self.testdl = data.testdl
        self.num_words = data.num_words

    def _init_model(self):
        self.net = GRU(self.num_words, 6).to(self.device)
        self.opt = Adam(self.net.parameters(), lr=1e-4, weight_decay=5e-4)
        self.cri = nn.CrossEntropyLoss()

    def save_model(self):
        torch.save(self.net.state_dict(), 'saved_dict/gru.pt')

    def load_model(self):
        self.net.load_state_dict(torch.load('saved_dict/gru.pt'))

    def train(self, epochs):
        pattern = 'Epoch: %d [===========] cost: %.2fs; loss: %.4f; train acc: %.4f; val acc: %.4f;'
        train_accs = []
        c_loss = []
        for epoch in range(epochs):
            cur_preds = np.empty(0)
            cur_labels = np.empty(0)
            cur_loss = 0
            start = time.time()
            for batch, (inputs, targets, lengths) in enumerate(self.traindl):
                # loadmat can change integer dtypes; nn.Embedding and
                # CrossEntropyLoss both expect Long tensors, so cast explicitly.
                inputs = inputs.to(self.device).long()
                targets = targets.to(self.device).long()
                # pack_padded_sequence requires the lengths tensor on the CPU
                lengths = lengths.to('cpu')
                pred = self.net(inputs, lengths)
                loss = self.cri(pred, targets)
                self.opt.zero_grad()
                loss.backward()
                self.opt.step()
                cur_preds = np.concatenate([cur_preds, pred.cpu().detach().numpy().argmax(axis=1)])
                cur_labels = np.concatenate([cur_labels, targets.cpu().numpy()])
                cur_loss += loss.item()
            acc, precision, f1, recall = metrics(cur_preds, cur_labels)
            val_acc, val_precision, val_f1, val_recall = self.val()
            train_accs.append(acc)
            c_loss.append(cur_loss)
            end = time.time()
            print(pattern % (epoch, end - start, cur_loss, acc, val_acc))
        self.save_model()
        plot_acc(train_accs)
        plot_loss(c_loss)
    # Under @torch.no_grad() no gradients are computed and no backpropagation
    # happens, which saves memory and time during evaluation.
    @torch.no_grad()
    def val(self):
        self.net.eval()
        cur_preds = np.empty(0)
        cur_labels = np.empty(0)
        for batch, (inputs, targets, lengths) in enumerate(self.valdl):
            inputs = inputs.to(self.device).long()
            targets = targets.to(self.device).long()
            lengths = lengths.to('cpu')
            pred = self.net(inputs, lengths)
            cur_preds = np.concatenate([cur_preds, pred.cpu().numpy().argmax(axis=1)])
            cur_labels = np.concatenate([cur_labels, targets.cpu().numpy()])
        acc, precision, f1, recall = metrics(cur_preds, cur_labels)
        self.net.train()
        return acc, precision, f1, recall
    @torch.no_grad()
    def test(self):
        print("test ...")
        self.load_model()
        pattern = 'test acc: %.4f precision: %.4f recall: %.4f f1: %.4f'
        self.net.eval()
        cur_preds = np.empty(0)
        cur_labels = np.empty(0)
        for batch, (inputs, targets, lengths) in enumerate(self.testdl):
            inputs = inputs.to(self.device).long()
            targets = targets.to(self.device).long()
            lengths = lengths.to('cpu')
            pred = self.net(inputs, lengths)
            cur_preds = np.concatenate([cur_preds, pred.cpu().numpy().argmax(axis=1)])
            cur_labels = np.concatenate([cur_labels, targets.cpu().numpy()])
        acc, precision, f1, recall = metrics(cur_preds, cur_labels)
        # sklearn's confusion_matrix expects (y_true, y_pred) in that order
        cv_conf = confusion_matrix(cur_labels, cur_preds)
        # display_labels must have one entry per class; this two-entry list only
        # fits a binary setup, while the model above is built with 6 classes
        labels11 = ['negative', 'active']
        disp = ConfusionMatrixDisplay(confusion_matrix=cv_conf, display_labels=labels11)
        disp.plot(cmap="Blues", values_format='')
        plt.savefig("results/ConfusionMatrix.tif", dpi=400)
        self.net.train()
        print(pattern % (acc, precision, recall, f1))
if __name__ == "__main__":
    getFormatData()           # preprocessing: clean the data and build the index vectors
    trainer = Trainer()
    trainer.train(epochs=30)  # train
    trainer.test()            # test