network爬虫 2023-12-07 22:33 采纳率: 20%
浏览 16
已结题

二分类改为多分类问题

import paddle
import paddlehub as hub
import ast
import argparse
from paddlehub.datasets.base_nlp_dataset import TextClassificationDataset


class MyDataset(TextClassificationDataset):
    """Multi-class text-classification dataset read from TSV files.

    Each split (train/dev/test) is a headered TSV file under ``base_path``.
    """

    # Directory containing train.tsv / dev.tsv / test.tsv.
    base_path = 'data/weibo_senti_100k'
    # Label list; for multi-class the format is ['0', '1', '2', '3', ...].
    # len(label_list) must match the num_classes passed to hub.Module.
    label_list = ['0', '1', '2', '3', '4', '5', '6']

    # BUG FIX: __init__ was dedented to module level, so it was never part of
    # the class and MyDataset(...) invoked the parent constructor with the
    # wrong arguments. It must be indented inside the class body.
    def __init__(self, tokenizer, max_seq_len: int = 128, mode: str = 'train'):
        """Load one split of the dataset.

        Args:
            tokenizer: tokenizer returned by the hub module.
            max_seq_len: maximum number of tokens per example.
            mode: 'train', 'test', or anything else for the dev split.
        """
        # Map the requested split onto its TSV file.
        if mode == 'train':
            data_file = 'train.tsv'
        elif mode == 'test':
            data_file = 'test.tsv'
        else:
            data_file = 'dev.tsv'
        super().__init__(
            base_path=self.base_path,
            tokenizer=tokenizer,
            max_seq_len=max_seq_len,
            mode=mode,
            data_file=data_file,
            label_list=self.label_list,
            is_file_with_header=True)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(__doc__)
    parser.add_argument("--num_epoch", type=int, default=3, help="Number of epochs for fine-tuning.")
    parser.add_argument("--use_gpu", type=ast.literal_eval, default=True,
                        help="Whether use GPU for fine-tuning, input should be True or False")
    parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
    parser.add_argument("--max_seq_len", type=int, default=128, help="Number of words of the longest sequence.")
    parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
    parser.add_argument("--checkpoint_dir", type=str, default='./ernie_checkpoint',
                        help="Directory to model checkpoint")
    parser.add_argument("--save_interval", type=int, default=1, help="Save checkpoint every n epoch.")
    args = parser.parse_args()

    # Select model, task, and class count. num_classes must equal
    # len(MyDataset.label_list); if it changes, any checkpoint previously
    # saved in checkpoint_dir becomes incompatible.
    model = hub.Module(name='ernie_tiny', task='seq-cls', num_classes=len(MyDataset.label_list))

    # Build the tokenizer once and share it across all three splits.
    tokenizer = model.get_tokenizer()
    train_dataset = MyDataset(tokenizer=tokenizer, max_seq_len=args.max_seq_len, mode='train')
    dev_dataset = MyDataset(tokenizer=tokenizer, max_seq_len=args.max_seq_len, mode='dev')
    test_dataset = MyDataset(tokenizer=tokenizer, max_seq_len=args.max_seq_len, mode='test')

    optimizer = paddle.optimizer.Adam(learning_rate=args.learning_rate, parameters=model.parameters())
    # BUG FIX: use_gpu was hard-coded to False, silently ignoring --use_gpu.
    # NOTE: the "Variable Shape not match ... [1024, 7] but load ... [1024, 3]"
    # AssertionError is raised when checkpoint_dir still contains a checkpoint
    # (including Adam moment tensors such as linear_19.w_0_moment1_0) saved
    # with num_classes=3. After changing the class count, delete the old
    # checkpoint directory or point --checkpoint_dir at a fresh one.
    trainer = hub.Trainer(model, optimizer, checkpoint_dir=args.checkpoint_dir, use_gpu=args.use_gpu)
    trainer.train(train_dataset, epochs=args.num_epoch, batch_size=args.batch_size, eval_dataset=dev_dataset,
                  save_interval=args.save_interval)
    # Evaluate the fine-tuned model on the held-out test split.
    trainer.evaluate(test_dataset, batch_size=args.batch_size)

出错提示为
AssertionError: Variable Shape not match, Variable [ linear_19.w_0_moment1_0 ] need tensor with shape [1024, 7] but load set tensor with shape [1024, 3]

  • 写回答

15条回答 默认 最新

  • 专家-郭老师 Java领域新星创作者 2023-12-07 22:34
    关注

    由人工智能和答主提供,可以参考如下,如果回答的不正确,及时评论区回复,我追加回答,谢谢。


    这个错误不是预训练模型本身的问题,而是断点续训时加载了旧的 checkpoint:`./ernie_checkpoint` 目录里保存的是之前按 3 分类(num_classes=3)训练时的模型参数和优化器状态(报错中的 linear_19.w_0_moment1_0 是 Adam 优化器的动量张量,形状为 [1024, 3]),而现在模型已改成 7 分类,分类层需要 [1024, 7] 的张量,加载时形状不匹配就触发了 AssertionError。解决办法:删除或清空 `./ernie_checkpoint` 目录,或者用 `--checkpoint_dir` 指定一个新目录,从头重新训练;重新下载预训练模型并不能解决该问题。另外注意代码中 `hub.Trainer(..., use_gpu=False)` 把 use_gpu 写死成了 False,应改为 `use_gpu=args.use_gpu`,否则 `--use_gpu` 参数不生效。

    修改后代码:

    import paddle
    import paddlehub as hub
    import ast
    import argparse
    from paddlehub.datasets.base_nlp_dataset import TextClassificationDataset
    
    class MyDataset(TextClassificationDataset):
        """Multi-class text-classification dataset read from TSV files.

        Each split (train/dev/test) is a headered TSV file under ``base_path``.
        """

        # Directory containing train.tsv / dev.tsv / test.tsv.
        base_path = 'data/weibo_senti_100k'
        # Label list; for multi-class the format is ['0', '1', '2', '3', ...].
        # len(label_list) must match the num_classes passed to hub.Module.
        label_list = ['0', '1', '2', '3', '4', '5', '6']

        # BUG FIX: __init__ was dedented out of the class, so it was never
        # used as the constructor. It must be indented inside the class body.
        def __init__(self, tokenizer, max_seq_len: int = 128, mode: str = 'train'):
            """Load one split of the dataset.

            Args:
                tokenizer: tokenizer returned by the hub module.
                max_seq_len: maximum number of tokens per example.
                mode: 'train', 'test', or anything else for the dev split.
            """
            # Map the requested split onto its TSV file.
            if mode == 'train':
                data_file = 'train.tsv'
            elif mode == 'test':
                data_file = 'test.tsv'
            else:
                data_file = 'dev.tsv'
            super().__init__(
                base_path=self.base_path,
                tokenizer=tokenizer,
                max_seq_len=max_seq_len,
                mode=mode,
                data_file=data_file,
                label_list=self.label_list,
                is_file_with_header=True)
    
    if __name__ == '__main__':
        parser = argparse.ArgumentParser(__doc__)
        parser.add_argument("--num_epoch", type=int, default=3, help="Number of epochs for fine-tuning.")
        parser.add_argument("--use_gpu", type=ast.literal_eval, default=True,
                            help="Whether use GPU for fine-tuning, input should be True or False")
        parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
        parser.add_argument("--max_seq_len", type=int, default=128, help="Number of words of the longest sequence.")
        parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
        parser.add_argument("--checkpoint_dir", type=str, default='./ernie_checkpoint',
                            help="Directory to model checkpoint")
        parser.add_argument("--save_interval", type=int, default=1, help="Save checkpoint every n epoch.")
        args = parser.parse_args()

        # Select model, task, and class count. num_classes must equal
        # len(MyDataset.label_list); if it changes, any checkpoint previously
        # saved in checkpoint_dir becomes incompatible.
        model = hub.Module(name='ernie_tiny', task='seq-cls', num_classes=len(MyDataset.label_list))

        # Build the tokenizer once and share it across all three splits.
        tokenizer = model.get_tokenizer()
        train_dataset = MyDataset(tokenizer=tokenizer, max_seq_len=args.max_seq_len, mode='train')
        dev_dataset = MyDataset(tokenizer=tokenizer, max_seq_len=args.max_seq_len, mode='dev')
        test_dataset = MyDataset(tokenizer=tokenizer, max_seq_len=args.max_seq_len, mode='test')

        optimizer = paddle.optimizer.Adam(learning_rate=args.learning_rate, parameters=model.parameters())
        # BUG FIX: use_gpu was hard-coded to False, silently ignoring --use_gpu.
        # NOTE: the reported AssertionError ([1024, 7] vs [1024, 3]) means
        # checkpoint_dir still holds a checkpoint saved with num_classes=3;
        # delete that directory or pass a fresh --checkpoint_dir after
        # changing the number of classes.
        trainer = hub.Trainer(model, optimizer, checkpoint_dir=args.checkpoint_dir, use_gpu=args.use_gpu)
        trainer.train(train_dataset, epochs=args.num_epoch, batch_size=args.batch_size, eval_dataset=dev_dataset,
                      save_interval=args.save_interval)
        # Evaluate the fine-tuned model on the held-out test split.
        trainer.evaluate(test_dataset, batch_size=args.batch_size)
    
    
    
    评论 编辑记录

报告相同问题?

问题事件

  • 已结题 (查看结题原因) 12月14日
  • 创建了问题 12月7日

悬赏问题

  • ¥15 如何构建全国统一的物流管理平台?
  • ¥100 ijkplayer使用AndroidStudio/CMake编译,如何支持 rtsp 直播流?
  • ¥20 和学习数据的传参方式,选择正确的传参方式有关
  • ¥15 这是网络安全里面的poem code
  • ¥15 用js遍历数据并对非空元素添加css样式
  • ¥15 使用autodl云训练,希望有直接运行的代码(关键词-数据集)
  • ¥50 python写segy数据出错
  • ¥20 关于线性结构的问题:希望能从头到尾完整地帮我改一下,困扰我很久了
  • ¥30 3D多模态医疗数据集-视觉问答
  • ¥20 设计一个二极管稳压值检测电路