是小杜吖 2023-12-12 17:39

Machine learning: AttributeError

While fine-tuning the sat model from Tsinghua's open-source VisualGLM-6B, the run fails with the error below. My guess is that a dependency version upgrade removed this attribute.
AttributeError: 'FakeTokenizer' object has no attribute 'encode'


How can this be fixed? Any guidance would be appreciated.
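
To check whether a dependency change is really the cause, here is a minimal diagnostic, assuming it is pasted right after tokenizer = get_tokenizer(args) inside create_dataset_function in the script below (the getattr fallback is there because not every sat release necessarily exposes __version__):

import sat

# Which SwissArmyTransformer (sat) release is installed?
print("sat version:", getattr(sat, "__version__", "unknown"))
# What did get_tokenizer(args) actually return, and does it expose encode()?
print("tokenizer class:", type(tokenizer).__name__)   # expected here: FakeTokenizer
print("has encode():", hasattr(tokenizer, "encode"))  # False reproduces the error

The full fine-tuning script is below:
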
import os
import torch
import argparse

from sat import mpu, get_args, get_tokenizer
from sat.training.deepspeed_training import training_main
from model import VisualGLMModel
from sat.model.finetune import PTuningV2Mixin
from sat.model.finetune.lora2 import LoraMixin

class FineTuneVisualGLMModel(VisualGLMModel):
    def __init__(self, args, transformer=None, parallel_output=True, **kw_args):
        super().__init__(args, transformer=transformer, parallel_output=parallel_output, **kw_args)
        if args.use_ptuning:
            self.add_mixin("ptuning", PTuningV2Mixin(args.num_layers, args.hidden_size // args.num_attention_heads, args.num_attention_heads, args.pre_seq_len))
        if args.use_lora:
            self.add_mixin("lora", LoraMixin(args.num_layers, args.lora_rank, layer_range=args.layer_range), reinit=True)
            # self.get_mixin("eva").model.glm_proj = replace_linear_with_lora(self.get_mixin("eva").model.glm_proj, LoraLinear, args.lora_rank)
        elif args.use_qlora:
            self.add_mixin("lora", LoraMixin(args.num_layers, args.lora_rank, layer_range=args.layer_range, qlora=True), reinit=True)
        self.args = args
        
    @classmethod
    def add_model_specific_args(cls, parser):
        group = parser.add_argument_group('VisualGLM-finetune', 'VisualGLM finetune Configurations')
        group.add_argument('--pre_seq_len', type=int, default=8)
        group.add_argument('--lora_rank', type=int, default=10)
        group.add_argument('--use_ptuning', action="store_true")
        group.add_argument('--use_lora', action="store_true")
        group.add_argument('--use_qlora', action="store_true")
        group.add_argument('--layer_range', nargs='+', type=int, default=None)
        return super().add_model_specific_args(parser)

    def disable_untrainable_params(self):
        enable = []
        if self.args.use_ptuning:
            enable.extend(['ptuning'])
        if self.args.use_lora or self.args.use_qlora:
            enable.extend(['matrix_A', 'matrix_B'])
        for n, p in self.named_parameters():
            flag = False
            for e in enable:
                if e.lower() in n.lower():
                    flag = True
                    break
            if not flag:
                p.requires_grad_(False)
            else:
                print(n)


def get_batch(data_iterator, args, timers):
    # Items and their type.
    keys = ['input_ids', 'labels']
    datatype = torch.int64

    # Broadcast data.
    timers('data loader').start()
    if data_iterator is not None:
        data = next(data_iterator)
    else:
        data = None
    timers('data loader').stop()
    data_b = mpu.broadcast_data(keys, data, datatype)
    data_i = mpu.broadcast_data(['image'], data, torch.float32)
    # Unpack.
    tokens = data_b['input_ids'].long()
    labels = data_b['labels'].long()
    img = data_i['image']
    if args.fp16:
        img = img.half()
    
    return tokens, labels, img, data['pre_image']


from torch.nn import CrossEntropyLoss

def forward_step(data_iterator, model, args, timers):
    """Forward step."""

    # Get the batch.
    timers('batch generator').start()
    tokens, labels, image, pre_image = get_batch(
        data_iterator, args, timers)
    timers('batch generator').stop()

    logits = model(input_ids=tokens, image=image, pre_image=pre_image)[0]
    dtype = logits.dtype
    lm_logits = logits.to(torch.float32)

    # Shift so that tokens < n predict n
    shift_logits = lm_logits[..., :-1, :].contiguous()
    shift_labels = labels[..., 1:].contiguous()
    # Flatten the tokens
    loss_fct = CrossEntropyLoss(ignore_index=-100)
    loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

    lm_logits = lm_logits.to(dtype)
    loss = loss.to(dtype)
    return loss, {'loss': loss}


from model.blip2 import BlipImageEvalProcessor
from torch.utils.data import Dataset
import json
from PIL import Image

class FewShotDataset(Dataset):
    def __init__(self, path, processor, tokenizer, args):
        max_seq_length = args.max_source_length + args.max_target_length
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        self.images = []
        self.input_ids = []
        self.labels = []
        for item in data:
            image = processor(Image.open(item['img']).convert('RGB'))
            # The reported AttributeError surfaces here: encode() is called on the
            # tokenizer returned by get_tokenizer(args), which the error message
            # identifies as a FakeTokenizer.
            input0 = tokenizer.encode("<img>", add_special_tokens=False)
            input1 = [tokenizer.pad_token_id] * args.image_length
            input2 = tokenizer.encode("</img>问:"+item['prompt']+"\n答:", add_special_tokens=False)
            a_ids = sum([input0, input1, input2], [])
            b_ids = tokenizer.encode(text=item['label'], add_special_tokens=False)
            if len(a_ids) > args.max_source_length - 1:
                a_ids = a_ids[: args.max_source_length - 1]
            if len(b_ids) > args.max_target_length - 2:
                b_ids = b_ids[: args.max_target_length - 2]
            pre_image = len(input0)
            input_ids = tokenizer.build_inputs_with_special_tokens(a_ids, b_ids)

            context_length = input_ids.index(tokenizer.bos_token_id)
            mask_position = context_length - 1
            labels = [-100] * context_length + input_ids[mask_position+1:]
            
            pad_len = max_seq_length - len(input_ids)
            input_ids = input_ids + [tokenizer.pad_token_id] * pad_len
            labels = labels + [tokenizer.pad_token_id] * pad_len
            if args.ignore_pad_token_for_loss:
                labels = [(l if l != tokenizer.pad_token_id else -100) for l in labels]
            self.images.append(image)
            self.input_ids.append(input_ids)
            self.labels.append(labels)
        self.pre_image = pre_image

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return {
            "image": self.images[idx],
            "input_ids": self.input_ids[idx],
            "labels": self.labels[idx],
            "pre_image": self.pre_image
        }


def create_dataset_function(path, args):
    # get_tokenizer(args) is the call that hands back the FakeTokenizer named in
    # the error; FewShotDataset then calls encode() on it and fails.
    tokenizer = get_tokenizer(args)
    image_processor = BlipImageEvalProcessor(224)

    dataset = FewShotDataset(path, image_processor, tokenizer, args)
    return dataset


if __name__ == '__main__':
    py_parser = argparse.ArgumentParser(add_help=False)
    py_parser.add_argument('--max_source_length', type=int)
    py_parser.add_argument('--max_target_length', type=int)
    py_parser.add_argument('--ignore_pad_token_for_loss', type=bool, default=True)
    # py_parser.add_argument('--old_checkpoint', action="store_true")
    py_parser.add_argument('--source_prefix', type=str, default="")
    py_parser = FineTuneVisualGLMModel.add_model_specific_args(py_parser)
    known, args_list = py_parser.parse_known_args()
    args = get_args(args_list)
    args = argparse.Namespace(**vars(args), **vars(known))
    args.device = 'cpu'

    model_type = 'visualglm-6b'
    model, args = FineTuneVisualGLMModel.from_pretrained(model_type, args)
    if torch.cuda.is_available():
        model = model.to('cuda')
    tokenizer = get_tokenizer(args)
    label_pad_token_id = -100 if args.ignore_pad_token_for_loss else tokenizer.pad_token_id
    def data_collator(examples):
        for example in examples:
            example['input_ids'] = torch.tensor(example['input_ids'], dtype=torch.long)
            example['labels'] = torch.tensor(example['labels'], dtype=torch.long)
        ret = {
            'input_ids': torch.stack([example['input_ids'] for example in examples]),
            'labels': torch.stack([example['labels'] for example in examples]),
            'image': torch.stack([example['image'] for example in examples]),
            'pre_image': example['pre_image']
        }
        return ret
    training_main(args, model_cls=model, forward_step_function=forward_step, create_dataset_function=create_dataset_function, collate_fn=data_collator)


1 answer

  • 想你依然心痛 replied 2023-12-13 20:52

    It looks like you are running into a problem while fine-tuning the sat model from Tsinghua's open-source VisualGLM-6B. Your error message is "'FakeTokenizer' object has no attribute 'encode'".

    This error usually means that the object you are calling does not have the attribute or method you are trying to use. In this case you are calling the 'encode' method on a 'FakeTokenizer' object, and that object does not provide it.

    How to fix it depends on your code and on what you are trying to achieve. Here are some possible approaches (a code sketch follows the list):

    1. Make sure you are using the right tokenizer: a proper tokenizer should expose an 'encode' method. Check that it is initialized correctly and that it comes from the right library or API.
    2. Check your code: make sure 'encode' is called on the right object; you may be calling it on an object that simply does not implement that method.
    3. Check your dependency versions: if a library your code relies on has been upgraded, the new release may have removed attributes or methods that the old one provided. In that case you either adapt the code to the new version, or pin the library (for example SwissArmyTransformer/sat) back to the version the project was written against.
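
    As a concrete (unverified) sketch of points 1 and 3: one way to sidestep sat's FakeTokenizer is to load the ChatGLM-6B tokenizer directly through Hugging Face transformers, assuming that tokenizer provides everything FewShotDataset relies on (encode, build_inputs_with_special_tokens, pad_token_id, bos_token_id); treat the model name and the call below as assumptions to verify against your own environment:

    from transformers import AutoTokenizer
    from model.blip2 import BlipImageEvalProcessor

    def create_dataset_function(path, args):
        # Assumption: replace sat's get_tokenizer(args), which returned the
        # FakeTokenizer, with the real ChatGLM-6B tokenizer from Hugging Face.
        tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
        image_processor = BlipImageEvalProcessor(224)
        # FewShotDataset is the class defined in your fine-tuning script above.
        return FewShotDataset(path, image_processor, tokenizer, args)

    Alternatively, if a recent SwissArmyTransformer upgrade really is the culprit, downgrading to the version the VisualGLM-6B repository was developed against may restore the old get_tokenizer behaviour.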

    For a more specific code example I would need more context, including how your tokenizer is initialized and where and how 'encode' is called, but I hope the above helps.
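
    One more thing to watch: the script also calls get_tokenizer(args) in the __main__ block to derive label_pad_token_id, so any replacement tokenizer would need to be used there as well, e.g. (same assumptions as above):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
    label_pad_token_id = -100 if args.ignore_pad_token_for_loss else tokenizer.pad_token_id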

    This answer was accepted by the asker as the best answer.
