对 chatglm-6b-int4 进行 P-Tuning(ptuning)训练时,按照官方教程运行 `bash train.sh` 后无法开始训练,报错:AttributeError: 'NoneType' object has no attribute 'int4WeightExtractionHalf'
报错内容:
-1
06/14/2024 16:42:03 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False
06/14/2024 16:42:03 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_backend=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=no,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
generation_config=None,
generation_max_length=None,
generation_num_beams=None,
gradient_accumulation_steps=16,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,
jit_mode_eval=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=0.02,
length_column_name=length,
load_best_model_at_end=False,
local_rank=-1,
log_level=passive,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=output\qa-chatglm-6b-int4-pt-v2\runs\Jun14_16-42-03_DESKTOP-68C74C8,
logging_first_step=False,
logging_nan_inf_filter=True,
logging_steps=10,
logging_strategy=steps,
lr_scheduler_type=linear,
max_grad_norm=1.0,
max_steps=100,
metric_for_best_model=None,
mp_parameters=,
no_cuda=False,
num_train_epochs=3.0,
optim=adamw_hf,
optim_args=None,
output_dir=output\qa-chatglm-6b-int4-pt-v2,
overwrite_output_dir=True,
past_index=-1,
per_device_eval_batch_size=1,
per_device_train_batch_size=1,
predict_with_generate=True,
prediction_loss_only=False,
push_to_hub=False,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
remove_unused_columns=True,
report_to=[],
resume_from_checkpoint=None,
run_name=output\qa-chatglm-6b-int4-pt-v2,
save_on_each_node=False,
save_safetensors=False,
save_steps=50,
save_strategy=steps,
save_total_limit=None,
seed=42,
sharded_ddp=[],
skip_memory_metrics=True,
sortish_sampler=False,
tf32=None,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torchdynamo=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_ipex=False,
use_legacy_prediction_loop=False,
use_mps_device=False,
warmup_ratio=0.0,
warmup_steps=0,
weight_decay=0.0,
xpu_backend=None,
)
D:\Anaconda\envs\DL\lib\site-packages\datasets\load.py:2552: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=<use_auth_token>' instead.
warnings.warn(
[INFO|configuration_utils.py:667] 2024-06-14 16:42:04,933 >> loading configuration file D:\langchain-model\chatglm-6b-int4\config.json
[INFO|configuration_utils.py:667] 2024-06-14 16:42:04,935 >> loading configuration file D:\langchain-model\chatglm-6b-int4\config.json
[INFO|configuration_utils.py:725] 2024-06-14 16:42:04,935 >> Model config ChatGLMConfig {
"_name_or_path": "D:\\langchain-model\\chatglm-6b-int4",
"architectures": [
"ChatGLMModel"
],
"auto_map": {
"AutoConfig": "configuration_chatglm.ChatGLMConfig",
"AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
"AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration"
},
"bos_token_id": 130004,
"eos_token_id": 130005,
"gmask_token_id": 130001,
"hidden_size": 4096,
"inner_hidden_size": 16384,
"layernorm_epsilon": 1e-05,
"mask_token_id": 130000,
"max_sequence_length": 2048,
"model_type": "chatglm",
"num_attention_heads": 32,
"num_layers": 28,
"pad_token_id": 3,
"position_encoding_2d": true,
"pre_seq_len": null,
"prefix_projection": false,
"quantization_bit": 4,
"quantization_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.30.0",
"use_cache": true,
"vocab_size": 130528
}
[INFO|tokenization_utils_base.py:1821] 2024-06-14 16:42:04,937 >> loading file ice_text.model
[INFO|tokenization_utils_base.py:1821] 2024-06-14 16:42:04,937 >> loading file added_tokens.json
[INFO|tokenization_utils_base.py:1821] 2024-06-14 16:42:04,937 >> loading file special_tokens_map.json
[INFO|tokenization_utils_base.py:1821] 2024-06-14 16:42:04,937 >> loading file tokenizer_config.json
[INFO|modeling_utils.py:2575] 2024-06-14 16:42:05,145 >> loading weights file D:\langchain-model\chatglm-6b-int4\pytorch_model.bin
[INFO|configuration_utils.py:577] 2024-06-14 16:42:06,454 >> Generate config GenerationConfig {
"_from_model_config": true,
"bos_token_id": 130004,
"eos_token_id": 130005,
"pad_token_id": 3,
"transformers_version": "4.30.0"
}
--- Logging error ---
Traceback (most recent call last):
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization.py", line 19, in <module>
from cpm_kernels.kernels.base import LazyKernelCModule, KernelFunction, round_up
File "D:\Anaconda\envs\DL\lib\site-packages\cpm_kernels\__init__.py", line 1, in <module>
from . import library
File "D:\Anaconda\envs\DL\lib\site-packages\cpm_kernels\library\__init__.py", line 2, in <module>
from . import cuda
File "D:\Anaconda\envs\DL\lib\site-packages\cpm_kernels\library\cuda.py", line 7, in
cuda = Lib.from_lib("cuda", ctypes.WinDLL("nvcuda.dll"))
File "D:\Anaconda\envs\DL\lib\site-packages\cpm_kernels\library\base.py", line 63, in from_lib
ret = Lib(name)
File "D:\Anaconda\envs\DL\lib\site-packages\cpm_kernels\library\base.py", line 45, in __init__
lib_path = windows_find_lib(self.__name)
File "D:\Anaconda\envs\DL\lib\site-packages\cpm_kernels\library\base.py", line 39, in windows_find_lib
return lookup_dll(lib_name)
File "D:\Anaconda\envs\DL\lib\site-packages\cpm_kernels\library\base.py", line 16, in lookup_dll
for name in os.listdir(path):
NotADirectoryError: [WinError 267] 目录名称无效。: 'C:\Windows\SysWOW64\WindowsPowerShell\v1.0\powershell.exe'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\Anaconda\envs\DL\lib\logging\__init__.py", line 1083, in emit
msg = self.format(record)
File "D:\Anaconda\envs\DL\lib\logging\__init__.py", line 927, in format
return fmt.format(record)
File "D:\Anaconda\envs\DL\lib\logging\__init__.py", line 663, in format
record.message = record.getMessage()
File "D:\Anaconda\envs\DL\lib\logging\__init__.py", line 367, in getMessage
msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
File "D:\langchain-model\chatglm-6b-int4\ChatGLM-6B-main\ChatGLM-6B-main\ptuning\main.py", line 433, in <module>
main()
File "D:\langchain-model\chatglm-6b-int4\ChatGLM-6B-main\ChatGLM-6B-main\ptuning\main.py", line 127, in main
model = AutoModel.from_pretrained(model_args.model_name_or_path, config=config, trust_remote_code=True)
File "D:\Anaconda\envs\DL\lib\site-packages\transformers\models\auto\auto_factory.py", line 479, in from_pretrained
return model_class.from_pretrained(
File "D:\Anaconda\envs\DL\lib\site-packages\transformers\modeling_utils.py", line 2675, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 1061, in __init__
self.quantize(self.config.quantization_bit, self.config.quantization_embeddings, use_quantization_cache=True, empty_init=True)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 1424, in quantize
from .quantization import quantize, QuantizedEmbedding, QuantizedLinear, load_cpu_kernel
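File "<frozen importlib._bootstrap>", line 1007, in _find_and_load
File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 850, in exec_module
File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization.py", line 46, in <module>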
logger.warning("Failed to load cpm_kernels:", exception)
Message: 'Failed to load cpm_kernels:'
Arguments: (NotADirectoryError(20, '目录名称无效。'),)
[INFO|modeling_utils.py:3295] 2024-06-14 16:42:07,190 >> All model checkpoint weights were used when initializing ChatGLMForConditionalGeneration.
[WARNING|modeling_utils.py:3297] 2024-06-14 16:42:07,190 >> Some weights of ChatGLMForConditionalGeneration were not initialized from the model checkpoint at D:\langchain-model\chatglm-6b-int4 and are newly initialized: ['transformer.prefix_encoder.embedding.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[INFO|modeling_utils.py:2927] 2024-06-14 16:42:07,206 >> Generation config file not found, using a generation config created from the model config.
Load kernel : D:\langchain-model\chatglm-6b-int4\quantization_kernels.so
No set_num_threads() found in kernel.
Setting CPU quantization kernel threads to 10
Using quantization cache
Applying quantization to glm layers
Quantized to 4 bit
Load kernel : D:\langchain-model\chatglm-6b-int4\quantization_kernels.so
No set_num_threads() found in kernel.
Setting CPU quantization kernel threads to 10
Running tokenizer on train dataset: 0%| | 0/255 [00:00<?, ? examples/Running tokenizer on train dataset: 100%|██████████| 255/255 [00:00<00Running tokenizer on train dataset: 100%|██████████| 255/255 [00:00<00:00, 1219.18 examples/s]
D:\Anaconda\envs\DL\lib\site-packages\transformers\optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set no_deprecation_warning=True
to disable this warning
warnings.warn(
input_ids [526, 107, 23188, 1711, 5, 51260, 12902, 174, 4833, 27442, 31, 130001, 130004, 23188, 1711, 5, 51260, 12902, 174, 4833, 27442, 6, 325, 12473, 107, 12, 27977, 35805, 1711, 5, 51260, 12902, 174, 5, 63599, 106, 5, 6, 325, 992, 19414, 182, 12076, 118, 12, 35145, 2560, 16061, 6, 123, 7145, 106, 5, 38326, 106, 174, 10468, 1711, 26525, 125895, 4289, 28042, 435, 3131, 150, 5, 30579, 6, 325, 2477, 10156, 118, 12, 7305, 2742, 201, 14811, 1285, 435, 3946, 13708, 201, 26525, 125895, 460, 2934, 1341, 435, 115, 25428, 119, 3072, 125895, 222, 13981, 39842, 435, 22278, 16327, 210, 794, 125895, 25595, 2369, 1341, 6, 60110, 102, 5305, 118, 12, 7305, 32576, 5302, 985, 16914, 17897, 6, 7338, 435, 19648, 30027, 6, 13193, 35805, 941, 101, 590, 1139, 12, 12372, 27442, 6, 44006, 2566, 941, 12, 56973, 12902, 130005, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
inputs What is Syndrome Of Excessive Heat In Water Wheel? Syndrome Of Excessive Heat In Water Wheel,the alias is:Syndrome Of Excessive Heat In Pupils,the primary syptom are:Blurred Vision, The Clouds Flutters In Front Of Eyes||Even Blind And Lose Sight,the secondary symptom are:Reddened Complexion And Congested Eyes||Vexation And Irritability||Bitter Taste And Dry Throat||Constipation,tongue and pulse are:Red Tongue With Yellow Fur, Full And Rapid Pulse,syndrome element of disease location:Water Wheel,pathogenic syndrome element:Excess Heat
label_ids [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 130004, 23188, 1711, 5, 51260, 12902, 174, 4833, 27442, 6, 325, 12473, 107, 12, 27977, 35805, 1711, 5, 51260, 12902, 174, 5, 63599, 106, 5, 6, 325, 992, 19414, 182, 12076, 118, 12, 35145, 2560, 16061, 6, 123, 7145, 106, 5, 38326, 106, 174, 10468, 1711, 26525, 125895, 4289, 28042, 435, 3131, 150, 5, 30579, 6, 325, 2477, 10156, 118, 12, 7305, 2742, 201, 14811, 1285, 435, 3946, 13708, 201, 26525, 125895, 460, 2934, 1341, 435, 115, 25428, 119, 3072, 125895, 222, 13981, 39842, 435, 22278, 16327, 210, 794, 125895, 25595, 2369, 1341, 6, 60110, 102, 5305, 118, 12, 7305, 32576, 5302, 985, 16914, 17897, 6, 7338, 435, 19648, 30027, 6, 13193, 35805, 941, 101, 590, 1139, 12, 12372, 27442, 6, 44006, 2566, 941, 12, 56973, 12902, 130005, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]
labels Syndrome Of Excessive Heat In Water Wheel,the alias is:Syndrome Of Excessive Heat In Pupils,the primary syptom are:Blurred Vision, The Clouds Flutters In Front Of Eyes||Even Blind And Lose Sight,the secondary symptom are:Reddened Complexion And Congested Eyes||Vexation And Irritability||Bitter Taste And Dry Throat||Constipation,tongue and pulse are:Red Tongue With Yellow Fur, Full And Rapid Pulse,syndrome element of disease location:Water Wheel,pathogenic syndrome element:Excess Heat
0%| | 0/100 [00:00<?, ?it/s]06/14/2024 16:42:09 - WARNING - transformers_modules.chatglm-6b-int4.modeling_chatglm - use_cache=True
is incompatible with gradient checkpointing. Setting use_cache=False
...
D:\Anaconda\envs\DL\lib\site-packages\torch\utils\checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
warnings.warn(
Traceback (most recent call last):
File "D:\langchain-model\chatglm-6b-int4\ChatGLM-6B-main\ChatGLM-6B-main\ptuning\main.py", line 433, in <module>
main()
File "D:\langchain-model\chatglm-6b-int4\ChatGLM-6B-main\ChatGLM-6B-main\ptuning\main.py", line 372, in main
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "D:\langchain-model\chatglm-6b-int4\ChatGLM-6B-main\ChatGLM-6B-main\ptuning\trainer.py", line 1635, in train
return inner_training_loop(
File "D:\langchain-model\chatglm-6b-int4\ChatGLM-6B-main\ChatGLM-6B-main\ptuning\trainer.py", line 1904, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "D:\langchain-model\chatglm-6b-int4\ChatGLM-6B-main\ChatGLM-6B-main\ptuning\trainer.py", line 2647, in training_step
loss = self.compute_loss(model, inputs)
File "D:\langchain-model\chatglm-6b-int4\ChatGLM-6B-main\ChatGLM-6B-main\ptuning\trainer.py", line 2679, in compute_loss
outputs = model(**inputs)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 1190, in forward
transformer_outputs = self.transformer(
File "D:\Anaconda\envs\DL\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 985, in forward
layer_ret = torch.utils.checkpoint.checkpoint(
File "D:\Anaconda\envs\DL\lib\site-packages\torch\_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\_dynamo\eval_frame.py", line 328, in _fn
return fn(*args, **kwargs)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\_dynamo\external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\utils\checkpoint.py", line 451, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\autograd\function.py", line 539, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "D:\Anaconda\envs\DL\lib\site-packages\torch\utils\checkpoint.py", line 230, in forward
outputs = run_function(*args)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 627, in forward
attention_outputs = self.attention(
File "D:\Anaconda\envs\DL\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\modeling_chatglm.py", line 445, in forward
mixed_raw_layer = self.query_key_value(hidden_states)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization.py", line 393, in forward
output = W8A16Linear.apply(input, self.weight, self.weight_scale, self.weight_bit_width)
File "D:\Anaconda\envs\DL\lib\site-packages\torch\autograd\function.py", line 539, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization.py", line 56, in forward
weight = extract_weight_to_half(quant_w, scale_w, weight_bit_width)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization.py", line 276, in extract_weight_to_half
func = kernels.int4WeightExtractionHalf
AttributeError: 'NoneType' object has no attribute 'int4WeightExtractionHalf'
0%| | 0/100 [00:00<?, ?it/s]