大模型微调，在 Running Evaluation 过程中出现的错误
使用 LLaMA-Factory 作为微调工具，相关参数设置如下：
CUDA_VISIBLE_DEVICES=0 /opt/llama_factory/bin/python src/train_bash.py \
--stage sft \
--model_name_or_path pretrained_models/Qwen/Qwen1.5-0.5B/ \
--do_train \
--dataset llama_input_dataset \
--finetuning_type lora \
--lora_target q_proj,v_proj \
--output_dir output_finetuning_models/Qwen/Qwen1.5-0.5B/lora/sft \
--overwrite_cache \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 16 \
--preprocessing_num_workers 16 \
--lr_scheduler_type cosine \
--logging_steps 10 \
--save_steps 10000 \
--learning_rate 5e-4 \
--max_grad_norm 0.5 \
--num_train_epochs 3 \
--evaluation_strategy steps \
--bf16 \
--template qwen \
--overwrite_output_dir \
--cutoff_len 1024 \
--quantization_bit 4 \
--plot_loss \
--load_best_model_at_end True \
--val_size 0.001
运行后报错，报错内容如下：
[INFO|trainer.py:3614] 2024-05-01 20:47:32,810 >> ***** Running Evaluation *****
[INFO|trainer.py:3616] 2024-05-01 20:47:32,810 >> Num examples = 20
[INFO|trainer.py:3619] 2024-05-01 20:47:32,810 >> Batch size = 4
Traceback (most recent call last):
File "/home/bio/workshop/hx/LLaMA-Factory/src/train_bash.py", line 14, in <module>
main()
File "/home/bio/workshop/hx/LLaMA-Factory/src/train_bash.py", line 5, in main
run_exp()
File "/home/bio/workshop/hx/LLaMA-Factory/src/llmtuner/train/tuner.py", line 32, in run_exp
run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
File "/home/bio/workshop/hx/LLaMA-Factory/src/llmtuner/train/sft/workflow.py", line 73, in run_sft
train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
File "/home/bio/.local/lib/python3.10/site-packages/transformers/trainer.py", line 1859, in train
return inner_training_loop(
File "/home/bio/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2278, in _inner_training_loop
self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
File "/home/bio/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2662, in _maybe_log_save_evaluate
metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
File "/home/bio/.local/lib/python3.10/site-packages/transformers/trainer_seq2seq.py", line 180, in evaluate
return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
File "/home/bio/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3467, in evaluate
output = eval_loop(
File "/home/bio/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3719, in evaluation_loop
metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
File "/home/bio/workshop/hx/LLaMA-Factory/src/llmtuner/train/sft/metric.py", line 46, in __call__
decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)
File "/home/bio/.local/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 3771, in batch_decode
return [
File "/home/bio/.local/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 3772, in <listcomp>
self.decode(
File "/home/bio/.local/lib/python3.10/site-packages/transformers/models/qwen2/tokenization_qwen2.py", line 299, in decode
return super().decode(
File "/home/bio/.local/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 3811, in decode
return self._decode(
File "/home/bio/.local/lib/python3.10/site-packages/transformers/tokenization_utils.py", line 1001, in _decode
filtered_tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens=skip_special_tokens)
File "/home/bio/.local/lib/python3.10/site-packages/transformers/tokenization_utils.py", line 976, in convert_ids_to_tokens
index = int(index)
TypeError: int() argument must be a string, a bytes-like object or a real number, not 'list'
请问这是我的数据集本身的问题吗？为什么 Running training 阶段没有出错，只有 Running Evaluation 阶段才报这个错呢？