使用 fairseq-train 进行训练时,遇到了 "unrecognized arguments"(无法识别的参数)错误,其中包括数据目录这个位置参数。
代码内容
#!/usr/bin/env bash
# Train the MAN base model on the Gigaword summarization dataset with fairseq.
#
# Usage: train-man-base-gigaword.sh <version-tag>
#   <version-tag>  required; appended to the save directory name
#                  (log/gigaword/man_base_v<version-tag>).
set -euo pipefail

# Raise the open-file limit; fairseq's dataloader workers keep many files open.
ulimit -n 2648
ulimit -n

CODE_PATH=.
cd "${CODE_PATH}" || exit 1
pwd
# ${PYTHONPATH:-} avoids a "unbound variable" error under set -u when
# PYTHONPATH is not exported in the caller's environment.
export PYTHONPATH=${CODE_PATH}:${PYTHONPATH:-}

PROBLEM=gigaword
DATA_DIR=./data-bin/ggw
ARCH=man_base
# Fail fast when the version tag is missing. Previously VERSION=$1 was
# accepted unchecked, which silently produced SAVE_DIR ".../man_base_v"
# (visible in the pasted run log above).
VERSION=${1:?usage: $0 <version-tag>}
USER_DIR=./model
SAVE_DIR=./log/${PROBLEM}/${ARCH}_v${VERSION}

echo "PROBLEM: ${PROBLEM}"
echo "ARCH: ${ARCH}"
echo "SAVE_DIR: ${SAVE_DIR}"
mkdir -p "${SAVE_DIR}"

# NOTE(review): the reported "unrecognized arguments: ./data-bin/ggw
# --max-source-positions 512 --max-target-positions 512" error suggests the
# custom task registered under --user-dir did not load: the printed usage has
# no positional `data` argument and no --max-*-positions options, which a
# translation-style task normally contributes to the parser. Verify that the
# installed fairseq version matches the one ./model was written against
# (TODO confirm — pip install the version pinned by the MAN repo).
fairseq-train "${DATA_DIR}" --seed 1 \
  --task man --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \
  --arch "${ARCH}" \
  --lr 8e-4 \
  --optimizer adam --adam-betas "(0.9,0.98)" --adam-eps 1e-6 --weight-decay 0.01 \
  --clip-norm 0.1 \
  --lr-scheduler inverse_sqrt --warmup-init-lr 1e-7 --warmup-updates 8000 \
  --dropout 0.1 --attention-dropout 0.1 \
  --max-tokens 12288 --update-freq 3 \
  --max-source-positions 512 --max-target-positions 512 \
  --skip-invalid-size-inputs-valid-test \
  --num-workers 4 --ddp-backend no_c10d \
  --save-dir "${SAVE_DIR}" \
  --max-update 50000 --log-format simple --log-interval 1000 \
  --user-dir "${USER_DIR}" \
  | tee -a "${SAVE_DIR}/train_log.txt"
运行结果及报错内容
13664@LAPTOP-ZHUANG MINGW64 /d/Desktop/hugg/hugg/Scripts/MAN-main/summarization
$ bash ./scripts/summarization/train/train-man-base-gigaword.sh
3200
/d/Desktop/hugg/hugg/Scripts/MAN-main/summarization
PROBLEM: gigaword
ARCH: man_base
SAVE_DIR: ./log/gigaword/man_base_v
usage: fairseq-train [-h] [--no-progress-bar] [--log-interval LOG_INTERVAL]
[--log-format {json,none,simple,tqdm}]
[--log-file LOG_FILE] [--aim-repo AIM_REPO]
[--aim-run-hash AIM_RUN_HASH]
[--tensorboard-logdir TENSORBOARD_LOGDIR]
[--wandb-project WANDB_PROJECT] [--azureml-logging]
[--seed SEED] [--cpu] [--tpu] [--bf16]
[--memory-efficient-bf16] [--fp16]
[--memory-efficient-fp16] [--fp16-no-flatten-grads]
[--fp16-init-scale FP16_INIT_SCALE]
[--fp16-scale-window FP16_SCALE_WINDOW]
[--fp16-scale-tolerance FP16_SCALE_TOLERANCE]
[--on-cpu-convert-precision]
[--min-loss-scale MIN_LOSS_SCALE]
[--threshold-loss-scale THRESHOLD_LOSS_SCALE] [--amp]
[--amp-batch-retries AMP_BATCH_RETRIES]
[--amp-init-scale AMP_INIT_SCALE]
[--amp-scale-window AMP_SCALE_WINDOW]
[--user-dir USER_DIR]
[--empty-cache-freq EMPTY_CACHE_FREQ]
[--all-gather-list-size ALL_GATHER_LIST_SIZE]
[--model-parallel-size MODEL_PARALLEL_SIZE]
[--quantization-config-path QUANTIZATION_CONFIG_PATH]
[--profile] [--reset-logging] [--suppress-crashes]
[--use-plasma-view] [--plasma-path PLASMA_PATH]
[--criterion {adaptive_loss,composite_loss,cross_entropy,ctc,fastspeech2,hubert,label_smoothed_cross_entropy,latency_augmented_label_smoothed_cross_entrop
y,label_smoothed_cross_entropy_with_alignment,label_smoothed_cross_entropy_with_ctc,legacy_masked_lm_loss,masked_lm,model,nat_loss,sentence_prediction,sentence_prediction_adapters,sentence_ranking,tacotron2,speech_to_unit,speech_to_spectrogram,speech_unit_lm_criterion,wav2vec,vocab_parallel_cross_entropy,masked_lm2}]
[--tokenizer {moses,nltk,space}]
[--bpe {byte_bpe,bytes,characters,fastbpe,gpt2,bert,hf_byte_bpe,sentencepiece,subword_nmt}]
[--optimizer {adadelta,adafactor,adagrad,adam,adamax,composite,cpu_adam,lamb,nag,sgd}]
[--lr-scheduler {cosine,fixed,inverse_sqrt,manual,pass_through,polynomial_decay,reduce_lr_on_plateau,step,tri_stage,triangular}]
[--scoring {bert_score,sacrebleu,bleu,chrf,meteor,wer}]
[--task TASK] [--num-workers NUM_WORKERS]
[--skip-invalid-size-inputs-valid-test]
[--max-tokens MAX_TOKENS] [--batch-size BATCH_SIZE]
[--required-batch-size-multiple REQUIRED_BATCH_SIZE_MULTIPLE]
[--required-seq-len-multiple REQUIRED_SEQ_LEN_MULTIPLE]
[--dataset-impl {raw,lazy,cached,mmap,fasta,huffman}]
[--data-buffer-size DATA_BUFFER_SIZE]
[--train-subset TRAIN_SUBSET]
[--valid-subset VALID_SUBSET] [--combine-valid-subsets]
[--ignore-unused-valid-subsets]
[--validate-interval VALIDATE_INTERVAL]
[--validate-interval-updates VALIDATE_INTERVAL_UPDATES]
[--validate-after-updates VALIDATE_AFTER_UPDATES]
[--fixed-validation-seed FIXED_VALIDATION_SEED]
[--disable-validation]
[--max-tokens-valid MAX_TOKENS_VALID]
[--batch-size-valid BATCH_SIZE_VALID]
[--max-valid-steps MAX_VALID_STEPS]
[--curriculum CURRICULUM] [--gen-subset GEN_SUBSET]
[--num-shards NUM_SHARDS] [--shard-id SHARD_ID]
[--grouped-shuffling]
[--update-epoch-batch-itr UPDATE_EPOCH_BATCH_ITR]
[--update-ordered-indices-seed]
[--distributed-world-size DISTRIBUTED_WORLD_SIZE]
[--distributed-num-procs DISTRIBUTED_NUM_PROCS]
[--distributed-rank DISTRIBUTED_RANK]
[--distributed-backend DISTRIBUTED_BACKEND]
[--distributed-init-method DISTRIBUTED_INIT_METHOD]
[--distributed-port DISTRIBUTED_PORT]
[--device-id DEVICE_ID] [--distributed-no-spawn]
[--ddp-backend {c10d,fully_sharded,legacy_ddp,no_c10d,pytorch_ddp,slowmo}]
[--ddp-comm-hook {none,fp16}]
[--bucket-cap-mb BUCKET_CAP_MB] [--fix-batches-to-gpus]
[--find-unused-parameters] [--gradient-as-bucket-view]
[--fast-stat-sync]
[--heartbeat-timeout HEARTBEAT_TIMEOUT]
[--broadcast-buffers] [--slowmo-momentum SLOWMO_MOMENTUM]
[--slowmo-base-algorithm SLOWMO_BASE_ALGORITHM]
[--localsgd-frequency LOCALSGD_FREQUENCY]
[--nprocs-per-node NPROCS_PER_NODE]
[--pipeline-model-parallel]
[--pipeline-balance PIPELINE_BALANCE]
[--pipeline-devices PIPELINE_DEVICES]
[--pipeline-chunks PIPELINE_CHUNKS]
[--pipeline-encoder-balance PIPELINE_ENCODER_BALANCE]
[--pipeline-encoder-devices PIPELINE_ENCODER_DEVICES]
[--pipeline-decoder-balance PIPELINE_DECODER_BALANCE]
[--pipeline-decoder-devices PIPELINE_DECODER_DEVICES]
[--pipeline-checkpoint {always,never,except_last}]
[--zero-sharding {none,os}] [--no-reshard-after-forward]
[--fp32-reduce-scatter] [--cpu-offload]
[--use-sharded-state] [--not-fsdp-flatten-parameters]
[--arch ARCH] [--max-epoch MAX_EPOCH]
[--max-update MAX_UPDATE]
[--stop-time-hours STOP_TIME_HOURS]
[--clip-norm CLIP_NORM] [--sentence-avg]
[--update-freq UPDATE_FREQ] [--lr LR]
[--stop-min-lr STOP_MIN_LR] [--use-bmuf]
[--skip-remainder-batch] [--save-dir SAVE_DIR]
[--restore-file RESTORE_FILE]
[--continue-once CONTINUE_ONCE]
[--finetune-from-model FINETUNE_FROM_MODEL]
[--reset-dataloader] [--reset-lr-scheduler]
[--reset-meters] [--reset-optimizer]
[--optimizer-overrides OPTIMIZER_OVERRIDES]
[--save-interval SAVE_INTERVAL]
[--save-interval-updates SAVE_INTERVAL_UPDATES]
[--keep-interval-updates KEEP_INTERVAL_UPDATES]
[--keep-interval-updates-pattern KEEP_INTERVAL_UPDATES_PATTERN]
[--keep-last-epochs KEEP_LAST_EPOCHS]
[--keep-best-checkpoints KEEP_BEST_CHECKPOINTS]
[--no-save] [--no-epoch-checkpoints]
[--no-last-checkpoints] [--no-save-optimizer-state]
[--best-checkpoint-metric BEST_CHECKPOINT_METRIC]
[--maximize-best-checkpoint-metric] [--patience PATIENCE]
[--checkpoint-suffix CHECKPOINT_SUFFIX]
[--checkpoint-shard-count CHECKPOINT_SHARD_COUNT]
[--load-checkpoint-on-all-dp-ranks]
[--write-checkpoints-asynchronously] [--store-ema]
[--ema-decay EMA_DECAY]
[--ema-start-update EMA_START_UPDATE]
[--ema-seed-model EMA_SEED_MODEL]
[--ema-update-freq EMA_UPDATE_FREQ] [--ema-fp32]
[--activation-fn {relu,gelu,gelu_fast,gelu_accurate,tanh,linear}]
[--dropout D] [--attention-dropout D]
[--activation-dropout D] [--encoder-embed-dim N]
[--encoder-ffn-embed-dim N] [--encoder-layers N]
[--encoder-mix-layers N] [--encoder-attention-heads N]
[--decoder-embed-dim N] [--decoder-ffn-embed-dim N]
[--decoder-layers N] [--decoder-mix-layers N]
[--decoder-attention-heads N] [--share-all-embeddings]
[--load-from-pretrained-model LOAD_FROM_PRETRAINED_MODEL]
[--label-smoothing LABEL_SMOOTHING] [--report-accuracy]
[--ignore-prefix-size IGNORE_PREFIX_SIZE]
[--adam-betas ADAM_BETAS] [--adam-eps ADAM_EPS]
[--weight-decay WEIGHT_DECAY] [--use-old-adam]
[--fp16-adam-stats] [--warmup-updates WARMUP_UPDATES]
[--warmup-init-lr WARMUP_INIT_LR] [--pad PAD] [--eos EOS]
[--unk UNK]
fairseq-train: error: unrecognized arguments: ./data-bin/ggw --max-source-positions 512 --max-target-positions 512
各种方法都尝试过了。从报错看,问题不只是 DATA_DIR 这个位置参数:--max-source-positions / --max-target-positions 也一并无法识别,而打印出的 usage 里没有 data 位置参数,怀疑是 --user-dir 下自定义的 man task 没有被成功注册(可能是 fairseq 版本不匹配)。
有相似问题的,如果解决了,希望能分享一下方法,谢谢!