XUST_Alon · 2022-10-08 18:30 · Closed

When training with fairseq-train, I get an "unrecognized arguments" error for the data path argument
Code
#!/usr/bin/env bash

# Raise the open-file limit for the dataloader workers, then print the new value.
ulimit -n 2648
ulimit -n

CODE_PATH=.
cd ${CODE_PATH}

pwd
export PYTHONPATH=${CODE_PATH}:$PYTHONPATH

PROBLEM=gigaword
DATA_DIR=./data-bin/ggw   # preprocessed Gigaword data
ARCH=man_base
VERSION=$1                # experiment version tag, passed as the first script argument
USER_DIR=./model          # custom fairseq modules (man task / man_base arch)

SAVE_DIR=./log/${PROBLEM}/${ARCH}_v${VERSION}

echo PROBLEM: ${PROBLEM}
echo ARCH: ${ARCH}
echo SAVE_DIR: ${SAVE_DIR}

mkdir -p ${SAVE_DIR}

fairseq-train ${DATA_DIR} --seed 1 \
    --task man --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \
    --arch ${ARCH} \
    --lr 8e-4 \
    --optimizer adam --adam-betas "(0.9,0.98)" --adam-eps 1e-6 --weight-decay 0.01 \
    --clip-norm 0.1 \
    --lr-scheduler inverse_sqrt --warmup-init-lr 1e-7 --warmup-updates 8000 \
    --dropout 0.1 --attention-dropout 0.1 \
    --max-tokens 12288 --update-freq 3 \
    --max-source-positions 512 --max-target-positions 512 \
    --skip-invalid-size-inputs-valid-test \
    --num-workers 4 --ddp-backend no_c10d \
    --save-dir ${SAVE_DIR} \
    --max-update 50000 --log-format simple --log-interval 1000 \
    --user-dir ${USER_DIR} \
| tee -a ${SAVE_DIR}/train_log.txt
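
For reference (this block is not part of the original script): the positional data directory and the --max-source-positions / --max-target-positions options are normally contributed by the task (e.g. the built-in translation task) rather than being global fairseq-train options, so they are only accepted when the selected task registers them. A minimal comparison sketch against the built-in translation task, with illustrative hyper-parameters and the same preprocessed data path:

# Comparison run with the built-in translation task; only meant to show that the
# data path and the --max-*-positions flags are options contributed by the task.
fairseq-train ./data-bin/ggw \
    --task translation --arch transformer \
    --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \
    --optimizer adam --lr 5e-4 --lr-scheduler inverse_sqrt --warmup-updates 4000 \
    --max-source-positions 512 --max-target-positions 512 \
    --max-tokens 4096
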
Output and error message
13664@LAPTOP-ZHUANG MINGW64 /d/Desktop/hugg/hugg/Scripts/MAN-main/summarization
$ bash ./scripts/summarization/train/train-man-base-gigaword.sh
3200
/d/Desktop/hugg/hugg/Scripts/MAN-main/summarization
PROBLEM: gigaword
ARCH: man_base
SAVE_DIR: ./log/gigaword/man_base_v
usage: fairseq-train [-h] [--no-progress-bar] [--log-interval LOG_INTERVAL]
                     [--log-format {json,none,simple,tqdm}]
                     [--log-file LOG_FILE] [--aim-repo AIM_REPO]
                     [--aim-run-hash AIM_RUN_HASH]
                     [--tensorboard-logdir TENSORBOARD_LOGDIR]
                     [--wandb-project WANDB_PROJECT] [--azureml-logging]
                     [--seed SEED] [--cpu] [--tpu] [--bf16]
                     [--memory-efficient-bf16] [--fp16]
                     [--memory-efficient-fp16] [--fp16-no-flatten-grads]
                     [--fp16-init-scale FP16_INIT_SCALE]
                     [--fp16-scale-window FP16_SCALE_WINDOW]
                     [--fp16-scale-tolerance FP16_SCALE_TOLERANCE]
                     [--on-cpu-convert-precision]
                     [--min-loss-scale MIN_LOSS_SCALE]
                     [--threshold-loss-scale THRESHOLD_LOSS_SCALE] [--amp]
                     [--amp-batch-retries AMP_BATCH_RETRIES]
                     [--amp-init-scale AMP_INIT_SCALE]
                     [--amp-scale-window AMP_SCALE_WINDOW]
                     [--user-dir USER_DIR]
                     [--empty-cache-freq EMPTY_CACHE_FREQ]
                     [--all-gather-list-size ALL_GATHER_LIST_SIZE]
                     [--model-parallel-size MODEL_PARALLEL_SIZE]
                     [--quantization-config-path QUANTIZATION_CONFIG_PATH]
                     [--profile] [--reset-logging] [--suppress-crashes]
                     [--use-plasma-view] [--plasma-path PLASMA_PATH]
                     [--criterion {adaptive_loss,composite_loss,cross_entropy,ctc,fastspeech2,hubert,label_smoothed_cross_entropy,latency_augmented_label_smoothed_cross_entropy,label_smoothed_cross_entropy_with_alignment,label_smoothed_cross_entropy_with_ctc,legacy_masked_lm_loss,masked_lm,model,nat_loss,sentence_prediction,sentence_prediction_adapters,sentence_ranking,tacotron2,speech_to_unit,speech_to_spectrogram,speech_unit_lm_criterion,wav2vec,vocab_parallel_cross_entropy,masked_lm2}]
                     [--tokenizer {moses,nltk,space}]
                     [--bpe {byte_bpe,bytes,characters,fastbpe,gpt2,bert,hf_byte_bpe,sentencepiece,subword_nmt}]
                     [--optimizer {adadelta,adafactor,adagrad,adam,adamax,composite,cpu_adam,lamb,nag,sgd}]
                     [--lr-scheduler {cosine,fixed,inverse_sqrt,manual,pass_through,polynomial_decay,reduce_lr_on_plateau,step,tri_stage,triangular}]
                     [--scoring {bert_score,sacrebleu,bleu,chrf,meteor,wer}]
                     [--task TASK] [--num-workers NUM_WORKERS]
                     [--skip-invalid-size-inputs-valid-test]
                     [--max-tokens MAX_TOKENS] [--batch-size BATCH_SIZE]
                     [--required-batch-size-multiple REQUIRED_BATCH_SIZE_MULTIPLE]
                     [--required-seq-len-multiple REQUIRED_SEQ_LEN_MULTIPLE]
                     [--dataset-impl {raw,lazy,cached,mmap,fasta,huffman}]
                     [--data-buffer-size DATA_BUFFER_SIZE]
                     [--train-subset TRAIN_SUBSET]
                     [--valid-subset VALID_SUBSET] [--combine-valid-subsets]
                     [--ignore-unused-valid-subsets]
                     [--validate-interval VALIDATE_INTERVAL]
                     [--validate-interval-updates VALIDATE_INTERVAL_UPDATES]
                     [--validate-after-updates VALIDATE_AFTER_UPDATES]
                     [--fixed-validation-seed FIXED_VALIDATION_SEED]
                     [--disable-validation]
                     [--max-tokens-valid MAX_TOKENS_VALID]
                     [--batch-size-valid BATCH_SIZE_VALID]
                     [--max-valid-steps MAX_VALID_STEPS]
                     [--curriculum CURRICULUM] [--gen-subset GEN_SUBSET]
                     [--num-shards NUM_SHARDS] [--shard-id SHARD_ID]
                     [--grouped-shuffling]
                     [--update-epoch-batch-itr UPDATE_EPOCH_BATCH_ITR]
                     [--update-ordered-indices-seed]
                     [--distributed-world-size DISTRIBUTED_WORLD_SIZE]
                     [--distributed-num-procs DISTRIBUTED_NUM_PROCS]
                     [--distributed-rank DISTRIBUTED_RANK]
                     [--distributed-backend DISTRIBUTED_BACKEND]
                     [--distributed-init-method DISTRIBUTED_INIT_METHOD]
                     [--distributed-port DISTRIBUTED_PORT]
                     [--device-id DEVICE_ID] [--distributed-no-spawn]
                     [--ddp-backend {c10d,fully_sharded,legacy_ddp,no_c10d,pytorch_ddp,slowmo}]
                     [--ddp-comm-hook {none,fp16}]
                     [--bucket-cap-mb BUCKET_CAP_MB] [--fix-batches-to-gpus]
                     [--find-unused-parameters] [--gradient-as-bucket-view]
                     [--fast-stat-sync]
                     [--heartbeat-timeout HEARTBEAT_TIMEOUT]
                     [--broadcast-buffers] [--slowmo-momentum SLOWMO_MOMENTUM]
                     [--slowmo-base-algorithm SLOWMO_BASE_ALGORITHM]
                     [--localsgd-frequency LOCALSGD_FREQUENCY]
                     [--nprocs-per-node NPROCS_PER_NODE]
                     [--pipeline-model-parallel]
                     [--pipeline-balance PIPELINE_BALANCE]
                     [--pipeline-devices PIPELINE_DEVICES]
                     [--pipeline-chunks PIPELINE_CHUNKS]
                     [--pipeline-encoder-balance PIPELINE_ENCODER_BALANCE]
                     [--pipeline-encoder-devices PIPELINE_ENCODER_DEVICES]
                     [--pipeline-decoder-balance PIPELINE_DECODER_BALANCE]
                     [--pipeline-decoder-devices PIPELINE_DECODER_DEVICES]
                     [--pipeline-checkpoint {always,never,except_last}]
                     [--zero-sharding {none,os}] [--no-reshard-after-forward]
                     [--fp32-reduce-scatter] [--cpu-offload]
                     [--use-sharded-state] [--not-fsdp-flatten-parameters]
                     [--arch ARCH] [--max-epoch MAX_EPOCH]
                     [--max-update MAX_UPDATE]
                     [--stop-time-hours STOP_TIME_HOURS]
                     [--clip-norm CLIP_NORM] [--sentence-avg]
                     [--update-freq UPDATE_FREQ] [--lr LR]
                     [--stop-min-lr STOP_MIN_LR] [--use-bmuf]
                     [--skip-remainder-batch] [--save-dir SAVE_DIR]
                     [--restore-file RESTORE_FILE]
                     [--continue-once CONTINUE_ONCE]
                     [--finetune-from-model FINETUNE_FROM_MODEL]
                     [--reset-dataloader] [--reset-lr-scheduler]
                     [--reset-meters] [--reset-optimizer]
                     [--optimizer-overrides OPTIMIZER_OVERRIDES]
                     [--save-interval SAVE_INTERVAL]
                     [--save-interval-updates SAVE_INTERVAL_UPDATES]
                     [--keep-interval-updates KEEP_INTERVAL_UPDATES]
                     [--keep-interval-updates-pattern KEEP_INTERVAL_UPDATES_PATTERN]
                     [--keep-last-epochs KEEP_LAST_EPOCHS]
                     [--keep-best-checkpoints KEEP_BEST_CHECKPOINTS]
                     [--no-save] [--no-epoch-checkpoints]
                     [--no-last-checkpoints] [--no-save-optimizer-state]
                     [--best-checkpoint-metric BEST_CHECKPOINT_METRIC]
                     [--maximize-best-checkpoint-metric] [--patience PATIENCE]
                     [--checkpoint-suffix CHECKPOINT_SUFFIX]
                     [--checkpoint-shard-count CHECKPOINT_SHARD_COUNT]
                     [--load-checkpoint-on-all-dp-ranks]
                     [--write-checkpoints-asynchronously] [--store-ema]
                     [--ema-decay EMA_DECAY]
                     [--ema-start-update EMA_START_UPDATE]
                     [--ema-seed-model EMA_SEED_MODEL]
                     [--ema-update-freq EMA_UPDATE_FREQ] [--ema-fp32]
                     [--activation-fn {relu,gelu,gelu_fast,gelu_accurate,tanh,linear}]
                     [--dropout D] [--attention-dropout D]
                     [--activation-dropout D] [--encoder-embed-dim N]
                     [--encoder-ffn-embed-dim N] [--encoder-layers N]
                     [--encoder-mix-layers N] [--encoder-attention-heads N]
                     [--decoder-embed-dim N] [--decoder-ffn-embed-dim N]
                     [--decoder-layers N] [--decoder-mix-layers N]
                     [--decoder-attention-heads N] [--share-all-embeddings]
                     [--load-from-pretrained-model LOAD_FROM_PRETRAINED_MODEL]
                     [--label-smoothing LABEL_SMOOTHING] [--report-accuracy]
                     [--ignore-prefix-size IGNORE_PREFIX_SIZE]
                     [--adam-betas ADAM_BETAS] [--adam-eps ADAM_EPS]
                     [--weight-decay WEIGHT_DECAY] [--use-old-adam]
                     [--fp16-adam-stats] [--warmup-updates WARMUP_UPDATES]
                     [--warmup-init-lr WARMUP_INIT_LR] [--pad PAD] [--eos EOS]
                     [--unk UNK]
fairseq-train: error: unrecognized arguments: ./data-bin/ggw --max-source-positions 512 --max-target-positions 512
I have tried all kinds of approaches; the main issue seems to be with the data_dir argument (the positional data path).

If anyone has run into a similar problem and solved it, please share how. Thanks!
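
One way to narrow this down, since the usage message above does list the custom model options (e.g. --encoder-mix-layers) but not the task-level ones (the positional data path, --max-source-positions, --max-target-positions): check whether the man task from --user-dir actually registers against the installed fairseq version. A hedged diagnostic sketch, assuming a standard fairseq install; the user dir ./model and the names man / man_base are taken from the script above:

# Check that the user dir imports cleanly and that the custom task/arch are registered.
python -c "
from argparse import Namespace
from fairseq import utils
from fairseq.tasks import TASK_REGISTRY
from fairseq.models import ARCH_MODEL_REGISTRY
utils.import_user_module(Namespace(user_dir='./model'))
print('man task registered:', 'man' in TASK_REGISTRY)
print('man_base arch registered:', 'man_base' in ARCH_MODEL_REGISTRY)
"

If both print True but fairseq-train still rejects the data path, the installed fairseq version probably differs from the one the MAN code targets.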
