Created
November 18, 2020 08:34
-
-
Save sksq96/550ab1e57d6b868e1e938f7f7f3cbb3c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Denoising training of the bart_large architecture with fairseq-train.
# Reads binarized data from $DATA_DIR, restores the starting checkpoint from
# ../models/$MODEL/model.pt, writes checkpoints to models/ and TensorBoard
# logs to ./runs/.

WARMUP_UPDATES=15000    # Warm up the learning rate over this many updates
TOTAL_UPDATES=5000000   # Total number of training steps
MAX_TOKENS=1024         # Passed to --max-tokens
MAX_POSITIONS=1024      # Num. positional embeddings (usually same as above)
# The bracketed list form of the next two values is kept as-is. They are
# single-quoted here and double-quoted where used because an unquoted
# "[...]" is a shell glob pattern and could be replaced by a matching
# one-character file name (e.g. a file called "0" or "2") in the current
# directory.
PEAK_LR='[0.0006]'      # Peak learning rate, adjust as needed
UPDATE_FREQ='[2]'       # Passed to --update-freq (value is 2, not 16)
MODEL=bart_large
DATA_DIR="/storage/webdatacommons/binarized/bpe/$MODEL"

# Note: the last option line must NOT end with a backslash, otherwise the
# following line of the file is spliced into this command.
fairseq-train "$DATA_DIR" \
  --arch "$MODEL" \
  --task denoising \
  --restore-file "../models/$MODEL/model.pt" \
  --share-all-embeddings \
  --layernorm-embedding \
  --share-decoder-input-output-embed \
  --adam-betas "(0.9, 0.98)" \
  --adam-eps 1e-06 \
  --disable-validation \
  --clip-norm 0.1 \
  --criterion cross_entropy \
  --ddp-backend=no_c10d \
  --fp16 \
  --fp16-init-scale 128 \
  --log-format json \
  --log-interval 10 \
  --lr "$PEAK_LR" \
  --lr-scheduler polynomial_decay \
  --mask-length span-poisson \
  --mask-random 0.1 \
  --mask 0.3 \
  --max-source-positions "$MAX_POSITIONS" \
  --max-target-positions "$MAX_POSITIONS" \
  --max-tokens "$MAX_TOKENS" \
  --max-update "$TOTAL_UPDATES" \
  --min-loss-scale 0.0001 \
  --min-lr -1 \
  --model-parallel-size 1 \
  --num-workers 2 \
  --optimizer adam \
  --permute 0 \
  --permute-sentences 1 \
  --poisson-lambda 3.5 \
  --power 1 \
  --replace-length 1 \
  --required-batch-size-multiple 1 \
  --sample-break-mode complete_doc \
  --save-dir models/ \
  --save-interval-updates 20000 \
  --save-interval 1 \
  --skip-invalid-size-inputs-valid-test \
  --tensorboard-logdir ./runs/ \
  --tokens-per-sample 512 \
  --total-num-update "$TOTAL_UPDATES" \
  --update-freq "$UPDATE_FREQ" \
  --warmup-updates "$WARMUP_UPDATES" \
  --weight-decay 0.01
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment