Skip to content

Instantly share code, notes, and snippets.

@verdimrc
Last active February 15, 2023 12:45
Show Gist options
  • Save verdimrc/4254223b881ddc018cef728e838fb675 to your computer and use it in GitHub Desktop.
Save verdimrc/4254223b881ddc018cef728e838fb675 to your computer and use it in GitHub Desktop.
Huggingface
#!/bin/bash
#
# Launcher for fine-tuning a causal language model with run_clm.py.
#
# Environment variables (each may be overridden by the caller; the value shown
# is the default applied when the variable is unset or empty):
#   SM_NUM_GPUS        4             number of processes passed to torchrun
#   MODEL_NAME         gpt2          model/tokenizer name handed to run_clm.py
#   OUTPUT_ROOT        /mnt/scratch  parent directory for all outputs
#   TRAINING_JOB_NAME  haha          leaf directory name for this run
#
#set -aex
echo "PWD = $(pwd)"

: "${SM_NUM_GPUS:=4}"
: "${MODEL_NAME:=gpt2}"
: "${OUTPUT_ROOT:=/mnt/scratch}"
: "${TRAINING_JOB_NAME:=haha}"

# Everything produced by this run lives under a single directory.
OUTPUT_DIR="${OUTPUT_ROOT}/${MODEL_NAME}-finetuned/${TRAINING_JOB_NAME}"

# Quoted so that a path containing whitespace or glob characters is created as
# one directory instead of being split into several arguments.
mkdir -p -- "$OUTPUT_DIR"

# https://docs.wandb.ai/guides/track/advanced/environment-variables
export WANDB_DIR="$OUTPUT_DIR"

# [BEWARE] When set to online and wandb is not configured properly (e.g., key,
# etc.), HF will ask how to proceed with wandb and wait for your answer, which
# will cause your training job to "hang" forever when running on a cluster.
export WANDB_MODE=offline
# Default command-line arguments for run_clm.py, kept as an array so every
# flag and every value stays a separate word. Variable expansions are quoted:
# unquoted, a value containing whitespace would be split into several
# arguments, and an empty value would vanish and shift the following flag into
# its place.
declare -a OPTS=(
    --report_to tensorboard wandb

    # https://docs.wandb.ai/guides/integrations/huggingface
    # If None, defaults to huggingface.
    #--run_name RUN_NAME   An optional descriptor for the run. Notably used for
    #                      wandb logging. (default: None)

    --model_name_or_path "$MODEL_NAME"
    --tokenizer_name "$MODEL_NAME"
    --train_file train_data/train_1669615964.csv
    --validation_file train_data/val_1669615964.csv
    --do_train
    --do_eval

    --evaluation_strategy=steps
    #--logging_strategy=steps   # Already the default
    --logging_steps 1

    # Checkpoints and logs share the same directory.
    --output_dir "$OUTPUT_DIR"
    --logging_dir "$OUTPUT_DIR"

    --num_train_epochs 3
    --eval_steps 1
    --gradient_accumulation_steps 32
    --per_device_train_batch_size 4
    --per_device_eval_batch_size 4
    --gradient_checkpointing
    --learning_rate 5e-06
    --warmup_steps 10

    --save_total_limit 1
    # NOTE(review): --save_steps presumably only takes effect with
    # "--save_strategy steps"; with "epoch" below it looks unused. Confirm
    # against the TrainingArguments documentation before removing either flag.
    --save_steps 2
    --save_strategy epoch
)
# Launch training on a single node with SM_NUM_GPUS worker processes. Any
# arguments given to this script are appended after the defaults in OPTS and
# forwarded to run_clm.py unchanged.
declare -a CMD=(
    torchrun --standalone --nnodes=1 --nproc_per_node="$SM_NUM_GPUS"
    run_clm.py "${OPTS[@]}" "$@"
)

# Log the exact command, shell-quoted (%q) so the printed line can be pasted
# back into a shell even when an argument contains spaces or is empty.
printf '%q ' "${CMD[@]}"
printf '\n'

"${CMD[@]}"
rc=$?
# Propagate the real exit status instead of collapsing every failure to 1, so
# the caller can tell e.g. "command not found" (127) from a training error.
(( rc == 0 )) || exit "$rc"

# List the produced files. "-exec ... {} +" hands filenames to ls directly, so
# names with whitespace are handled, and ls is not run at all (rather than
# listing the current directory) when no file was produced.
find "$OUTPUT_DIR" -type f -exec ls -alh {} +
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment