Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save andrewor14/f1121b9b4c2ccc50e0cc1726859eb79e to your computer and use it in GitHub Desktop.
---
# torchtune QAT (quantization-aware training) recipe config for
# Llama-3.2-3B-Instruct with int8 dynamic-activation / int4-weight fake quant.
#
# NOTE(review): the scraped source had all keys flattened to column 0, which
# is invalid YAML (duplicate top-level keys such as `_component_` and
# `output_dir`). Nesting below is restored from the sorted-key structure of
# the dump and torchtune's stock QAT recipe config — confirm against the
# original gist rendering.

batch_size: 16
batch_size_val: 8

# Load/save full HF-format model weights.
checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: /tmp/Llama-3.2-3B-Instruct/
  checkpoint_files:
    - model-00001-of-00002.safetensors
    - model-00002-of-00002.safetensors
  model_type: LLAMA3_2
  output_dir: /home/andrewor/local/logs/tune/Llama3.2-3B_qat
  recipe_checkpoint: null

clip_grad_norm: null
compile: false

# Train/validation use the same dataset with a 95% / 5% split.
# Split strings are quoted: they contain YAML flow indicators ([, ]).
dataset:
  _component_: torchtune.datasets.alpaca_cleaned_dataset
  packed: false
  split: 'train[:95%]'
dataset_val:
  _component_: torchtune.datasets.alpaca_cleaned_dataset
  split: 'train[95%:]'

device: cuda
dtype: bf16
enable_activation_checkpointing: true
enable_activation_offloading: false
epochs: 1
gradient_accumulation_steps: 8
log_every_n_steps: 1
log_peak_memory_stats: true

loss:
  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

max_steps_per_epoch: null

metric_logger:
  _component_: torchtune.training.metric_logging.DiskLogger
  log_dir: /home/andrewor/local/logs/tune/Llama3.2-3B_qat/metrics

model:
  _component_: torchtune.models.llama3_2.llama3_2_3b

optimizer:
  _component_: torch.optim.AdamW
  fused: true
  lr: 2.0e-05
optimizer_in_bwd: false

output_dir: /home/andrewor/local/logs/tune/Llama3.2-3B_qat/metrics

# Torch profiler is wired up but disabled by default.
profiler:
  _component_: torchtune.training.setup_torch_profiler
  active_steps: 2
  cpu: true
  cuda: true
  enabled: false
  num_cycles: 1
  output_dir: /home/andrewor/local/logs/tune/Llama3.2-3B_qat/metrics/profiling_outputs
  profile_memory: false
  record_shapes: true
  wait_steps: 5
  warmup_steps: 3
  with_flops: false
  with_stack: false

quantizer:
  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
  groupsize: 32

resume_from_checkpoint: false
run_val_every_n_steps: null
seed: null
shuffle: true

tokenizer:
  _component_: torchtune.models.llama3.llama3_tokenizer
  max_seq_len: null
  path: /tmp/Llama-3.2-3B-Instruct/original/tokenizer.model
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment