@andrewor14
Created August 29, 2025 21:50
Unsloth QAT full finetuning test
# Based on https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-Alpaca.ipynb
# but with `full_finetuning=True` and without `get_peft_model`
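# Assumed setup (not part of the original gist, matching the Unsloth notebooks):
# `pip install unsloth`, plus a recent torchao release, which the QAT path
# presumably builds on.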
import os
from unsloth import FastLanguageModel
from transformers import TextStreamer
import torch
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = torch.bfloat16,
    load_in_4bit = False,
    full_finetuning = True,
    qat_scheme = "fp8-int4",
)
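# Optional sanity check (assumption: the QAT scheme swaps linear layers for
# fake-quantized variants during training); printing one decoder block shows
# what, if anything, was inserted.
print(model.model.layers[0].mlp)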
# ============
# Data prep |
# ============
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass
from datasets import load_dataset
dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)
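# Quick check: print one formatted example to confirm the Alpaca template and
# EOS token were applied as expected.
print(dataset[0]["text"])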
# ========
# Train |
# ========
from trl import SFTConfig, SFTTrainer
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = False,
    args = SFTConfig(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 60,
        learning_rate = 2e-5,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)
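# Effective batch size is per_device_train_batch_size (2) x gradient_accumulation_steps (4) = 8
# sequences per optimizer step on a single GPU, so 60 steps covers roughly 480 examples.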
trainer_stats = trainer.train()
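# trainer.train() returns a TrainOutput whose .metrics dict holds standard
# transformers Trainer stats such as "train_runtime" and "train_loss".
print(trainer_stats.metrics)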
# ===========
# Generate |
# ===========
inputs = tokenizer(
    [
        alpaca_prompt.format(
            "Continue the Fibonacci sequence.", # instruction
            "1, 1, 2, 3, 5, 8", # input
            "", # output - leave this blank for generation!
        )
    ],
    return_tensors = "pt",
).to("cuda")
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)
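# Optionally persist the finetuned model and tokenizer with the standard
# Hugging Face save_pretrained API. The directory name below is just an example,
# and a QAT checkpoint saved this way still holds bf16 weights; converting to
# actually-quantized weights is a separate step not shown here.
model.save_pretrained("outputs/qat_full_finetune")
tokenizer.save_pretrained("outputs/qat_full_finetune")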