Skip to content

Instantly share code, notes, and snippets.

@Mic92
Last active February 19, 2026 11:47
Show Gist options
  • Select an option

  • Save Mic92/8b7677699b1c6e7675fe9c80b414c70a to your computer and use it in GitHub Desktop.

Select an option

Save Mic92/8b7677699b1c6e7675fe9c80b414c70a to your computer and use it in GitHub Desktop.
Run Phi-3 Mini on a local CUDA GPU with NixOS + direnv
.venv
.direnv
__pycache__
#!/usr/bin/env python3
"""Run Phi-3 Mini on a local CUDA GPU."""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repository id of the model to download and run.
MODEL_NAME = "microsoft/phi-3-mini-4k-instruct"
def main() -> None:
    """Load Phi-3 Mini onto the local CUDA GPU and run a single chat completion.

    Downloads the model/tokenizer from the Hugging Face Hub on first run,
    places all weights on the GPU in fp16, generates one reply, and prints
    it along with GPU memory usage. Requires a working CUDA device.
    """
    print(f"Loading {MODEL_NAME}...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # fp16 halves GPU memory vs. fp32; device_map="cuda" puts all weights on the GPU.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="cuda",
    )
    print(f"Model loaded. GPU memory: {torch.cuda.memory_allocated() / 1024**3:.1f} GB")

    messages = [{"role": "user", "content": "Explain quantum computing in one paragraph."}]
    # add_generation_prompt=True appends the assistant-turn marker to the prompt;
    # without it an instruct model tends to continue the user's turn instead of
    # answering it.
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        return_dict=True,
        add_generation_prompt=True,
    ).to("cuda")

    print("Generating...")
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        outputs = model.generate(**inputs, max_new_tokens=256)

    # Slice off the prompt tokens so only the newly generated text is decoded.
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1] :], skip_special_tokens=True
    )
    print(f"\nPhi-3 says:\n{response}")
    print(f"\nGPU memory used: {torch.cuda.memory_allocated() / 1024**3:.1f} GB")


if __name__ == "__main__":
    main()
# CUDA development shell: unfree + cudaSupport nixpkgs, a project-local venv,
# and library paths set up so pip-installed wheels (torch etc.) find CUDA.
{ pkgs ? import <nixpkgs> { config.allowUnfree = true; config.cudaSupport = true; } }:

pkgs.mkShell {
  name = "cuda-dev";

  # `with pkgs;` already scopes these names, so no `pkgs.` prefix is needed.
  buildInputs = with pkgs; [
    python3
    cudaPackages.cudatoolkit
    cudaPackages.cudnn
  ];

  shellHook = ''
    # Point builds at the toolkit and expose CUDA/cuDNN plus the NixOS GPU
    # driver libraries (/run/opengl-driver/lib) to dynamically linked wheels.
    export CUDA_HOME="${pkgs.cudaPackages.cudatoolkit}"
    export LD_LIBRARY_PATH="/run/opengl-driver/lib:${pkgs.lib.makeLibraryPath [
      pkgs.cudaPackages.cudatoolkit
      pkgs.cudaPackages.cudnn
      pkgs.stdenv.cc.cc.lib
      pkgs.zlib
    ]}:$LD_LIBRARY_PATH"

    # Create and activate a project-local virtualenv for pip packages.
    if [ ! -d .venv ]; then
      python -m venv .venv
    fi
    source .venv/bin/activate

    echo "CUDA dev shell ready."
    echo ""
    echo "First time setup:"
    echo " pip install torch transformers accelerate"
    echo ""
    echo "Run Phi-3:"
    echo " python phi3.py"
  '';
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment