Alvaro Bartolome (alvarobartt)

alvarobartt / estimate-deepseek-vram.py
Created January 31, 2025 15:52
Calculates the required VRAM for DeepSeek R1 (pulled from the Hugging Face Hub Safetensors metadata)
from huggingface_hub import get_safetensors_metadata

model_id = "deepseek-ai/DeepSeek-R1"
# bytes per parameter for each safetensors dtype prefix
dtype_bytes = {"F32": 4, "F16": 2, "F8": 1}

metadata = get_safetensors_metadata(model_id)
# parameter_count keys look like "F8_E4M3", so split off the precision prefix
memory = (
    sum(count * dtype_bytes[key.split("_")[0]] for key, count in metadata.parameter_count.items())
    / (1024**3)
    * 1.18  # ~18% headroom on top of the raw weights
)
print(f"{model_id=} requires {memory=:.2f}GB")
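
Note that the 1.18 multiplier is the same ~18% headroom heuristic used in the next gist, so the figure estimates total VRAM rather than just the raw weight footprint.
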
alvarobartt / required_vram.py
Last active January 31, 2025 15:50
Calculates the required VRAM for different precisions based on the number of parameters of a model (pulled from the Hugging Face Hub Safetensors metadata). This Gist is inspired by https://gist.github.com/philschmid/d188034c759811a7183e7949e1fa0aa4.
from huggingface_hub import get_safetensors_metadata

model_id = "mistralai/Mistral-7B-Instruct-v0.1"
precision = "F8"
dtype_bytes = {"F32": 4, "F16": 2, "BF16": 2, "F8": 1, "INT8": 1, "INT4": 0.5}

metadata = get_safetensors_metadata(model_id)
memory = ((sum(metadata.parameter_count.values()) * dtype_bytes[precision]) / (1024**3)) * 1.18
print(f"{model_id=} requires {memory=}GB")
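
As a quick usage sketch, assuming the same `metadata` and `dtype_bytes` objects defined above, the estimate can be printed for every precision in one loop:

# minimal sketch, reusing metadata and dtype_bytes from the snippet above
for precision, nbytes in dtype_bytes.items():
    memory = ((sum(metadata.parameter_count.values()) * nbytes) / (1024**3)) * 1.18
    print(f"{precision}: {memory:.2f}GB")
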
alvarobartt / duckdb_query_to_datasets.py
Created September 12, 2024 15:49
DuckDB SQL query to datasets.Dataset
import duckdb
from datasets import Dataset

# Create DuckDB connection
con = duckdb.connect()
con.execute("INSTALL httpfs;")
con.execute("LOAD httpfs;")

# Query the dataset
query = """
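
The preview cuts off at the query string. A minimal end-to-end sketch, with a hypothetical Parquet source standing in for the truncated query, would materialize the result and wrap it as a `datasets.Dataset`:

import duckdb
from datasets import Dataset

con = duckdb.connect()
con.execute("INSTALL httpfs;")
con.execute("LOAD httpfs;")

# hypothetical query; the gist's own SQL is truncated in this preview
query = "SELECT * FROM 'https://example.com/data.parquet' LIMIT 1000"

# fetch the result as a pandas DataFrame and wrap it as a datasets.Dataset
dataset = Dataset.from_pandas(con.execute(query).df())
print(dataset)
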
alvarobartt / diffusers_flux_lora_inference.py
Last active August 31, 2024 08:42
Run FLUX LoRA inference with `diffusers`, using the `alvarobartt/ghibli-characters-flux-lora` adapter weights
import torch
from diffusers import DiffusionPipeline

model_id = "black-forest-labs/FLUX.1-dev"
adapter_id = "alvarobartt/ghibli-characters-flux-lora"

pipeline = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
pipeline.load_lora_weights(adapter_id)
pipeline.to("cuda")
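
The preview stops before the generation call. A minimal continuation, with an assumed prompt and sampling settings rather than the gist's own values, would be:

# hypothetical prompt and sampling settings; the gist's values are truncated
prompt = "Ghibli style, a young character standing in a grassy field"
image = pipeline(
    prompt,
    num_inference_steps=30,
    guidance_scale=3.5,
).images[0]
image.save("output.png")
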
alvarobartt / torch_text_generation_mps.py
Last active August 20, 2024 07:09
Simple script using `torch` for text generation with a `transformers` model, one token at a time, on MPS.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define the model name
model_name = "HuggingFaceTB/SmolLM-1.7B-Instruct"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
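
The preview ends before the decoding loop. A minimal token-at-a-time continuation on MPS, using greedy decoding and an assumed prompt, could look like this:

# move the model to the Apple Silicon GPU
model.to("mps")

prompt = "What is gravity?"  # hypothetical prompt; the gist's own is truncated
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("mps")

# greedy decoding, one token per forward pass
for _ in range(50):
    with torch.no_grad():
        logits = model(input_ids).logits
    next_token = logits[:, -1, :].argmax(dim=-1, keepdim=True)
    input_ids = torch.cat([input_ids, next_token], dim=-1)
    if next_token.item() == tokenizer.eos_token_id:
        break

print(tokenizer.decode(input_ids[0], skip_special_tokens=True))
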
alvarobartt / kv_cache_computation.py
Last active February 3, 2025 15:41
KV Cache Size Computation
from transformers import AutoConfig

if __name__ == "__main__":
    config = AutoConfig.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", token="hf_...")
    tokens_in_cache = 1024  # this is the only arg that will change over time (as more requests are sent)
    precision_in_bytes = 2  # float16 or bfloat16
    cache_size_bytes = (
        2  # one tensor each for keys and values
        * config.num_hidden_layers
        * config.num_key_value_heads
        * (config.hidden_size // config.num_attention_heads)  # head dim
        * tokens_in_cache
        * precision_in_bytes
    )
    print(f"KV cache size: {cache_size_bytes / 1024**2:.2f} MiB")
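
For Llama-3.1-8B-Instruct, assuming the usual config values (32 layers, 8 KV heads, head dim 128), 1024 cached tokens at 2 bytes per value work out to 2 × 32 × 8 × 128 × 1024 × 2 bytes = 128 MiB.
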

import time
from typing import Any, Dict, Literal

from distilabel.llms import vLLM
from distilabel.llms.typing import ChatType
from distilabel.pipeline import Pipeline
from distilabel.steps import LoadDataFromDicts
from distilabel.steps.tasks.prometheus_eval import PrometheusEval

_CUSTOM_RUBRICS = {

# pip install "distilabel[vllm]>=1.1.1"
# pip install flash-attn --no-build-isolation
# huggingface-cli login

import time

from distilabel.llms import vLLM
from distilabel.pipeline import Pipeline
from distilabel.steps import KeepColumns, LoadHubDataset
from distilabel.steps.tasks import PrometheusEval

from distilabel.llms import (
    AnthropicLLM,
    InferenceEndpointsLLM,
    OpenAILLM,
)
from distilabel.pipeline import Pipeline
from distilabel.steps import (
    CombineColumns,
    KeepColumns,
    LoadDataFromDicts,