Alvaro Bartolome (alvarobartt)

# pip install "distilabel[vllm]>=1.1.1"
# pip install flash-attn --no-build-isolation
# huggingface-cli login
import time
from distilabel.llms import vLLM
from distilabel.pipeline import Pipeline
from distilabel.steps import KeepColumns, LoadHubDataset
from distilabel.steps.tasks import PrometheusEval
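The preview cuts off at the imports; the sketch below shows how these pieces are typically wired together in a distilabel pipeline. The dataset, column mappings, rubric, and run arguments are assumptions for illustration, not the original gist's values.

with Pipeline(name="prometheus-eval") as pipeline:
    load_dataset = LoadHubDataset(
        name="load_dataset",
        repo_id="HuggingFaceH4/instruction-dataset",  # assumed dataset, replace as needed
        split="test",
        output_mappings={"prompt": "instruction", "completion": "generation"},
    )
    task = PrometheusEval(
        name="prometheus_eval",
        llm=vLLM(model="prometheus-eval/prometheus-7b-v2.0"),
        mode="absolute",
        rubric="factual-validity",
    )
    keep_columns = KeepColumns(
        name="keep_columns",
        columns=["instruction", "generation", "feedback", "result"],
    )
    load_dataset >> task >> keep_columns

if __name__ == "__main__":
    start = time.time()
    distiset = pipeline.run(use_cache=False)
    print(f"Took {time.time() - start:.2f}s")
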
import time
from typing import Any, Dict, Literal
from distilabel.llms import vLLM
from distilabel.llms.typing import ChatType
from distilabel.pipeline import Pipeline
from distilabel.steps import LoadDataFromDicts
from distilabel.steps.tasks.prometheus_eval import PrometheusEval
_CUSTOM_RUBRICS = {
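    # NOTE: illustrative placeholder entry; the gist's actual custom rubrics are truncated in this preview.
    # PrometheusEval expects each rubric as a name mapped to a criteria string following the
    # "[question]\nScore 1: ...\n...\nScore 5: ..." layout used by the default rubrics.
    "custom-quality": (
        "[Does the response fully and accurately answer the given instruction?]\n"
        "Score 1: The response ignores or misunderstands the instruction.\n"
        "Score 2: The response addresses the instruction only partially.\n"
        "Score 3: The response answers the instruction with noticeable omissions.\n"
        "Score 4: The response answers the instruction accurately with minor flaws.\n"
        "Score 5: The response is complete, accurate, and well grounded."
    ),
}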
@alvarobartt
alvarobartt / kv_cache_computation.py
Last active March 27, 2025 17:04
KV Cache Size Computation
from transformers import AutoConfig

if __name__ == "__main__":
    config = AutoConfig.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", token="hf_...")
    tokens_in_cache = 1024  # this is the only arg that will change over time (as more requests are sent)
    precision_in_bytes = 2  # float16 or bfloat16
    # KV cache size = 2 (keys and values) * layers * KV heads * head dim * tokens * bytes per value
    cache_size_bytes = (
        2
        * config.num_hidden_layers
        * config.num_key_value_heads
        * (config.hidden_size // config.num_attention_heads)
        * tokens_in_cache
        * precision_in_bytes
    )
@alvarobartt
alvarobartt / torch_text_generation_mps.py
Last active August 20, 2024 07:09
Simple script using `torch` for text generation with a `transformers` model, one token at a time, on MPS.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Define the model name
model_name = "HuggingFaceTB/SmolLM-1.7B-Instruct"
# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
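The preview stops right after loading the model; below is a minimal sketch of the remaining steps, assuming greedy decoding and a placeholder prompt (the original gist's prompt and generation settings are not shown here).

model = model.to("mps")
model.eval()

prompt = "What is gravity?"  # placeholder prompt
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("mps")

# Generate one token at a time with greedy decoding
for _ in range(64):
    with torch.no_grad():
        logits = model(input_ids).logits
    next_token = logits[:, -1, :].argmax(dim=-1, keepdim=True)
    input_ids = torch.cat([input_ids, next_token], dim=-1)
    if next_token.item() == tokenizer.eos_token_id:
        break

print(tokenizer.decode(input_ids[0], skip_special_tokens=True))
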
@alvarobartt
alvarobartt / diffusers_flux_lora_inference.py
Last active August 31, 2024 08:42
Run a FLUX LoRA with `diffusers` using the `alvarobartt/ghibli-characters-flux-lora` adapter weights
import torch
from diffusers import DiffusionPipeline
model_id = "black-forest-labs/FLUX.1-dev"
adapter_id = "alvarobartt/ghibli-characters-flux-lora"
pipeline = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
pipeline.load_lora_weights(adapter_id)
pipeline.to("cuda")
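The preview ends before any image is generated; a minimal sketch of the inference call follows, assuming a generic Ghibli-style prompt (the LoRA's actual trigger phrase may differ) and typical FLUX.1-dev settings.

prompt = "Ghibli style, a portrait of a young adventurer in a lush forest"  # placeholder prompt
image = pipeline(
    prompt=prompt,
    height=1024,
    width=1024,
    num_inference_steps=30,
    guidance_scale=3.5,
    generator=torch.Generator(device="cuda").manual_seed(42),
).images[0]
image.save("ghibli_character.png")
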
@alvarobartt
alvarobartt / duckdb_query_to_datasets.py
Created September 12, 2024 15:49
DuckDB SQL query to datasets.Dataset
import duckdb
from datasets import Dataset
# Create DuckDB connection
con = duckdb.connect()
con.execute("INSTALL httpfs;")
con.execute("LOAD httpfs;")
# Query the dataset
query = """
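-- Placeholder query: the gist's actual SQL is truncated in this preview.
SELECT *
FROM 'hf://datasets/HuggingFaceH4/instruction-dataset/data/*.parquet'
LIMIT 1000
"""

# Materialize the query result and convert it into a datasets.Dataset
# (the hf:// path assumes DuckDB's httpfs extension, loaded above, with Hugging Face support)
df = con.execute(query).df()
dataset = Dataset.from_pandas(df)
print(dataset)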
@alvarobartt
alvarobartt / required_vram.py
Last active January 31, 2025 15:50
Calculates the required VRAM for different precisions based on the number of parameters of a model (pulled from the Hugging Face Hub Safetensors metadata). This Gist is inspired by https://gist.github.com/philschmid/d188034c759811a7183e7949e1fa0aa4.
from huggingface_hub import get_safetensors_metadata
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
precision = "F8"
dtype_bytes = {"F32": 4, "F16": 2, "BF16": 2, "F8": 1, "INT8": 1, "INT4": 0.5}
metadata = get_safetensors_metadata(model_id)
memory = ((sum(metadata.parameter_count.values()) * dtype_bytes[precision]) / (1024**3)) * 1.18  # 1.18 adds ~18% overhead on top of the raw weights
print(f"{model_id=} requires {memory=}GB")
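As a rough sanity check: Mistral-7B-Instruct-v0.1 has about 7.24B parameters, so the F8 estimate above works out to roughly 7.24e9 * 1 / 1024**3 * 1.18 ≈ 8 GB of VRAM.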
@alvarobartt
alvarobartt / estimate-deepseek-vram.py
Created January 31, 2025 15:52
Calculates the required VRAM for DeepSeek R1 (pulled from the Hugging Face Hub Safetensors metadata)
from huggingface_hub import get_safetensors_metadata

model_id = "deepseek-ai/DeepSeek-R1"
# "BF16" covers the checkpoint's non-quantized tensors; "F8" covers the FP8-quantized weights
dtype_bytes = {"F32": 4, "BF16": 2, "F16": 2, "F8": 1}
metadata = get_safetensors_metadata(model_id)
# Sum per dtype (e.g. "F8_E4M3" -> "F8"), convert to GiB, and add ~18% inference overhead
memory = (
    sum(count * dtype_bytes[key.split("_")[0]] for key, count in metadata.parameter_count.items())
    / (1024**3)
    * 1.18
)
print(f"{model_id=} requires {memory=}GB")