Skip to content

Instantly share code, notes, and snippets.

@lucataco
lucataco / safeBloom.py
Created January 26, 2023 19:45
Safetensors speed comparison with bloom-560M
import os
import datetime
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
import torch
# Resolve both serializations of the checkpoint from the same
# bigscience/bloom-560m repository so their load times can be compared.
sf_filename = hf_hub_download(
    repo_id="bigscience/bloom-560m",
    filename="model.safetensors",
)
pt_filename = hf_hub_download(
    repo_id="bigscience/bloom-560m",
    filename="pytorch_model.bin",
)
@lucataco
lucataco / safeFlan.py
Created January 26, 2023 20:24
Safetensors speed comparison with flan-t5-large
import os
import datetime
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
import torch
# The safetensors file is read from the current working directory, not the Hub.
# NOTE(review): presumably it was produced locally beforehand — confirm.
sf_filename = "./model.safetensors"
# The pytorch_model.bin checkpoint comes from the google/flan-t5-large repo.
pt_filename = hf_hub_download(
    repo_id="google/flan-t5-large",
    filename="pytorch_model.bin",
)
@lucataco
lucataco / UbuntuMLsetup.sh
Last active October 19, 2025 23:24
Clean Ubuntu Install - Machine Learning setup
# Install Ubuntu 22.04
# Refresh the package lists, then upgrade installed packages without prompting.
sudo apt-get update
sudo apt-get upgrade -y
# Install tools
# Use apt-get with -y like the upgrade step above so the script does not stop
# for confirmation (plain `apt` is intended for interactive use).
sudo apt-get install -y git htop btop
# Install miniconda
# Download the latest Linux x86_64 installer and make it executable.
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
chmod +x Miniconda3-latest-Linux-x86_64.sh
@lucataco
lucataco / Falcon7BHFspeedtest.py
Last active June 30, 2023 23:55
Falcon7B HF speed test
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import time
# Model identifier string; reused below to load the matching tokenizer.
model = "tiiuae/falcon-7b"
# Load the tokenizer for the model identifier above.
tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
"text-generation",
@lucataco
lucataco / predict.py
Created October 15, 2023 02:48
RealvisXL-v1.0
from cog import BasePredictor, Input, Path
import os
import torch
import time
from diffusers import (DDIMScheduler,
DiffusionPipeline,
DPMSolverMultistepScheduler,
EulerAncestralDiscreteScheduler,
EulerDiscreteScheduler,
HeunDiscreteScheduler,
@lucataco
lucataco / runSDXL.py
Created November 29, 2023 20:32
Benchmark SDXL speed
import io
import time
import json
import base64
import requests
from PIL import Image
# Start SDXL locally:
# docker run -d -p 5000:5000 --gpus=all r8.im/stability-ai/sdxl@sha256:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b
# Predictions endpoint on localhost:5000, the host port published by the
# `-p 5000:5000` flag in the docker command above.
url = "http://localhost:5000/predictions"
@lucataco
lucataco / runSVD.py
Created November 29, 2023 20:33
Benchmark SVD speed
import io
import time
import json
import base64
import requests
# Start SVD locally:
# docker run -d -p 5000:5000 --gpus=all r8.im/stability-ai/stable-video-diffusion@sha256:3f0457e4619daac51203dedb472816fd4af51f3149fa7a9e0b5ffcf1b8172438
@lucataco
lucataco / runllama2.py
Created November 29, 2023 20:33
Benchmark Llama2-13B speeds
import time
import json
import requests
# Start Llama2 13b locally:
# docker run -d -p 5000:5000 --gpus=all r8.im/meta/llama-2-13b@sha256:078d7a002387bd96d93b0302a4c03b3f15824b63104034bfa943c63a8f208c38
# Predictions endpoint on localhost:5000, the host port published by the
# `-p 5000:5000` flag in the docker command above.
url = "http://localhost:5000/predictions"
@lucataco
lucataco / llama2-13b-chat.py
Last active May 20, 2025 08:37
Run llama2-13b locally
import os
import time
import torch
from typing import Iterator
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# Hard-coded setting; change this to 512, 1024, 2048.
MAX_NEW_TOKENS = 512
# Read from the MAX_INPUT_TOKEN_LENGTH environment variable when it is set,
# otherwise fall back to 4096; the value is always converted to int.
MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "4096"))
@lucataco
lucataco / sdxl.py
Created December 7, 2023 03:21
Run SDXL locally
from diffusers import DiffusionPipeline
import torch
import time
# load both base & refiner
# Record the start time before the pipeline is created.
t1 = time.time()
# Base pipeline: float16 dtype, "fp16" weight variant, safetensors files.
base = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
# Move the pipeline to the CUDA device.
base.to("cuda")