Skip to content

Instantly share code, notes, and snippets.

View lucataco's full-sized avatar
🎯
Focusing

Luis Catacora lucataco

🎯
Focusing
View GitHub Profile
@lucataco
lucataco / sdxl.py
Created December 7, 2023 03:21
Run SDXL locally
from diffusers import DiffusionPipeline
import torch
import time
# load both base & refiner
t1 = time.time()
base = DiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
)
base.to("cuda")
@lucataco
lucataco / llama2-13b-chat.py
Last active May 20, 2025 08:37
Run llama2-13b locally
import os
import time
import torch
from typing import Iterator
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# Change this to 512, 1024, or 2048
MAX_NEW_TOKENS = 512
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@lucataco
lucataco / runllama2.py
Created November 29, 2023 20:33
Benchmark Llama2-13B speeds
import time
import json
import requests
# Start Llama2 13b locally:
# docker run -d -p 5000:5000 --gpus=all r8.im/meta/llama-2-13b@sha256:078d7a002387bd96d93b0302a4c03b3f15824b63104034bfa943c63a8f208c38
url = "http://localhost:5000/predictions"
@lucataco
lucataco / runSVD.py
Created November 29, 2023 20:33
Benchmark SVD speed
import io
import time
import json
import base64
import requests
# Start SVD (Stable Video Diffusion) locally:
# docker run -d -p 5000:5000 --gpus=all r8.im/stability-ai/stable-video-diffusion@sha256:3f0457e4619daac51203dedb472816fd4af51f3149fa7a9e0b5ffcf1b8172438
@lucataco
lucataco / runSDXL.py
Created November 29, 2023 20:32
Benchmark SDXL speed
import io
import time
import json
import base64
import requests
from PIL import Image
# Start SDXL locally:
# docker run -d -p 5000:5000 --gpus=all r8.im/stability-ai/sdxl@sha256:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b
url = "http://localhost:5000/predictions"
@lucataco
lucataco / predict.py
Created October 15, 2023 02:48
RealvisXL-v1.0
from cog import BasePredictor, Input, Path
import os
import torch
import time
from diffusers import (DDIMScheduler,
DiffusionPipeline,
DPMSolverMultistepScheduler,
EulerAncestralDiscreteScheduler,
EulerDiscreteScheduler,
HeunDiscreteScheduler,
@lucataco
lucataco / CoreWeaveCogSetup.sh
Created September 14, 2023 17:37
CoreWeave Cog setup
# Refresh the apt package index (Ubuntu 22.04)
sudo apt-get update
# Install cog
sudo curl -o /usr/local/bin/cog -L "https://github.com/replicate/cog/releases/latest/download/cog_$(uname -s)_$(uname -m)"
sudo chmod +x /usr/local/bin/cog
# Install docker
sudo apt install docker-ce
sudo usermod -aG docker $USER
@lucataco
lucataco / gist:bbd420ab927fe2cfb8d8631fc880e07e
Created September 11, 2023 19:58
Replicate-LoRA-manual-load-weights-take2
import os
import json
import torch
from diffusers import DiffusionPipeline, EulerDiscreteScheduler
from safetensors import safe_open
from dataset_and_utils import TokenEmbeddingsHandler
from safetensors.torch import load_file
from diffusers.models.attention_processor import LoRAAttnProcessor2_0
pipe = DiffusionPipeline.from_pretrained(
@lucataco
lucataco / gist:338ed0efd2041ddf093f2bace84a6aee
Created September 11, 2023 19:58
Replicate-LoRA-manual-load-weights
import os
import torch
from diffusers import DiffusionPipeline, EulerDiscreteScheduler
from safetensors import safe_open
from dataset_and_utils import TokenEmbeddingsHandler
pipe = DiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
torch_dtype=torch.float16,
use_safetensors=True,
@lucataco
lucataco / Falcon7BHFspeedtest.py
Last active June 30, 2023 23:55
Falcon7B HF speed test
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import time
model = "tiiuae/falcon-7b"
tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
"text-generation",