lucataco / ollama_fast_speech_text_speech.py
Last active November 27, 2025 16:53
speech to text to speech using Ollama
""" To use: install Ollama, clone OpenVoice, run this script in the OpenVoice directory
brew install portaudio
brew install git-lfs
git lfs install
git clone https://github.com/myshell-ai/OpenVoice
cd OpenVoice
git clone https://huggingface.co/myshell-ai/OpenVoice
cp -r OpenVoice/* .
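The preview cuts off here. The rest of the script wires recording, transcription, Ollama, and OpenVoice together; a minimal sketch of just the Ollama round-trip (the model name and prompt are placeholders, not taken from the gist):

import requests

def ask_ollama(text: str, model: str = "mistral") -> str:
    # POST to the local Ollama server (default port 11434);
    # stream=False returns the whole reply as one JSON object.
    resp = requests.post(
        "http://localhost:11434/api/generate",
        json={"model": model, "prompt": text, "stream": False},
    )
    resp.raise_for_status()
    return resp.json()["response"]

print(ask_ollama("Say hello in one sentence."))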
lucataco / svd.py
Created January 4, 2024 15:52
Run SVD locally
# from cog import BasePredictor, Input, Path
import os
import cv2
import time
import math
import torch
import numpy as np
from PIL import Image
from glob import glob
from typing import Optional
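The preview stops at the imports. A minimal sketch of what "run SVD locally" looks like with diffusers' StableVideoDiffusionPipeline (the checkpoint, resolution, and fps follow the library's standard example; they are assumptions here, not read from the gist):

import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video

pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16, variant="fp16",
)
pipe.to("cuda")

image = load_image("input.png").resize((1024, 576))  # SVD's native resolution
frames = pipe(image, decode_chunk_size=8).frames[0]  # smaller chunks use less VRAM
export_to_video(frames, "output.mp4", fps=7)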
lucataco / notes.txt
Last active December 7, 2024 19:25
L40S vs A40 Benchmarks
**Goal**: Run benchmarks of SDXL, SVD, and Llama2-13B on an L40S test node
**TL;DR**:
- L40S matches the A40's inference speed on SDXL
- L40S is ~10% faster than the A40 on Llama2-13B inference
- L40S is ~9% faster than the A40 at video rendering (SVD)
**Process**: Run non-docker/cog python code for fp16
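A minimal timing harness in the spirit of these runs (the callable under test is a placeholder):

import time

def bench(fn, warmup=1, runs=5):
    for _ in range(warmup):  # absorb model load / CUDA init
        fn()
    times = []
    for _ in range(runs):
        t0 = time.time()
        fn()
        times.append(time.time() - t0)
    print(f"mean {sum(times)/len(times):.2f}s over {runs} runs")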
lucataco / sdxl.py
Created December 7, 2023 03:21
Run SDXL locally
from diffusers import DiffusionPipeline
import torch
import time
# load both base & refiner
t1 = time.time()
base = DiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
)
base.to("cuda")
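# The preview cuts off here. A plausible continuation following the standard
# diffusers base+refiner pattern (the 40 steps and the 0.8 denoising split are
# the library docs' defaults, assumed rather than taken from the gist):
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2, vae=base.vae,
    torch_dtype=torch.float16, variant="fp16", use_safetensors=True,
)
refiner.to("cuda")
print(f"setup: {time.time() - t1:.1f}s")

prompt = "An astronaut riding a green horse"  # placeholder prompt
t2 = time.time()
# Base denoises the first 80% of steps and hands latents to the refiner.
latents = base(prompt=prompt, num_inference_steps=40,
               denoising_end=0.8, output_type="latent").images
image = refiner(prompt=prompt, num_inference_steps=40,
                denoising_start=0.8, image=latents).images[0]
print(f"inference: {time.time() - t2:.1f}s")
image.save("out.png")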
lucataco / llama2-13b-chat.py
Last active May 20, 2025 08:37
Run llama2-13b locally
import os
import time
import torch
from typing import Iterator
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# Set to 512, 1024, or 2048
MAX_NEW_TOKENS = 512
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
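The preview stops at the constants. A hedged sketch of the streaming pattern these imports point to, with generate() on a background thread feeding a TextIteratorStreamer (the model ID is assumed from the gist's name):

model_id = "meta-llama/Llama-2-13b-chat-hf"  # assumption
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)

def generate(prompt: str) -> Iterator[str]:
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() blocks, so run it on a thread and read tokens as they arrive.
    Thread(target=model.generate, kwargs=dict(
        **inputs, streamer=streamer, max_new_tokens=MAX_NEW_TOKENS
    )).start()
    yield from streamer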
lucataco / runllama2.py
Created November 29, 2023 20:33
Benchmark Llama2-13B speeds
import time
import json
import requests
# Start Llama2 13b locally:
# docker run -d -p 5000:5000 --gpus=all r8.im/meta/llama-2-13b@sha256:078d7a002387bd96d93b0302a4c03b3f15824b63104034bfa943c63a8f208c38
url = "http://localhost:5000/predictions"
lucataco / runSVD.py
Created November 29, 2023 20:33
Benchmark SVD speed
import io
import time
import json
import base64
import requests
# Start SVD locally:
# docker run -d -p 5000:5000 --gpus=all r8.im/stability-ai/stable-video-diffusion@sha256:3f0457e4619daac51203dedb472816fd4af51f3149fa7a9e0b5ffcf1b8172438
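The preview cuts off before the request. A sketch of the timed round-trip: cog file inputs go in as base64 data URIs, and "input_image" plus the output shape are assumptions here:

url = "http://localhost:5000/predictions"

with open("input.png", "rb") as f:
    data_uri = "data:image/png;base64," + base64.b64encode(f.read()).decode()

t0 = time.time()
resp = requests.post(url, json={"input": {"input_image": data_uri}})
resp.raise_for_status()
print(f"render: {time.time() - t0:.1f}s")

# Output assumed to come back as a data URI; strip the header, decode, write.
video_b64 = resp.json()["output"].split(",", 1)[1]
with open("output.mp4", "wb") as f:
    f.write(base64.b64decode(video_b64))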
lucataco / runSDXL.py
Created November 29, 2023 20:32
Benchmark SDXL speed
import io
import time
import json
import base64
import requests
from PIL import Image
# Start SDXL locally:
# docker run -d -p 5000:5000 --gpus=all r8.im/stability-ai/sdxl@sha256:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b
url = "http://localhost:5000/predictions"
lucataco / predict.py
Created October 15, 2023 02:48
RealvisXL-v1.0
from cog import BasePredictor, Input, Path
import os
import torch
import time
from diffusers import (
    DDIMScheduler,
    DiffusionPipeline,
    DPMSolverMultistepScheduler,
    EulerAncestralDiscreteScheduler,
    EulerDiscreteScheduler,
    HeunDiscreteScheduler,
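    # The preview truncates the import list here; closing it so the sketch
    # below parses (any further schedulers in the gist are omitted).
)

# A hedged guess at the predictor's shape under cog's API; the HF model ID
# and the input defaults are placeholders, not taken from the gist.
class Predictor(BasePredictor):
    def setup(self):
        self.pipe = DiffusionPipeline.from_pretrained(
            "SG161222/RealVisXL_V1.0", torch_dtype=torch.float16
        )
        self.pipe.to("cuda")

    def predict(
        self,
        prompt: str = Input(description="Input prompt"),
        num_inference_steps: int = Input(default=30, ge=1, le=100),
    ) -> Path:
        image = self.pipe(prompt, num_inference_steps=num_inference_steps).images[0]
        out = "/tmp/out.png"
        image.save(out)
        return Path(out)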
lucataco / Falcon7BHFspeedtest.py
Last active June 30, 2023 23:55
Falcon7B HF speed test
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import time
model = "tiiuae/falcon-7b"
tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
"text-generation",