Skip to content

Instantly share code, notes, and snippets.

View sayakpaul's full-sized avatar
:octocat:
Learn, unlearn and relearn.

Sayak Paul sayakpaul

:octocat:
Learn, unlearn and relearn.
View GitHub Profile
import torch
from diffusers import DiffusionPipeline
import spaces
from spaces.zero.torch.aoti import ZeroGPUCompiledModel, ZeroGPUWeights
from time import perf_counter
import argparse
CKPT_ID = "black-forest-labs/Flux.1-Dev"
@sayakpaul
sayakpaul / README.md
Last active September 5, 2025 02:22
Regional compilation in AoT
# Make sure you are using the latest `bitsandbytes` (at least 0.46.0) and PyTorch nightlies (at least 2.8).
# Put together by sayakpaul and anijain2305
from diffusers.quantizers import PipelineQuantizationConfig
from diffusers import FluxPipeline
import argparse
import json
import torch
import time
from functools import partial
from diffusers import DiffusionPipeline
import torch.utils.benchmark as benchmark
import torch
import psutil
import os
import json
import argparse
def benchmark_fn(f, *args, **kwargs):
t0 = benchmark.Timer(
@sayakpaul
sayakpaul / grade_images_with_gemini.py
Last active July 9, 2025 18:50
Shows how to use Gemini 2.0 Flash to grade images on multiple aspects, such as accuracy to the prompt, emotional and thematic response, etc.
from google import genai
from google.genai import types
import typing_extensions as typing
from PIL import Image
import requests
import io
import json
import os
@sayakpaul
sayakpaul / generate_labels_with_deepseek.py
Last active February 7, 2025 15:56
Generate labels with DeepSeek and `transformers`.
"""
Implementation of the label generation part in https://danielvanstrien.xyz/posts/2025/deepseek/distil-deepseek-modernbert.html
using `transformers` and DeepSeek.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import re
import contextlib
import math
@sayakpaul
sayakpaul / create_collage_videos.py
Created January 30, 2025 10:42
Create a nice 2×2 collage video from four input videos.
from moviepy.editor import VideoFileClip, clips_array
import glob
def create_video_collage(video_paths, output_path="collage.mp4"):
"""
Combine four videos of the same resolution into a 2×2 collage.
Args:
video_paths (list[str]): List of paths to the four video files.
output_path (str): Filename for the output collage video.
@sayakpaul
sayakpaul / benchmark_flux_without_compile.py
Created January 24, 2025 10:15
Benchmarking Flux across different optimizations.
from diffusers import DiffusionPipeline
from diffusers import FluxTransformer2DModel, BitsAndBytesConfig
from transformers import T5EncoderModel, BitsAndBytesConfig as BnbConfig
from offloader import ModelOffloaderV2
import torch.utils.benchmark as benchmark
from pathlib import Path
import os
import sys
import torch
import json
import torch
from diffusers.utils import export_to_video
from diffusers import LTXPipeline, LTXVideoTransformer3DModel, GGUFQuantizationConfig
ckpt_path = (
"https://huggingface.co/city96/LTX-Video-gguf/blob/main/ltx-video-2b-v0.9-Q3_K_S.gguf"
)
transformer = LTXVideoTransformer3DModel.from_single_file(
ckpt_path,
quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
@sayakpaul
sayakpaul / aot_compile_with_int8_quant.py
Last active June 3, 2025 16:09
Shows how to AoT-compile the Flux.1 Dev Transformer with int8 quantization and perform inference.
import torch
from diffusers import FluxTransformer2DModel
import torch.utils.benchmark as benchmark
from torchao.quantization import quantize_, int8_weight_only
from torchao.utils import unwrap_tensor_subclass
import torch._inductor
torch._inductor.config.mixed_mm_choice = "triton"