FA3 attention processor comes from https://gist.github.com/sayakpaul/ff715f979793d4d44beb68e5e08ee067.
Results from an H100:
latency=36.606 seconds. (AoT regional compilation)
latency=36.555 seconds. (JiT regional compilation)
import torch | |
from diffusers import DiffusionPipeline | |
import spaces | |
from spaces.zero.torch.aoti import ZeroGPUCompiledModel, ZeroGPUWeights | |
from time import perf_counter | |
import argparse | |
CKPT_ID = "black-forest-labs/Flux.1-Dev" |
FA3 attention processor comes from https://gist.github.com/sayakpaul/ff715f979793d4d44beb68e5e08ee067.
Results from an H100:
latency=36.606 seconds. (AoT regional compilation)
latency=36.555 seconds. (JiT regional compilation)
# Make sure you are using the latest `bitsandbytes` (at least 0.46.0) and PyTorch nightlies (at least 2.8). | |
# Put together by sayakpaul and anijain2305 | |
from diffusers.quantizers import PipelineQuantizationConfig | |
from diffusers import FluxPipeline | |
import argparse | |
import json | |
import torch | |
import time | |
from functools import partial |
from diffusers import DiffusionPipeline | |
import torch.utils.benchmark as benchmark | |
import torch | |
import psutil | |
import os | |
import json | |
import argparse | |
def benchmark_fn(f, *args, **kwargs): | |
t0 = benchmark.Timer( |
from google import genai | |
from google.genai import types | |
import typing_extensions as typing | |
from PIL import Image | |
import requests | |
import io | |
import json | |
import os | |
""" | |
Implementation of the label generation part in https://danielvanstrien.xyz/posts/2025/deepseek/distil-deepseek-modernbert.html | |
using `transformers` and DeepSeek. | |
""" | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import torch | |
import re | |
import contextlib | |
import math |
from moviepy.editor import VideoFileClip, clips_array | |
import glob | |
def create_video_collage(video_paths, output_path="collage.mp4"): | |
""" | |
Combine four videos of the same resolution into a 2×2 collage. | |
Args: | |
video_paths (list[str]): List of paths to the four video files. | |
output_path (str): Filename for the output collage video. |
from diffusers import DiffusionPipeline | |
from diffusers import FluxTransformer2DModel, BitsAndBytesConfig | |
from transformers import T5EncoderModel, BitsAndBytesConfig as BnbConfig | |
from offloader import ModelOffloaderV2 | |
import torch.utils.benchmark as benchmark | |
from pathlib import Path | |
import os | |
import sys | |
import torch | |
import json |
import torch | |
from diffusers.utils import export_to_video | |
from diffusers import LTXPipeline, LTXVideoTransformer3DModel, GGUFQuantizationConfig | |
ckpt_path = ( | |
"https://huggingface.co/city96/LTX-Video-gguf/blob/main/ltx-video-2b-v0.9-Q3_K_S.gguf" | |
) | |
transformer = LTXVideoTransformer3DModel.from_single_file( | |
ckpt_path, | |
quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16), |
import torch | |
from diffusers import FluxTransformer2DModel | |
import torch.utils.benchmark as benchmark | |
from torchao.quantization import quantize_, int8_weight_only | |
from torchao.utils import unwrap_tensor_subclass | |
import torch._inductor | |
torch._inductor.config.mixed_mm_choice = "triton" | |