Stable Diffusion on PyTorch 2.0 (Modal)
# ## Basic setup

from __future__ import annotations

import io
import os
import time
from pathlib import Path

import modal

# All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for
# the application. Let's give it a friendly name.

stub = modal.Stub("stable-diffusion-cli")

# We will be using `typer` to create our CLI interface.

import typer

app = typer.Typer()

# ## Model dependencies
#
# Your model will be running remotely inside a container. We will be installing
# all the model dependencies in the next step. We will also be "baking the model"
# into the image by running a Python function as a part of building the image.
# This lets us start containers much faster, since all the data that's needed is
# already inside the image.

model_id = "runwayml/stable-diffusion-v1-5"
cache_path = "/vol/cache"


def download_models():
    import diffusers
    import torch

    hugging_face_token = os.environ["HUGGINGFACE_TOKEN"]

    # Download scheduler configuration. Experiment with different schedulers
    # to identify one that works best for your use-case.
    scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
        model_id,
        subfolder="scheduler",
        use_auth_token=hugging_face_token,
        cache_dir=cache_path,
    )
    scheduler.save_pretrained(cache_path, safe_serialization=True)
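    # For instance, an Euler ancestral scheduler can be loaded from the same
    # config and cached alongside it for comparison. This is just an
    # illustrative sketch and is not used elsewhere in this example:
    #
    #   euler = diffusers.EulerAncestralDiscreteScheduler.from_pretrained(
    #       model_id,
    #       subfolder="scheduler",
    #       use_auth_token=hugging_face_token,
    #       cache_dir=cache_path,
    #   )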

    # Downloads all other models.
    pipe = diffusers.StableDiffusionPipeline.from_pretrained(
        model_id,
        use_auth_token=hugging_face_token,
        revision="fp16",
        torch_dtype=torch.float16,
        cache_dir=cache_path,
    )
    pipe.save_pretrained(cache_path, safe_serialization=True)

image = (
    modal.Image.debian_slim(python_version="3.10")
    .pip_install(
        "accelerate",
        "diffusers[torch]>=0.10",
        "ftfy",
        "torchvision",
        "transformers",
        "triton",
        "safetensors",
    )
    .pip_install("torch==2.0.0")
    .run_function(
        download_models,
        secrets=[modal.Secret.from_name("huggingface-secret")],
    )
)
stub.image = image

# ## Using container lifecycle methods
#
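# Loading the model weights and compiling the UNet are expensive, so we do them
# once per container rather than once per request: Modal calls a class's
# `__enter__` method when a new container starts, and every `run_inference`
# call handled by that container then reuses `self.pipe`.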
class StableDiffusion:
    def __enter__(self):
        import diffusers
        import torch

        torch.backends.cuda.matmul.allow_tf32 = True

        scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
            cache_path,
            subfolder="scheduler",
            solver_order=2,
            prediction_type="epsilon",
            thresholding=False,
            algorithm_type="dpmsolver++",
            solver_type="midpoint",
            denoise_final=True,  # important if steps are <= 10
        )
        self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
            cache_path, scheduler=scheduler
        ).to("cuda")

        t0 = time.time()
        self.pipe.unet = torch.compile(self.pipe.unet)
        print(f"compiled unet in => {time.time() - t0:.3f}s")

    @stub.function(gpu="A100", concurrency_limit=1)
    def run_inference(
        self, prompt: str, steps: int = 20, batch_size: int = 4
    ) -> list[bytes]:
        import torch

        with torch.inference_mode():
            with torch.autocast("cuda"):
                images = self.pipe(
                    [prompt] * batch_size,
                    num_inference_steps=steps,
                    guidance_scale=7.0,
                ).images

        # Convert to PNG bytes
        image_output = []
        for image in images:
            with io.BytesIO() as buf:
                image.save(buf, format="PNG")
                image_output.append(buf.getvalue())
        return image_output

# This is the command we'll use to generate images. It takes a `prompt`,
# `samples` (the number of batches to generate), `steps` (the number of
# inference steps the model will take), and `batch_size` (how many images to
# generate per batch for the given prompt).
@stub.local_entrypoint
def entrypoint(
    prompt: str, samples: int = 5, steps: int = 10, batch_size: int = 1
):
    typer.echo(
        f"prompt => {prompt}, steps => {steps}, samples => {samples}, batch_size => {batch_size}"
    )

    dir = Path("/tmp/stable-diffusion")
    if not dir.exists():
        dir.mkdir(exist_ok=True, parents=True)

    sd = StableDiffusion()
    for i in range(samples):
        t0 = time.time()
        images = sd.run_inference.call(prompt, steps, batch_size)
        total_time = time.time() - t0
        print(
            f"Sample {i} took {total_time:.3f}s ({total_time / len(images):.3f}s / image)."
        )
        for j, image_bytes in enumerate(images):
            output_path = dir / f"output_{j}_{i}.png"
            print(f"Saving it to {output_path}")
            with open(output_path, "wb") as f:
                f.write(image_bytes)

# And this is our entrypoint, where the CLI is invoked. Explore the CLI options
# with: `modal run stable_diffusion_cli.py --help`
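#
# For example (the prompt below is purely illustrative):
#
#   modal run stable_diffusion_cli.py --prompt "an astronaut riding a horse on mars" --samples 2 --steps 20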

# # Performance
#
# This example can generate pictures in about a second, with a startup time of about 10s for the first picture.
#
# See the distribution of latencies below. This data was gathered by running 500 requests in sequence (meaning only
# the first request incurs a cold start). As you can see, the 90th percentile is 1.2s and the 99th percentile is 2.3s.
#
# 