# Benchmarking the StableDiffusionXL base + refiner pipelines with torch.compile
# Gist by @tiandiao123, created September 21, 2023.
import inspect
import time
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import PIL.Image
import torch
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer

from diffusers import StableDiffusionXLImg2ImgPipeline
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.attention_processor import (
    AttnProcessor2_0,
    LoRAAttnProcessor2_0,
    LoRAXFormersAttnProcessor,
    XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
    is_invisible_watermark_available,
    logging,
    replace_example_docstring,
)
from diffusers.utils.torch_utils import randn_tensor
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput

if is_invisible_watermark_available():
    from diffusers.pipelines.stable_diffusion_xl.watermark import StableDiffusionXLWatermarker

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import StableDiffusionXLImg2ImgPipeline
        >>> from diffusers.utils import load_image

        >>> pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
        ...     "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16
        ... )
        >>> pipe = pipe.to("cuda")

        >>> url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
        >>> init_image = load_image(url).convert("RGB")

        >>> prompt = "a photo of an astronaut riding a horse on mars"
        >>> image = pipe(prompt, image=init_image).images[0]
        ```
"""
def benchmark(pipe, prompt, init_image, warmup=3):
    print("start benchmarking ...")
    # Warm-up iterations are not timed.
    for _ in range(warmup):
        pipe(prompt, image=init_image, num_inference_steps=50).images[0]

    times = []
    for _ in range(10):
        torch.cuda.synchronize()
        start = time.time()
        pipe(prompt, image=init_image, num_inference_steps=50).images[0]
        torch.cuda.synchronize()
        end = time.time()
        times.append(end - start)
    return sum(times) / len(times)
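
# The main block loads the SDXL base and refiner pipelines, compiles their UNets
# (and the refiner VAE) with torch.compile, runs two warm-up generations, and then
# times the full base -> refiner pass over several iterations.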
if __name__ == "__main__":
    from diffusers.utils import load_image
    from PIL import Image

    # pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    #     "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16
    # )
    base_pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )
    base_pipe.to("cuda")
    base_pipe.unet = torch.compile(base_pipe.unet, mode="reduce-overhead", fullgraph=True)

    pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16
    )
    pipe = pipe.to("cuda")

    # url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
    # init_image = load_image(url).convert("RGB")
    init_image = Image.open("000000009.png").convert("RGB")
    prompt = "a photo of an astronaut riding on moon"

    print("calling compile function")
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
    pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)

    # Two warm-up calls so torch.compile finishes tracing/compiling before anything is timed.
    image = pipe(prompt, image=init_image, num_inference_steps=50).images[0]
    image = pipe(prompt, image=init_image, num_inference_steps=50).images[0]
    print("finished compiled function ...")

    # The base model denoises the first 80% of the steps and hands its latents to the refiner.
    high_noise_frac = 0.8
    image = base_pipe(
        prompt=prompt,
        num_inference_steps=50,
        denoising_end=high_noise_frac,
        output_type="latent",
    ).images
    image = pipe(
        prompt=prompt,
        num_inference_steps=50,
        denoising_start=high_noise_frac,
        image=image,
    ).images[0]
    image.save("test.png")

    # latency_2 = benchmark(pipe, prompt, init_image)
    # print("the latency after calling compile is {} s".format(str(latency_2)))
    # image = pipe(prompt, image=init_image, num_inference_steps=50).images[0]
    # image.save("test.png")

    # Time the full base -> refiner pass; the first `warmup` iterations are discarded.
    times = []
    warmup = 3
    for i in range(10):
        torch.cuda.synchronize()
        start = time.time()
        image = base_pipe(
            prompt=prompt,
            num_inference_steps=50,
            denoising_end=high_noise_frac,
            output_type="latent",
            height=1024,
            width=1024,
        ).images
        image = pipe(
            prompt=prompt,
            num_inference_steps=50,
            denoising_start=high_noise_frac,
            image=image,
        ).images[0]
        torch.cuda.synchronize()  # make sure all GPU work has finished before stopping the clock
        end = time.time()
        if i >= warmup:
            times.append(end - start)
    print("the latency is {} s".format(sum(times) / len(times)))