import time

import torch
from PIL import Image

from diffusers import DiffusionPipeline, StableDiffusionXLImg2ImgPipeline
from diffusers.utils import load_image, logging

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import StableDiffusionXLImg2ImgPipeline
        >>> from diffusers.utils import load_image

        >>> pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
        ...     "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16
        ... )
        >>> pipe = pipe.to("cuda")
        >>> url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
        >>> init_image = load_image(url).convert("RGB")
        >>> prompt = "a photo of an astronaut riding a horse on mars"
        >>> image = pipe(prompt, image=init_image).images[0]
        ```
"""
def benchmark(pipe, prompt, init_image, warmup=3):
    """Run `warmup` untimed calls, then return the mean latency of 10 timed runs."""
    print("start benchmarking ...")
    for _ in range(warmup):
        pipe(prompt, image=init_image, num_inference_steps=50).images[0]
    times = []
    for _ in range(10):
        torch.cuda.synchronize()
        start = time.time()
        pipe(prompt, image=init_image, num_inference_steps=50).images[0]
        torch.cuda.synchronize()
        end = time.time()
        times.append(end - start)
    return sum(times) / len(times)
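
# A minimal usage sketch (the actual call in __main__ below is commented out);
# it assumes `pipe`, `prompt`, and `init_image` are built as in that block:
#
#   avg_latency = benchmark(pipe, prompt, init_image)
#   print(f"mean img2img latency over 10 runs: {avg_latency:.2f} s")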
if __name__ == "__main__":
    # Base pipeline (text-to-image), with the UNet compiled via torch.compile;
    # "reduce-overhead" mode uses CUDA graphs to cut per-step launch overhead.
    base_pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )
    base_pipe.to("cuda")
    base_pipe.unet = torch.compile(base_pipe.unet, mode="reduce-overhead", fullgraph=True)

    # Refiner pipeline (image-to-image); its UNet and VAE are compiled below.
    pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16
    )
    pipe = pipe.to("cuda")
    # Load the init image from a local file; alternatively, download it:
    # url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
    # init_image = load_image(url).convert("RGB")
    init_image = Image.open("000000009.png").convert("RGB")
    prompt = "a photo of an astronaut riding on moon"

    print("calling compile function ...")
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
    pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
    # Two warmup calls: the first triggers (slow) compilation, the second
    # confirms the compiled graphs are reused.
    image = pipe(prompt, image=init_image, num_inference_steps=50).images[0]
    image = pipe(prompt, image=init_image, num_inference_steps=50).images[0]
    print("finished compiled function ...")
    # Ensemble-of-experts split: the base model denoises the first 80% of the
    # schedule and hands off latents; the refiner finishes the remaining 20%.
    high_noise_frac = 0.8
    latents = base_pipe(
        prompt=prompt,
        num_inference_steps=50,
        denoising_end=high_noise_frac,
        output_type="latent",
    ).images
    image = pipe(
        prompt=prompt,
        num_inference_steps=50,
        denoising_start=high_noise_frac,
        image=latents,
    ).images[0]
    image.save("test.png")
    # latency_2 = benchmark(pipe, prompt, init_image)
    # print("the latency after calling compile is {} s".format(latency_2))
    # image = pipe(prompt, image=init_image, num_inference_steps=50).images[0]
    # image.save("test.png")
    # Time 10 full base+refiner runs, discarding the first `warmup` iterations
    # so any remaining compilation/autotuning cost is excluded from the mean.
    times = []
    warmup = 3
    for i in range(10):
        torch.cuda.synchronize()
        start = time.time()
        latents = base_pipe(
            prompt=prompt,
            num_inference_steps=50,
            denoising_end=high_noise_frac,
            output_type="latent",
            height=1024,
            width=1024,
        ).images
        image = pipe(
            prompt=prompt,
            num_inference_steps=50,
            denoising_start=high_noise_frac,
            image=latents,
        ).images[0]
        torch.cuda.synchronize()
        end = time.time()
        if i >= warmup:
            times.append(end - start)
    print("the latency is {} s".format(sum(times) / len(times)))