how to run flux on your 16gb potato
# First, in your terminal.
#
# $ python3 -m virtualenv env
# $ source env/bin/activate
# $ pip install torch torchvision transformers sentencepiece protobuf accelerate
# $ pip install git+https://github.com/huggingface/diffusers.git
# $ pip install optimum-quanto
import torch
from optimum.quanto import freeze, qfloat8, quantize
from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
dtype = torch.bfloat16
# schnell is the distilled turbo model. For the CFG-distilled model, use:
# bfl_repo = "black-forest-labs/FLUX.1-dev"
# revision = "refs/pr/3"
#
# The undistilled model ("pro"), which uses CFG and can therefore take negative
# prompts, was not released.
bfl_repo = "black-forest-labs/FLUX.1-schnell"
revision = "refs/pr/1"
scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(bfl_repo, subfolder="scheduler", revision=revision)
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype, revision=revision)
tokenizer_2 = T5TokenizerFast.from_pretrained(bfl_repo, subfolder="tokenizer_2", torch_dtype=dtype, revision=revision)
vae = AutoencoderKL.from_pretrained(bfl_repo, subfolder="vae", torch_dtype=dtype, revision=revision)
transformer = FluxTransformer2DModel.from_pretrained(bfl_repo, subfolder="transformer", torch_dtype=dtype, revision=revision)
# Experimental: Try this to load in 4-bit for <16GB cards.
#
# from optimum.quanto import qint4
# quantize(transformer, weights=qint4, exclude=["proj_out", "x_embedder", "norm_out", "context_embedder"])
# freeze(transformer)
# Quantize the transformer and the T5 text encoder to 8-bit floats so they fit in 16GB of VRAM.
quantize(transformer, weights=qfloat8)
freeze(transformer)
quantize(text_encoder_2, weights=qfloat8)
freeze(text_encoder_2)
# Build the pipeline without the two quantized models, then attach them afterwards.
pipe = FluxPipeline(
    scheduler=scheduler,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    text_encoder_2=None,
    tokenizer_2=tokenizer_2,
    vae=vae,
    transformer=None,
)
pipe.text_encoder_2 = text_encoder_2
pipe.transformer = transformer
# Keep each submodule on the CPU and move it to the GPU only while it is in use.
pipe.enable_model_cpu_offload()
generator = torch.Generator().manual_seed(12345)
image = pipe(
    prompt='nekomusume cat girl, digital painting',
    width=1024,
    height=1024,
    num_inference_steps=4,
    generator=generator,
    guidance_scale=3.5,
).images[0]
image.save('test_flux_distilled.png')
You need to add the LoRA before quantizing.
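A rough sketch of that ordering, assuming your diffusers version supports load_lora_weights / fuse_lora for Flux; the checkpoint path is a placeholder, not something from the gist. Build the pipeline with the unquantized transformer, fuse the LoRA into it, then quantize and freeze as above.

# Hypothetical sketch: fuse a LoRA before quantization (path is a placeholder).
pipe = FluxPipeline(
    scheduler=scheduler,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    text_encoder_2=text_encoder_2,
    tokenizer_2=tokenizer_2,
    vae=vae,
    transformer=transformer,
)
pipe.load_lora_weights("path/to/your_flux_lora.safetensors")
pipe.fuse_lora()  # bake the LoRA into the base weights so quantization sees them
quantize(pipe.transformer, weights=qfloat8)
freeze(pipe.transformer)
quantize(pipe.text_encoder_2, weights=qfloat8)
freeze(pipe.text_encoder_2)
pipe.enable_model_cpu_offload()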
In my case, qfloat8 works well on a V100, but a 4090 needs qfloat8_e5m2. Don't really know the reason, though :)
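If you want to try that, it is a one-line swap in the script above; qfloat8_e5m2 is exported by optimum-quanto alongside qfloat8 (just a sketch of the substitution, untested here).

# Swap the qfloat8 calls above for the e5m2 variant:
from optimum.quanto import qfloat8_e5m2
quantize(transformer, weights=qfloat8_e5m2)
freeze(transformer)
quantize(text_encoder_2, weights=qfloat8_e5m2)
freeze(text_encoder_2)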
I never got an answer so I gave up on LoRA support.