import gradio as gr
import numpy as np
import spaces
import torch
import random
from PIL import Image
from diffusers import FluxKontextPipeline
from diffusers.utils import load_image

# Import quantization libraries
try:
    import bitsandbytes
    import hqq
    print("Quantization libraries loaded")
except ImportError as e:
    print(f"Quantization libraries not available: {e}")

MAX_SEED = np.iinfo(np.int32).max

# Try HQQ 4-bit quantized version for much lower VRAM usage
try:
    pipe = FluxKontextPipeline.from_pretrained(
        "HighCWu/FLUX.1-Kontext-dev-bnb-hqq-4bit",
        torch_dtype=torch.bfloat16
    ).to("cuda")
    print("Using HQQ 4-bit quantized model")
except Exception as e:
    print(f"HQQ model failed: {e}")
    # Fallback to original model with aggressive optimizations
    pipe = FluxKontextPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Kontext-dev",
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True
    )
    pipe.enable_sequential_cpu_offload()
    print("Using original model with CPU offload")


@spaces.GPU
def infer(input_image, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, steps=28,
          progress=gr.Progress(track_tqdm=True)):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    if input_image:
        # Image-to-image editing: keep the input image's resolution
        input_image = input_image.convert("RGB")
        image = pipe(
            image=input_image,
            prompt=prompt,
            guidance_scale=guidance_scale,
            width=input_image.size[0],
            height=input_image.size[1],
            num_inference_steps=steps,
            generator=torch.Generator().manual_seed(seed),
        ).images[0]
    else:
        # Text-to-image generation when no input image was given
        image = pipe(
            prompt=prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=steps,
            generator=torch.Generator().manual_seed(seed),
        ).images[0]
    return image, seed, gr.Button(visible=True)


@spaces.GPU
def infer_example(input_image, prompt):
    image, seed, _ = infer(input_image, prompt)
    return image, seed


css = """
#col-container {
    margin: 0 auto;
    max-width: 960px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# FLUX.1 Kontext [dev] - HQQ 4-bit
Image editing model with 4-bit quantization for lower VRAM usage
""")
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label="Upload the image for editing", type="pil")
                with gr.Row():
                    prompt = gr.Text(
                        label="Prompt",
                        show_label=False,
                        max_lines=1,
                        placeholder="Enter your prompt for editing (e.g., 'Remove glasses', 'Add a hat')",
                        container=False,
                    )
                    run_button = gr.Button("Run", scale=0)
                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=1,
                        maximum=10,
                        step=0.1,
                        value=2.5,
                    )
                    steps = gr.Slider(
                        label="Steps",
                        minimum=1,
                        maximum=30,
                        value=28,
                        step=1
                    )
            with gr.Column():
                result = gr.Image(label="Result", show_label=False, interactive=False)
                reuse_button = gr.Button("Reuse this image", visible=False)
        examples = gr.Examples(
            examples=[
                ["flowers.png", "turn the flowers into sunflowers"],
                ["monster.png", "make this monster ride a skateboard on the beach"],
                ["cat.png", "make this cat happy"]
            ],
            inputs=[input_image, prompt],
            outputs=[result, seed],
            fn=infer_example,
            cache_examples="lazy"
        )

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[input_image, prompt, seed, randomize_seed, guidance_scale, steps],
        outputs=[result, seed, reuse_button]
    )
    reuse_button.click(
        fn=lambda image: image,
        inputs=[result],
        outputs=[input_image]
    )

demo.launch(mcp_server=True)
requirements.txt

torch
torchvision
torchaudio
gradio
diffusers
transformers>=4.53.1
numpy<2
bitsandbytes
hqq
pillow
spaces
fancellu commented Jul 7, 2025
Can you help me? I am trying to run this on a Kaggle P100 GPU, but I got this error:

Quantization libraries loaded
Loading pipeline components...: 100% 7/7 [00:11<00:00, 2.92s/it]
HQQ model failed: .to is not supported for HQQ-quantized models.
The P100 is old old old: 2016, unfortunately. Its memory management is simply not up to HQQ's needs, and even if it could run, it would run very slowly.
Pascal Architecture Limitations:

- No Tensor Cores: critical for AI inference acceleration
- Compute Capability 6.0: limited optimization support (you can check what your card reports; see the sketch below)
- Older memory interface: HBM2 at lower speeds
- Limited quantization support: reduced compatibility with modern methods
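If you want to confirm what the GPU actually reports, here is a quick check with plain PyTorch (a minimal sketch, not part of the gist):

import torch

# Print the device name and CUDA compute capability that PyTorch sees.
name = torch.cuda.get_device_name(0)
major, minor = torch.cuda.get_device_capability(0)
print(f"{name}: compute capability {major}.{minor}")
# A P100 reports 6.0. Native bf16 needs Ampere (8.0) or newer, and most
# modern 4-bit quantization kernels also target newer architectures.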
Nvidia itself doesn't list that GPU as supported for this class of model:
https://docs.nvidia.com/nim/visual-genai/1.1.1/support-matrix.html
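For what it's worth, the .to error itself is not specific to the P100: a quantized checkpoint can't be moved with .to() on any GPU. The usual workaround is to let from_pretrained place the weights at load time instead, roughly like this (a sketch, assuming a diffusers release recent enough to accept a pipeline-level device_map; untested here):

import torch
from diffusers import FluxKontextPipeline

# Sketch: load the quantized pipeline WITHOUT calling .to("cuda").
# device_map="balanced" asks diffusers to place the pipeline components on
# the available GPU(s) at load time. (Assumption: your installed diffusers
# version supports pipeline-level device_map.)
pipe = FluxKontextPipeline.from_pretrained(
    "HighCWu/FLUX.1-Kontext-dev-bnb-hqq-4bit",
    torch_dtype=torch.bfloat16,
    device_map="balanced",
)

Even then, bfloat16 on Pascal is emulated rather than native, so the hardware points above still stand.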