Gradio demo for FLUX.1 Kontext [dev], but as a 4-bit quant, so it fits on a 4090
import gradio as gr
import numpy as np
import spaces
import torch
import random
from PIL import Image
from diffusers import FluxKontextPipeline
from diffusers.utils import load_image

# Import quantization libraries
try:
    import bitsandbytes
    import hqq
    print("Quantization libraries loaded")
except ImportError as e:
    print(f"Quantization libraries not available: {e}")

MAX_SEED = np.iinfo(np.int32).max

# Try HQQ 4-bit quantized version for much lower VRAM usage
try:
    pipe = FluxKontextPipeline.from_pretrained(
        "HighCWu/FLUX.1-Kontext-dev-bnb-hqq-4bit",
        torch_dtype=torch.bfloat16
    ).to("cuda")
    print("Using HQQ 4-bit quantized model")
except Exception as e:
    print(f"HQQ model failed: {e}")
    # Fallback to original with aggressive optimizations
    pipe = FluxKontextPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Kontext-dev",
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True
    )
    pipe.enable_sequential_cpu_offload()
    print("Using original model with CPU offload")


@spaces.GPU
def infer(input_image, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, steps=28,
          progress=gr.Progress(track_tqdm=True)):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    if input_image:
        input_image = input_image.convert("RGB")
        image = pipe(
            image=input_image,
            prompt=prompt,
            guidance_scale=guidance_scale,
            width=input_image.size[0],
            height=input_image.size[1],
            num_inference_steps=steps,
            generator=torch.Generator().manual_seed(seed),
        ).images[0]
    else:
        image = pipe(
            prompt=prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=steps,
            generator=torch.Generator().manual_seed(seed),
        ).images[0]

    return image, seed, gr.Button(visible=True)


@spaces.GPU
def infer_example(input_image, prompt):
    image, seed, _ = infer(input_image, prompt)
    return image, seed


css = """
#col-container {
    margin: 0 auto;
    max-width: 960px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""# FLUX.1 Kontext [dev] - HQQ 4-bit
Image editing model with 4-bit quantization for lower VRAM usage
""")
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label="Upload the image for editing", type="pil")
                with gr.Row():
                    prompt = gr.Text(
                        label="Prompt",
                        show_label=False,
                        max_lines=1,
                        placeholder="Enter your prompt for editing (e.g., 'Remove glasses', 'Add a hat')",
                        container=False,
                    )
                    run_button = gr.Button("Run", scale=0)
                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=1,
                        maximum=10,
                        step=0.1,
                        value=2.5,
                    )
                    steps = gr.Slider(
                        label="Steps",
                        minimum=1,
                        maximum=30,
                        value=28,
                        step=1
                    )
            with gr.Column():
                result = gr.Image(label="Result", show_label=False, interactive=False)
                reuse_button = gr.Button("Reuse this image", visible=False)

        examples = gr.Examples(
            examples=[
                ["flowers.png", "turn the flowers into sunflowers"],
                ["monster.png", "make this monster ride a skateboard on the beach"],
                ["cat.png", "make this cat happy"]
            ],
            inputs=[input_image, prompt],
            outputs=[result, seed],
            fn=infer_example,
            cache_examples="lazy"
        )

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[input_image, prompt, seed, randomize_seed, guidance_scale, steps],
        outputs=[result, seed, reuse_button]
    )

    reuse_button.click(
        fn=lambda image: image,
        inputs=[result],
        outputs=[input_image]
    )

demo.launch(mcp_server=True)
requirements.txt:

torch
torchvision
torchaudio
gradio
diffusers
transformers>=4.53.1
numpy<2
bitsandbytes
hqq
pillow
spaces
The P100 is old, from 2016, unfortunately. Its memory management is simply not up to what HQQ needs. Even if it could load the model, it would run very slowly.
Pascal Architecture Limitations:
No Tensor Cores: Critical for AI inference acceleration
Compute Capability 6.0: Limited optimization support
Older Memory Interface: HBM2 at lower speeds
Limited Quantization Support: Reduced compatibility with modern methods
NVIDIA doesn't support that GPU for this kind of model either; see the support matrix:
https://docs.nvidia.com/nim/visual-genai/1.1.1/support-matrix.html
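
If you want to catch this up front instead of waiting for the HQQ load to fail, a minimal sketch along these lines could gate the model-loading step. The compute-capability threshold and the fallback choice are my assumptions, not something from the gist above:

import torch

def pick_loading_strategy():
    """Rough GPU gate for the demo's two loading paths.

    Assumption: cards with compute capability >= 8.0 (Ampere and newer; a 4090
    is 8.9) get the HQQ 4-bit model, while older cards such as the P100 (6.0)
    fall back to the CPU-offload branch instead of crashing mid-load.
    """
    if not torch.cuda.is_available():
        return "cpu_offload"
    major, minor = torch.cuda.get_device_capability(0)
    name = torch.cuda.get_device_name(0)
    print(f"Detected {name} (compute capability {major}.{minor})")
    if (major, minor) >= (8, 0):
        return "hqq_4bit"
    return "cpu_offload"

# Example: decide which branch of the gist's try/except to attempt first
strategy = pick_loading_strategy()
print(f"Loading strategy: {strategy}")

On a 4090 (compute capability 8.9) this would pick the HQQ 4-bit path; on a P100 (6.0) it would go straight to sequential CPU offload.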