Last active May 16, 2025 09:04
Save davidberenstein1957/8e261cd98b593566c23c279c00a69db8 to your computer and use it in GitHub Desktop.
Optimize AI Models QWEN3 Inference
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "pruna"
# ]
# ///
from pruna import SmashConfig, smash
from transformers import pipeline

# Wrap the Qwen3-32B checkpoint in a text-generation pipeline so we can
# hand its underlying model to Pruna.
checkpoint = "Qwen/Qwen3-32B"
generator = pipeline("text-generation", model=checkpoint)

# Build the SmashConfig: 4-bit HQQ weight quantization combined with a
# full-graph, dynamic-shape torch.compile pass. Smashed artifacts are
# cached under the shared /efs mount.
config = SmashConfig(cache_dir_prefix="/efs/smash_cache")
settings = {
    "quantizer": "hqq",
    "hqq_weight_bits": 4,
    "hqq_compute_dtype": "torch.bfloat16",
    "compiler": "torch_compile",
    "torch_compile_fullgraph": True,
    "torch_compile_dynamic": True,
}
for option, value in settings.items():
    config[option] = value

# Smash (optimize) the pipeline's model with the configuration above.
model = smash(
    model=generator.model,
    smash_config=config,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.