Skip to content

Instantly share code, notes, and snippets.

@tomaarsen
Last active June 4, 2025 11:36
Show Gist options
  • Save tomaarsen/4b00b0e3be8884efa64cfab9230b161f to your computer and use it in GitHub Desktop.
Save tomaarsen/4b00b0e3be8884efa64cfab9230b161f to your computer and use it in GitHub Desktop.
Export Sentence Transformer models to ONNX (+ optimization, quantization) & OpenVINO
# requires sentence_transformers>=3.2.0
from sentence_transformers import SentenceTransformer, export_optimized_onnx_model, export_dynamic_quantized_onnx_model
# The model to export to ONNX (+ optimize, quantize), OpenVINO
model_id = "mixedbread-ai/mxbai-embed-large-v1"
# Where to save the exported models locally
output_dir = model_id.replace("/", "-")

# Load the model with the ONNX backend, explicitly requesting an export,
# and save it locally so the variants below can be written to the same directory.
onnx_model = SentenceTransformer(model_id, backend="onnx", model_kwargs={"export": True})
onnx_model.save_pretrained(output_dir)

# Export one optimized ONNX variant per optimization level.
# NOTE(review): the upstream docs describe O4 as GPU-only (fp16) - confirm this
# machine has a CUDA device, or drop "O4" from the list.
for optimization_config in ["O1", "O2", "O3", "O4"]:
    export_optimized_onnx_model(
        onnx_model,
        optimization_config=optimization_config,
        model_name_or_path=output_dir,
    )

# Export one dynamically quantized ONNX variant per target instruction set.
for quantization_config in ["arm64", "avx2", "avx512", "avx512_vnni"]:
    export_dynamic_quantized_onnx_model(
        onnx_model,
        quantization_config=quantization_config,
        model_name_or_path=output_dir,
    )

# Load the same model with the OpenVINO backend and save it to the same directory.
openvino_model = SentenceTransformer(model_id, backend="openvino")
openvino_model.save_pretrained(output_dir)
# requires sentence_transformers>=3.2.0
from sentence_transformers import SentenceTransformer, export_optimized_onnx_model, export_dynamic_quantized_onnx_model
# The model to export to ONNX (+ optimize, quantize), OpenVINO
model_id = "mixedbread-ai/mxbai-embed-large-v1"
# The repository to push the ONNX, OpenVINO models to
output_model_id = "tomaarsen/mxbai-embed-large-v1-exported"
# Do we push directly, or create a PR? A PR is useful for reviewing the changes
# before merging or if you don't have write access.
create_pr = False

# Load the model with the ONNX backend, explicitly requesting an export,
# and push it to the target repository (an existing repository is accepted).
onnx_model = SentenceTransformer(model_id, backend="onnx", model_kwargs={"export": True})
onnx_model.push_to_hub(output_model_id, exist_ok=True, create_pr=create_pr)

# Export one optimized ONNX variant per optimization level and push each one.
# NOTE(review): the upstream docs describe O4 as GPU-only (fp16) - confirm this
# machine has a CUDA device, or drop "O4" from the list.
for optimization_config in ["O1", "O2", "O3", "O4"]:
    export_optimized_onnx_model(
        onnx_model,
        optimization_config=optimization_config,
        model_name_or_path=output_model_id,
        push_to_hub=True,
        create_pr=create_pr,
    )

# Export one dynamically quantized ONNX variant per target instruction set
# and push each one.
for quantization_config in ["arm64", "avx2", "avx512", "avx512_vnni"]:
    export_dynamic_quantized_onnx_model(
        onnx_model,
        quantization_config=quantization_config,
        model_name_or_path=output_model_id,
        push_to_hub=True,
        create_pr=create_pr,
    )

# Load the same model with the OpenVINO backend and push it to the same repository.
openvino_model = SentenceTransformer(model_id, backend="openvino")
openvino_model.push_to_hub(output_model_id, exist_ok=True, create_pr=create_pr)
# This script is identical to the one above, except that it groups all changes into one pull request for convenience
# requires sentence_transformers>=3.2.0
from sentence_transformers import (
SentenceTransformer,
export_optimized_onnx_model,
export_dynamic_quantized_onnx_model,
)
from huggingface_hub import upload_folder
# The model to export to ONNX (+ optimize, quantize), OpenVINO
model_id = "BAAI/llm-embedder"
# Where to save the exported models locally
output_dir = model_id.replace("/", "-")
# Where to create the PR
output_model_id = "BAAI/llm-embedder"

# Load the model with the ONNX backend, explicitly requesting an export,
# and save it locally; everything below is collected in the same directory.
onnx_model = SentenceTransformer(model_id, backend="onnx", model_kwargs={"export": True})
onnx_model.save_pretrained(output_dir)

# Export one optimized ONNX variant per optimization level.
# NOTE(review): the upstream docs describe O4 as GPU-only (fp16) - confirm this
# machine has a CUDA device, or drop "O4" from the list.
for optimization_config in ["O1", "O2", "O3", "O4"]:
    export_optimized_onnx_model(
        onnx_model,
        optimization_config=optimization_config,
        model_name_or_path=output_dir,
    )

# Export one dynamically quantized ONNX variant per target instruction set.
for quantization_config in ["arm64", "avx2", "avx512", "avx512_vnni"]:
    export_dynamic_quantized_onnx_model(
        onnx_model,
        quantization_config=quantization_config,
        model_name_or_path=output_dir,
    )

# Load the same model with the OpenVINO backend and save it to the same directory.
openvino_model = SentenceTransformer(model_id, backend="openvino")
openvino_model.save_pretrained(output_dir)

# Upload the whole local directory in a single call, opened as a pull request
# against the target model repository.
upload_folder(
    folder_path=output_dir,
    repo_id=output_model_id,
    repo_type="model",
    create_pr=True,
    commit_message="Export model to ONNX and OpenVINO",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment