{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DCTPgltiV7On",
"outputId": "3783f954-bd18-41a6-e978-e43ccc512791"
},
"outputs": [],
"source": [
"!pip install onnx onnxruntime transformers -q"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Qwme6qSYWg_V"
},
"source": [
"ref \n",
"\n",
"\n",
"\n",
"```python\n",
" import io\n",
" import onnx\n",
" import onnxruntime as rt\n",
" import numpy as np\n",
" import torch\n",
" import torchvision\n",
"\n",
" def main() -> None:\n",
"\n",
" input_shape = (1, 3, 224, 224)\n",
"\n",
" # Create a PyTorch model for ONNX export.\n",
" torch_model = torchvision.models.resnet18(pretrained=False)\n",
"\n",
" # Create a file-like binary stream using an in-memory bytes buffer.\n",
" with io.BytesIO() as f:\n",
"\n",
" # Export the model to the binary stream.\n",
" torch.onnx.export(model=torch_model,\n",
" args=torch.randn(*input_shape),\n",
" f=f)\n",
"\n",
" # Use ONNX load_model API to load a model from a binary stream.\n",
" # Change the stream position to the start of the stream.\n",
" f.seek(0)\n",
" model_proto_from_binary_stream = onnx.load_model(f, onnx.ModelProto)\n",
"\n",
" # Use ONNX load_model_from_string API to load a model from a binary string.\n",
" model_proto_from_binary_string = onnx.load_model_from_string(\n",
" f.getvalue(), onnx.ModelProto)\n",
"\n",
" # Equivalence of the two ONNX models loaded using different approaches.\n",
" assert model_proto_from_binary_stream == model_proto_from_binary_string\n",
"\n",
" model_proto = model_proto_from_binary_stream\n",
"\n",
" with io.BytesIO() as f:\n",
"\n",
" # Use ONNX save_model API to save model to a binary stream.\n",
" onnx.save_model(model_proto, f)\n",
"\n",
" # Use ONNX load_model API to load a model from a binary stream.\n",
" # Change the stream position to the start of the stream.\n",
" f.seek(0)\n",
" model_proto_from_binary_stream = onnx.load_model(f, onnx.ModelProto)\n",
"\n",
" # Use ONNX load_model_from_string API to load a model from a binary string.\n",
" model_proto_from_binary_string = onnx.load_model_from_string(\n",
" f.getvalue(), onnx.ModelProto)\n",
"\n",
" assert model_proto == model_proto_from_binary_stream\n",
" assert model_proto == model_proto_from_binary_string\n",
"\n",
" # Use ONNX _serialize to get binary string from ONNX model.\n",
" model_proto_bytes = onnx._serialize(model_proto)\n",
" assert model_proto_bytes == f.getvalue()\n",
"\n",
" # Use ONNX _deserialize to get ONNX model from binary string.\n",
" model_proto_from_deserialization = onnx._deserialize(\n",
" model_proto_bytes, onnx.ModelProto())\n",
" assert model_proto == model_proto_from_deserialization\n",
"\n",
" # Run ONNX Runtime.\n",
" # InferenceSession could also take bytes.\n",
" inference_session = rt.InferenceSession(model_proto_bytes)\n",
" onnxruntime_random_input = np.random.randn(*input_shape).astype(np.float32)\n",
"\n",
" input_name = inference_session.get_inputs()[0].name\n",
" prediction = inference_session.run(\n",
" None, {input_name: onnxruntime_random_input})[0]\n",
"\n",
"\n",
" if __name__ == \"__main__\":\n",
"\n",
" main()\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Uk9BTt-0WAuV"
},
"source": [
"convert model to onnx"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "VXEvWzrWV_Wg",
"outputId": "c6f82ef1-5717-4e96-b9b9-940b99f65678"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2022-11-08 08:44:36.311771: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2022-11-08 08:44:36.431582: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2022-11-08 08:44:36.435780: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
"2022-11-08 08:44:36.435804: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n",
"2022-11-08 08:44:36.458399: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2022-11-08 08:44:36.891310: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n",
"2022-11-08 08:44:36.891362: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n",
"2022-11-08 08:44:36.891371: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"Framework not requested. Using torch to export to ONNX.\n",
"Using framework PyTorch: 1.12.1+cu102\n",
"/home/ubuntu/miniconda3/envs/dev/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py:213: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
" mask, torch.tensor(torch.finfo(scores.dtype).min)\n",
"Validating ONNX model...\n",
"\t-[✓] ONNX model output names match reference model ({'logits'})\n",
"\t- Validating ONNX Model output \"logits\":\n",
"\t\t-[✓] (3, 2) matches (3, 2)\n",
"\t\t-[✓] all values close (atol: 1e-05)\n",
"All good, model saved at: onnx/model.onnx\n"
]
}
],
"source": [
"!python -m transformers.onnx --model=distilbert-base-uncased-finetuned-sst-2-english \\\n",
" --feature=sequence-classification onnx/"
]
},
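{
"cell_type": "markdown",
"metadata": {},
"source": [
"The CLI writes the exported graph to `onnx/model.onnx`. A minimal sanity check, assuming that path: load the file, run the ONNX checker, and print the graph's input names."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal sanity check (sketch): assumes the export above wrote onnx/model.onnx.\n",
"import onnx\n",
"\n",
"model = onnx.load(\"onnx/model.onnx\")\n",
"onnx.checker.check_model(model)\n",
"print([i.name for i in model.graph.input])"
]
},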
{
"cell_type": "markdown",
"metadata": {
"id": "CucS2-pJWCk3"
},
"source": [
"load as buffer\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 364
},
"id": "ZIiSaAkuWDmj",
"outputId": "c539110a-bbf1-48ab-f795-aa50f1d401ea"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"import io\n",
"import onnx\n",
"import onnxruntime as ort\n",
"from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
"\n",
"AutoModelForSequenceClassification.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\").save_pretrained(\"trfs\")\n",
"\n",
"path=\"onnx/model.onnx\"\n",
"\n",
"def load_model_from_path_with_bytes(path=\"onnx/model.onnx\"):\n",
" with open(path, \"rb\") as f:\n",
" return ort.InferenceSession(f.read())\n",
"\n",
"with open(path, \"rb\") as f:\n",
" bytes = f.read()\n",
"\n",
"def load_model_from_bytes(bytes=bytes):\n",
" return ort.InferenceSession(bytes)\n",
"\n",
"def load_model_from_path(path=\"onnx/model.onnx\"):\n",
" return ort.InferenceSession(path)\n",
"\n",
"\n",
"def load_transformers_model(path=\"trfs\"):\n",
" return AutoModelForSequenceClassification.from_pretrained(path)\n",
" \n",
"load_model_from_path_with_bytes()\n",
"load_model_from_bytes()\n",
"load_model_from_path()\n",
"load_transformers_model()\n",
"print(\"\")"
]
},
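{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each loader returns an `onnxruntime.InferenceSession`, so any of them can serve predictions. A minimal inference sketch, assuming the exported graph takes `input_ids` and `attention_mask` and returns `logits` (as the `sequence-classification` export above produces):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Inference sketch: assumes the exported graph's inputs are input_ids and attention_mask.\n",
"from transformers import AutoTokenizer\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
"session = load_model_from_path()\n",
"\n",
"encoded = tokenizer(\"I love this movie!\", return_tensors=\"np\")\n",
"logits = session.run(None, {\"input_ids\": encoded[\"input_ids\"], \"attention_mask\": encoded[\"attention_mask\"]})[0]\n",
"# For this checkpoint, label 0 is NEGATIVE and label 1 is POSITIVE.\n",
"print(logits.argmax(axis=-1))"
]
},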
{
"cell_type": "markdown",
"metadata": {
"id": "pddI6R_TWEBo"
},
"source": [
"measure"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"467 ms ± 3.35 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit \n",
"load_model_from_path_with_bytes()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"id": "xZ4tWkSuWFH3"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"359 ms ± 1.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit \n",
"load_model_from_bytes()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"id": "5XAExTdpYCYH"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"291 ms ± 8.88 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit \n",
"load_model_from_path()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"385 ms ± 17.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"load_transformers_model()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "yP2u1_BOYBf4"
},
"source": []
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"provenance": []
},
"kernelspec": {
"display_name": "Python 3.9.13 ('dev': conda)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"vscode": {
"interpreter": {
"hash": "a40944fb6d302ad2eace17cfbb714ee95a1e6c7ab311709595ca70171602490b"
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}