Skip to content

Instantly share code, notes, and snippets.

@luisquintanilla
Last active September 13, 2022 22:39
Show Gist options
  • Save luisquintanilla/dc420f90d956bad15b6ac0f72d7bc113 to your computer and use it in GitHub Desktop.
Save luisquintanilla/dc420f90d956bad15b6ac0f72d7bc113 to your computer and use it in GitHub Desktop.
Convert Word Bag Transform to ONNX ML.NET
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div><div></div><div></div><div><strong>Installed Packages</strong><ul><li><span>Microsoft.ML, 1.7.1</span></li><li><span>Microsoft.ML.OnnxConverter, 0.19.1</span></li><li><span>Microsoft.ML.OnnxTransformer, 1.7.1</span></li></ul></div></div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#r \"nuget:Microsoft.ML\"\n",
"#r \"nuget:Microsoft.ML.OnnxTransformer\"\n",
"#r \"nuget:Microsoft.ML.OnnxConverter\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"using System;\n",
"using System.IO;\n",
"using System.Linq;\n",
"using Microsoft.ML;\n",
"using Microsoft.ML.OnnxRuntime;\n",
"using Microsoft.ML.Data;\n",
"using Microsoft.ML.Transforms;"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var corpus = new [] \n",
"{\n",
"\tnew {Text = \"The quick brown fox jumped over the lazy dog. Dog is so lazy. Quick!\"},\n",
"\tnew {Text = \"The lazy dog was jumped over by the quick brown fox. Fox is not lazy\"}\n",
"};"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var ctx = new MLContext();"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var data = ctx.Data.LoadFromEnumerable(corpus);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var pipeline = ctx.Transforms.Text.ProduceWordBags (outputColumnName: \"WB\", inputColumnName:\"Text\", ngramLength:1);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var model = pipeline.Fit(data);"
]
},
{
"cell_type": "markdown",
"metadata": {
"dotnet_interactive": {
"language": "csharp"
}
},
"source": [
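"Score the corpus with the fitted ML.NET word-bag model (the same statement is executed in the next cell): `var mlnetmodelOutputDv = model.Transform(data);`"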
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var mlnetmodelOutputDv = model.Transform(data);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [
{
"data": {
"text/html": [
"<table><thead><tr><th><i>index</i></th><th>Name</th><th>Index</th><th>IsHidden</th><th>Type</th><th>Annotations</th></tr></thead><tbody><tr><td>0</td><td>Text</td><td><div class=\"dni-plaintext\">0</div></td><td><div class=\"dni-plaintext\">False</div></td><td><table><thead><tr><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">System.ReadOnlyMemory&lt;System.Char&gt;</div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ ]</div></td></tr></tbody></table></td></tr><tr><td>1</td><td>WB</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">True</div></td><td><table><thead><tr><th>Dimensions</th><th>IsKnownSize</th><th>ItemType</th><th>Size</th><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ 0 ]</div></td><td><div class=\"dni-plaintext\">False</div></td><td><div class=\"dni-plaintext\">{ String: RawType: System.ReadOnlyMemory&lt;System.Char&gt; }</div></td><td><div class=\"dni-plaintext\">0</div></td><td><div class=\"dni-plaintext\">Microsoft.ML.Data.VBuffer&lt;System.ReadOnlyMemory&lt;System.Char&gt;&gt;</div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ ]</div></td></tr></tbody></table></td></tr><tr><td>2</td><td>WB</td><td><div class=\"dni-plaintext\">2</div></td><td><div class=\"dni-plaintext\">True</div></td><td><table><thead><tr><th>Dimensions</th><th>IsKnownSize</th><th>ItemType</th><th>Size</th><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ 0 ]</div></td><td><div class=\"dni-plaintext\">False</div></td><td><div class=\"dni-plaintext\">{ Key&lt;UInt32, 0-19&gt;: Count: 20, RawType: System.UInt32 }</div></td><td><div class=\"dni-plaintext\">0</div></td><td><div class=\"dni-plaintext\">Microsoft.ML.Data.VBuffer&lt;System.UInt32&gt;</div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ { KeyValues: Vector&lt;String, 20&gt;: Name: KeyValues, Index: 0, IsHidden: False, Type: { Vector&lt;String, 20&gt;: Dimensions: [ 20 ], IsKnownSize: True, ItemType: { String: RawType: System.ReadOnlyMemory&lt;System.Char&gt; }, Size: 20, RawType: Microsoft.ML.Data.VBuffer&lt;System.ReadOnlyMemory&lt;System.Char&gt;&gt; }, Annotations: { : Schema: [ ] } } ]</div></td></tr></tbody></table></td></tr><tr><td>3</td><td>WB</td><td><div class=\"dni-plaintext\">3</div></td><td><div class=\"dni-plaintext\">False</div></td><td><table><thead><tr><th>Dimensions</th><th>IsKnownSize</th><th>ItemType</th><th>Size</th><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ 20 ]</div></td><td><div class=\"dni-plaintext\">True</div></td><td><div class=\"dni-plaintext\">{ Single: RawType: System.Single }</div></td><td><div class=\"dni-plaintext\">20</div></td><td><div class=\"dni-plaintext\">Microsoft.ML.Data.VBuffer&lt;System.Single&gt;</div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ { SlotNames: Vector&lt;String, 20&gt;: Name: SlotNames, Index: 0, IsHidden: False, Type: { Vector&lt;String, 20&gt;: Dimensions: [ 20 ], IsKnownSize: True, ItemType: { String: RawType: System.ReadOnlyMemory&lt;System.Char&gt; }, Size: 20, RawType: Microsoft.ML.Data.VBuffer&lt;System.ReadOnlyMemory&lt;System.Char&gt;&gt; }, Annotations: { : Schema: [ ] } } ]</div></td></tr></tbody></table></td></tr></tbody></table>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"mlnetmodelOutputDv.Schema"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var wordbagOutputmlnet = mlnetmodelOutputDv.GetColumn<float[]>(\"WB\");"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [
{
"data": {
"text/html": [
"<table><thead><tr><th><i>index</i></th><th>value</th></tr></thead><tbody><tr><td>0</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 ]</div></td></tr><tr><td>1</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 0, 1, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 ]</div></td></tr></tbody></table>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"wordbagOutputmlnet"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"using(FileStream stream = File.Create(\"./onnx_model.onnx\"))\n",
"{\n",
" ctx.Model.ConvertToOnnx(model, data, stream);\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var onnxModelPipeline = ctx.Transforms.ApplyOnnxModel(\"onnx_model.onnx\");"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var onnxOutputDv = onnxModelPipeline.Fit(data).Transform(data);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [
{
"data": {
"text/html": [
"<table><thead><tr><th><i>index</i></th><th>Name</th><th>Index</th><th>IsHidden</th><th>Type</th><th>Annotations</th></tr></thead><tbody><tr><td>0</td><td>Text</td><td><div class=\"dni-plaintext\">0</div></td><td><div class=\"dni-plaintext\">True</div></td><td><table><thead><tr><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">System.ReadOnlyMemory&lt;System.Char&gt;</div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ ]</div></td></tr></tbody></table></td></tr><tr><td>1</td><td>Text</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">False</div></td><td><table><thead><tr><th>Dimensions</th><th>IsKnownSize</th><th>ItemType</th><th>Size</th><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ 1, 1 ]</div></td><td><div class=\"dni-plaintext\">True</div></td><td><div class=\"dni-plaintext\">{ String: RawType: System.ReadOnlyMemory&lt;System.Char&gt; }</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">Microsoft.ML.Data.VBuffer&lt;System.ReadOnlyMemory&lt;System.Char&gt;&gt;</div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ ]</div></td></tr></tbody></table></td></tr><tr><td>2</td><td>WB</td><td><div class=\"dni-plaintext\">2</div></td><td><div class=\"dni-plaintext\">False</div></td><td><table><thead><tr><th>Dimensions</th><th>IsKnownSize</th><th>ItemType</th><th>Size</th><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ 1, 20 ]</div></td><td><div class=\"dni-plaintext\">True</div></td><td><div class=\"dni-plaintext\">{ Single: RawType: System.Single }</div></td><td><div class=\"dni-plaintext\">20</div></td><td><div class=\"dni-plaintext\">Microsoft.ML.Data.VBuffer&lt;System.Single&gt;</div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ { SlotNames: Vector&lt;String, 20&gt;: Name: SlotNames, Index: 0, IsHidden: False, Type: { Vector&lt;String, 20&gt;: Dimensions: [ 20 ], IsKnownSize: True, ItemType: { String: RawType: System.ReadOnlyMemory&lt;System.Char&gt; }, Size: 20, RawType: Microsoft.ML.Data.VBuffer&lt;System.ReadOnlyMemory&lt;System.Char&gt;&gt; }, Annotations: { : Schema: [ ] } } ]</div></td></tr></tbody></table></td></tr></tbody></table>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"onnxOutputDv.Schema"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var wordBagOutputOnnx = onnxOutputDv.GetColumn<float[]>(\"WB\");"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [
{
"data": {
"text/html": [
"<table><thead><tr><th><i>index</i></th><th>value</th></tr></thead><tbody><tr><td>0</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 ]</div></td></tr><tr><td>1</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 0, 1, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 ]</div></td></tr></tbody></table>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"wordBagOutputOnnx"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [],
"source": [
"var comparison = \n",
" wordbagOutputmlnet.Zip(wordBagOutputOnnx, (a,b) => \n",
" { \n",
" var difference = a.Zip(b,(c,d) => c-d);\n",
" return new {MLNET=a, ONNX=b,Difference=difference};\n",
" });"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "csharp"
},
"vscode": {
"languageId": "dotnet-interactive.csharp"
}
},
"outputs": [
{
"data": {
"text/html": [
"<table><thead><tr><th><i>index</i></th><th>MLNET</th><th>ONNX</th><th>Difference</th></tr></thead><tbody><tr><td>0</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 ]</div></td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 ]</div></td><td><div class=\"dni-plaintext\">[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ... (more) ]</div></td></tr><tr><td>1</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 0, 1, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 ]</div></td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 0, 1, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 ]</div></td><td><div class=\"dni-plaintext\">[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ... (more) ]</div></td></tr></tbody></table>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"comparison"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".NET (C#)",
"language": "C#",
"name": ".net-csharp"
},
"language_info": {
"name": "C#"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment