Last active
September 13, 2022 22:39
-
-
Save luisquintanilla/dc420f90d956bad15b6ac0f72d7bc113 to your computer and use it in GitHub Desktop.
Convert Word Bag Transform to ONNX ML.NET
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><div></div><div></div><div><strong>Installed Packages</strong><ul><li><span>Microsoft.ML, 1.7.1</span></li><li><span>Microsoft.ML.OnnxConverter, 0.19.1</span></li><li><span>Microsoft.ML.OnnxTransformer, 1.7.1</span></li></ul></div></div>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"#r \"nuget:Microsoft.ML\"\n", | |
"#r \"nuget:Microsoft.ML.OnnxTransformer\"\n", | |
"#r \"nuget:Microsoft.ML.OnnxConverter\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"using System;\n", | |
"using System.IO;\n", | |
"using System.Linq;\n", | |
"using Microsoft.ML;\n", | |
"using Microsoft.ML.OnnxRuntime;\n", | |
"using Microsoft.ML.Data;\n", | |
"using Microsoft.ML.Transforms;" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var corpus = new [] \n", | |
"{\n", | |
"\tnew {Text = \"The quick brown fox jumped over the lazy dog. Dog is so lazy. Quick!\"},\n", | |
"\tnew {Text = \"The lazy dog was jumped over by the quick brown fox. Fox is not lazy\"}\n", | |
"};" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var ctx = new MLContext();" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var data = ctx.Data.LoadFromEnumerable(corpus);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var pipeline = ctx.Transforms.Text.ProduceWordBags (outputColumnName: \"WB\", inputColumnName:\"Text\", ngramLength:1);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var model = pipeline.Fit(data);" | |
] | |
}, | |
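{ | |
"cell_type": "markdown", | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
} | |
}, | |
"source": [ | |
"Apply the fitted ML.NET model to the data (`var mlnetmodelOutputDv = model.Transform(data);`) to get the reference word-bag output that is later compared against the ONNX model's output." | |
] | |
}, | |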
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var mlnetmodelOutputDv = model.Transform(data);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table><thead><tr><th><i>index</i></th><th>Name</th><th>Index</th><th>IsHidden</th><th>Type</th><th>Annotations</th></tr></thead><tbody><tr><td>0</td><td>Text</td><td><div class=\"dni-plaintext\">0</div></td><td><div class=\"dni-plaintext\">False</div></td><td><table><thead><tr><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">System.ReadOnlyMemory<System.Char></div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ ]</div></td></tr></tbody></table></td></tr><tr><td>1</td><td>WB</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">True</div></td><td><table><thead><tr><th>Dimensions</th><th>IsKnownSize</th><th>ItemType</th><th>Size</th><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ 0 ]</div></td><td><div class=\"dni-plaintext\">False</div></td><td><div class=\"dni-plaintext\">{ String: RawType: System.ReadOnlyMemory<System.Char> }</div></td><td><div class=\"dni-plaintext\">0</div></td><td><div class=\"dni-plaintext\">Microsoft.ML.Data.VBuffer<System.ReadOnlyMemory<System.Char>></div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ ]</div></td></tr></tbody></table></td></tr><tr><td>2</td><td>WB</td><td><div class=\"dni-plaintext\">2</div></td><td><div class=\"dni-plaintext\">True</div></td><td><table><thead><tr><th>Dimensions</th><th>IsKnownSize</th><th>ItemType</th><th>Size</th><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ 0 ]</div></td><td><div class=\"dni-plaintext\">False</div></td><td><div class=\"dni-plaintext\">{ Key<UInt32, 0-19>: Count: 20, RawType: System.UInt32 }</div></td><td><div class=\"dni-plaintext\">0</div></td><td><div class=\"dni-plaintext\">Microsoft.ML.Data.VBuffer<System.UInt32></div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div 
class=\"dni-plaintext\">[ { KeyValues: Vector<String, 20>: Name: KeyValues, Index: 0, IsHidden: False, Type: { Vector<String, 20>: Dimensions: [ 20 ], IsKnownSize: True, ItemType: { String: RawType: System.ReadOnlyMemory<System.Char> }, Size: 20, RawType: Microsoft.ML.Data.VBuffer<System.ReadOnlyMemory<System.Char>> }, Annotations: { : Schema: [ ] } } ]</div></td></tr></tbody></table></td></tr><tr><td>3</td><td>WB</td><td><div class=\"dni-plaintext\">3</div></td><td><div class=\"dni-plaintext\">False</div></td><td><table><thead><tr><th>Dimensions</th><th>IsKnownSize</th><th>ItemType</th><th>Size</th><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ 20 ]</div></td><td><div class=\"dni-plaintext\">True</div></td><td><div class=\"dni-plaintext\">{ Single: RawType: System.Single }</div></td><td><div class=\"dni-plaintext\">20</div></td><td><div class=\"dni-plaintext\">Microsoft.ML.Data.VBuffer<System.Single></div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ { SlotNames: Vector<String, 20>: Name: SlotNames, Index: 0, IsHidden: False, Type: { Vector<String, 20>: Dimensions: [ 20 ], IsKnownSize: True, ItemType: { String: RawType: System.ReadOnlyMemory<System.Char> }, Size: 20, RawType: Microsoft.ML.Data.VBuffer<System.ReadOnlyMemory<System.Char>> }, Annotations: { : Schema: [ ] } } ]</div></td></tr></tbody></table></td></tr></tbody></table>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"mlnetmodelOutputDv.Schema" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var wordbagOutputmlnet = mlnetmodelOutputDv.GetColumn<float[]>(\"WB\");" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table><thead><tr><th><i>index</i></th><th>value</th></tr></thead><tbody><tr><td>0</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 ]</div></td></tr><tr><td>1</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 0, 1, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 ]</div></td></tr></tbody></table>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"wordbagOutputmlnet" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"using(FileStream stream = File.Create(\"./onnx_model.onnx\"))\n", | |
"{\n", | |
" ctx.Model.ConvertToOnnx(model, data, stream);\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var onnxModelPipeline = ctx.Transforms.ApplyOnnxModel(\"onnx_model.onnx\");" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var onnxOutputDv = onnxModelPipeline.Fit(data).Transform(data);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table><thead><tr><th><i>index</i></th><th>Name</th><th>Index</th><th>IsHidden</th><th>Type</th><th>Annotations</th></tr></thead><tbody><tr><td>0</td><td>Text</td><td><div class=\"dni-plaintext\">0</div></td><td><div class=\"dni-plaintext\">True</div></td><td><table><thead><tr><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">System.ReadOnlyMemory<System.Char></div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ ]</div></td></tr></tbody></table></td></tr><tr><td>1</td><td>Text</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">False</div></td><td><table><thead><tr><th>Dimensions</th><th>IsKnownSize</th><th>ItemType</th><th>Size</th><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ 1, 1 ]</div></td><td><div class=\"dni-plaintext\">True</div></td><td><div class=\"dni-plaintext\">{ String: RawType: System.ReadOnlyMemory<System.Char> }</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">Microsoft.ML.Data.VBuffer<System.ReadOnlyMemory<System.Char>></div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ ]</div></td></tr></tbody></table></td></tr><tr><td>2</td><td>WB</td><td><div class=\"dni-plaintext\">2</div></td><td><div class=\"dni-plaintext\">False</div></td><td><table><thead><tr><th>Dimensions</th><th>IsKnownSize</th><th>ItemType</th><th>Size</th><th>RawType</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">[ 1, 20 ]</div></td><td><div class=\"dni-plaintext\">True</div></td><td><div class=\"dni-plaintext\">{ Single: RawType: System.Single }</div></td><td><div class=\"dni-plaintext\">20</div></td><td><div class=\"dni-plaintext\">Microsoft.ML.Data.VBuffer<System.Single></div></td></tr></tbody></table></td><td><table><thead><tr><th>Schema</th></tr></thead><tbody><tr><td><div 
class=\"dni-plaintext\">[ { SlotNames: Vector<String, 20>: Name: SlotNames, Index: 0, IsHidden: False, Type: { Vector<String, 20>: Dimensions: [ 20 ], IsKnownSize: True, ItemType: { String: RawType: System.ReadOnlyMemory<System.Char> }, Size: 20, RawType: Microsoft.ML.Data.VBuffer<System.ReadOnlyMemory<System.Char>> }, Annotations: { : Schema: [ ] } } ]</div></td></tr></tbody></table></td></tr></tbody></table>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"onnxOutputDv.Schema" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var wordBagOutputOnnx = onnxOutputDv.GetColumn<float[]>(\"WB\");" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table><thead><tr><th><i>index</i></th><th>value</th></tr></thead><tbody><tr><td>0</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 ]</div></td></tr><tr><td>1</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 0, 1, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 ]</div></td></tr></tbody></table>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"wordBagOutputOnnx" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var comparison = \n", | |
" wordbagOutputmlnet.Zip(wordBagOutputOnnx, (a,b) => \n", | |
" { \n", | |
" var difference = a.Zip(b,(c,d) => c-d);\n", | |
" return new {MLNET=a, ONNX=b,Difference=difference};\n", | |
" });" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"vscode": { | |
"languageId": "dotnet-interactive.csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table><thead><tr><th><i>index</i></th><th>MLNET</th><th>ONNX</th><th>Difference</th></tr></thead><tbody><tr><td>0</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 ]</div></td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 ]</div></td><td><div class=\"dni-plaintext\">[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ... (more) ]</div></td></tr><tr><td>1</td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 0, 1, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 ]</div></td><td><div class=\"dni-plaintext\">[ 1, 1, 1, 0, 1, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 ]</div></td><td><div class=\"dni-plaintext\">[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ... (more) ]</div></td></tr></tbody></table>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"comparison" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": ".NET (C#)", | |
"language": "C#", | |
"name": ".net-csharp" | |
}, | |
"language_info": { | |
"name": "C#" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment