Created
September 27, 2023 22:38
-
-
Save luisquintanilla/b1367fd8ba000e455b2e43a23dafa645 to your computer and use it in GitHub Desktop.
Define IDataView schema using anonymous types
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Install NuGet packages" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><div></div><div></div><div><strong>Installed Packages</strong><ul><li><span>Microsoft.ML, 3.0.0-preview.23266.6</span></li></ul></div></div>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"#r \"nuget: Microsoft.ML, 3.0.0-preview.23266.6\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"using Microsoft.ML;\n", | |
"using Microsoft.ML.Data;" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Define schema dynamically\n", | |
"\n", | |
"Set the vector size through `SchemaDefinition` instead of `VectorTypeAttribute`, which cannot be applied to the properties of an anonymous type." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var sd = SchemaDefinition.Create((new {Features=new float[1536]}).GetType());\n", | |
"sd[\"Features\"].ColumnType = new VectorDataViewType(NumberDataViewType.Single,1536);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table><thead><tr><th><i>index</i></th><th>value</th></tr></thead><tbody><tr><td>0</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>Microsoft.ML.Data.SchemaDefinition+Column</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>MemberName</td><td><div class=\"dni-plaintext\"><pre>Features</pre></div></td></tr><tr><td>ColumnName</td><td><div class=\"dni-plaintext\"><pre>Features</pre></div></td></tr><tr><td>ColumnType</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>Vector<Single, 1536></code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Dimensions</td><td><div class=\"dni-plaintext\"><pre>[ 1536 ]</pre></div></td></tr><tr><td>IsKnownSize</td><td><div class=\"dni-plaintext\"><pre>True</pre></div></td></tr><tr><td>ItemType</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>Single</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>RawType</td><td><span><a href=\"https://docs.microsoft.com/dotnet/api/system.single?view=net-7.0\">System.Single</a></span></td></tr></tbody></table></div></details></td></tr><tr><td>Size</td><td><div class=\"dni-plaintext\"><pre>1536</pre></div></td></tr><tr><td>RawType</td><td><span><a href=\"https://docs.microsoft.com/dotnet/api/microsoft.ml.data.vbuffer-1?view=net-7.0\">Microsoft.ML.Data.VBuffer<System.Single></a></span></td></tr></tbody></table></div></details></td></tr><tr><td>Annotations</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code></code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Schema</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>[ ]</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Count</td><td><div 
class=\"dni-plaintext\"><pre>0</pre></div></td></tr><tr><td><i>(values)</i></td><td><i>(empty)</i></td></tr></tbody></table></div></details></td></tr></tbody></table></div></details></td></tr></tbody></table></div></details></td></tr></tbody></table><style>\r\n", | |
".dni-code-hint {\r\n", | |
" font-style: italic;\r\n", | |
" overflow: hidden;\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview {\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview td {\r\n", | |
" vertical-align: top;\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"details.dni-treeview {\r\n", | |
" padding-left: 1em;\r\n", | |
"}\r\n", | |
"table td {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"table tr { \r\n", | |
" vertical-align: top; \r\n", | |
" margin: 0em 0px;\r\n", | |
"}\r\n", | |
"table tr td pre \r\n", | |
"{ \r\n", | |
" vertical-align: top !important; \r\n", | |
" margin: 0em 0px !important;\r\n", | |
"} \r\n", | |
"table th {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"</style>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"sd" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Mock data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var data = new [] {new {Features=Enumerable.Range(0,1536).Select(x => (float)x).ToArray()}};" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table><thead><tr><th><i>index</i></th><th>value</th></tr></thead><tbody><tr><td>0</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>{ Features = System.Single[] }</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Features</td><td><div class=\"dni-plaintext\"><pre>[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ... (1516 more) ]</pre></div></td></tr></tbody></table></div></details></td></tr></tbody></table><style>\r\n", | |
".dni-code-hint {\r\n", | |
" font-style: italic;\r\n", | |
" overflow: hidden;\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview {\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview td {\r\n", | |
" vertical-align: top;\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"details.dni-treeview {\r\n", | |
" padding-left: 1em;\r\n", | |
"}\r\n", | |
"table td {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"table tr { \r\n", | |
" vertical-align: top; \r\n", | |
" margin: 0em 0px;\r\n", | |
"}\r\n", | |
"table tr td pre \r\n", | |
"{ \r\n", | |
" vertical-align: top !important; \r\n", | |
" margin: 0em 0px !important;\r\n", | |
"} \r\n", | |
"table th {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"</style>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Initialize MLContext" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var ctx = new MLContext();" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Load data into IDataView" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var dv = ctx.Data.LoadFromEnumerable(data,sd);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<details open=\"open\" class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>[ Features: Vector<Single, 1536> ]</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Count</td><td><div class=\"dni-plaintext\"><pre>1</pre></div></td></tr><tr><td><i>(values)</i></td><td><table><thead><tr><th><i>index</i></th><th>value</th></tr></thead><tbody><tr><td>0</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>Features: Vector<Single, 1536></code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Name</td><td><div class=\"dni-plaintext\"><pre>Features</pre></div></td></tr><tr><td>Index</td><td><div class=\"dni-plaintext\"><pre>0</pre></div></td></tr><tr><td>IsHidden</td><td><div class=\"dni-plaintext\"><pre>False</pre></div></td></tr><tr><td>Type</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>Vector<Single, 1536></code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Dimensions</td><td><div class=\"dni-plaintext\"><pre>[ 1536 ]</pre></div></td></tr><tr><td>IsKnownSize</td><td><div class=\"dni-plaintext\"><pre>True</pre></div></td></tr><tr><td>ItemType</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>Single</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>RawType</td><td><span><a href=\"https://docs.microsoft.com/dotnet/api/system.single?view=net-7.0\">System.Single</a></span></td></tr></tbody></table></div></details></td></tr><tr><td>Size</td><td><div class=\"dni-plaintext\"><pre>1536</pre></div></td></tr><tr><td>RawType</td><td><span><a href=\"https://docs.microsoft.com/dotnet/api/microsoft.ml.data.vbuffer-1?view=net-7.0\">Microsoft.ML.Data.VBuffer<System.Single></a></span></td></tr></tbody></table></div></details></td></tr><tr><td>Annotations</td><td><details class=\"dni-treeview\"><summary><span 
class=\"dni-code-hint\"><code></code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Schema</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>[ ]</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Count</td><td><div class=\"dni-plaintext\"><pre>0</pre></div></td></tr><tr><td><i>(values)</i></td><td><i>(empty)</i></td></tr></tbody></table></div></details></td></tr></tbody></table></div></details></td></tr></tbody></table></div></details></td></tr></tbody></table></td></tr></tbody></table></div></details><style>\r\n", | |
".dni-code-hint {\r\n", | |
" font-style: italic;\r\n", | |
" overflow: hidden;\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview {\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview td {\r\n", | |
" vertical-align: top;\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"details.dni-treeview {\r\n", | |
" padding-left: 1em;\r\n", | |
"}\r\n", | |
"table td {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"table tr { \r\n", | |
" vertical-align: top; \r\n", | |
" margin: 0em 0px;\r\n", | |
"}\r\n", | |
"table tr td pre \r\n", | |
"{ \r\n", | |
" vertical-align: top !important; \r\n", | |
" margin: 0em 0px !important;\r\n", | |
"} \r\n", | |
"table th {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"</style>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"dv.Schema" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": ".NET (C#)", | |
"language": "C#", | |
"name": ".net-csharp" | |
}, | |
"language_info": { | |
"name": "polyglot-notebook" | |
}, | |
"polyglot_notebook": { | |
"kernelInfo": { | |
"defaultKernelName": "csharp", | |
"items": [ | |
{ | |
"aliases": [], | |
"languageName": "csharp", | |
"name": "csharp" | |
} | |
] | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment