Created
October 4, 2023 14:00
-
-
Save luisquintanilla/8b8aa7cb4ff07d53b6f27758329af063 to your computer and use it in GitHub Desktop.
Calculating Cosine Similarity with Different Implementations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Consine Similarity Samples\n", | |
"\n", | |
"Different examples showing how to compute cosine similarity using TorchSharp, a custom implementation, and tensor primitives." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var movies = new [] {\n", | |
" new {Title=\"The Lion King\", Embedding= new [] {0.10022575f, -0.23998135f}},\n", | |
" new {Title=\"Inception\", Embedding= new [] {0.10327095f, 0.2563685f}},\n", | |
" new {Title=\"Toy Story\", Embedding= new [] {0.095857024f, -0.201278f}},\n", | |
" new {Title=\"Pulp Function\", Embedding= new [] {0.106827796f, 0.21676421f}},\n", | |
" new {Title=\"Shrek\", Embedding= new [] {0.09568083f, -0.21177962f}}\n", | |
"};" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"// \"A movie that's fun for the whole family\" Embedding\n", | |
"var queryEmbedding = new[] {0.12217915f, -0.034832448f };" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## TorchSharp" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><div></div><div></div><div><strong>Installed Packages</strong><ul><li><span>TorchSharp-cpu, 0.100.5</span></li></ul></div></div>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"Loading extensions from `C:\\Users\\luquinta\\.nuget\\packages\\skiasharp\\2.88.3\\interactive-extensions\\dotnet\\SkiaSharp.DotNet.Interactive.dll`" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"#r \"nuget: TorchSharp-cpu, 0.100.5\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"using TorchSharp;\n", | |
"using static TorchSharp.torch.nn.functional;" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var top3MoviesTorchSharp = \n", | |
" movies\n", | |
" .Select(movie => \n", | |
" new {\n", | |
" Title=movie.Title, \n", | |
" Embedding=movie.Embedding, \n", | |
" Similarity=cosine_similarity(torch.tensor(queryEmbedding),torch.tensor(movie.Embedding),0L).item<float>()})\n", | |
" .OrderByDescending(movies => movies.Similarity)\n", | |
" .Take(3);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<details open=\"open\" class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>[ { Title = Toy Story, Embedding = System.Single[], Similarity = 0.66102695 }, { Title = Shrek, Embedding = System.Single[], Similarity = 0.6457999 }, { Title = The Lion King, Embedding = System.Single[], Similarity = 0.62360466 } ]</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td><i>(values)</i></td><td><table><thead><tr><th><i>index</i></th><th>value</th></tr></thead><tbody><tr><td>0</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>{ Title = Toy Story, Embedding = System.Single[], Similarity = 0.66102695 }</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Title</td><td><div class=\"dni-plaintext\"><pre>Toy Story</pre></div></td></tr><tr><td>Embedding</td><td><div class=\"dni-plaintext\"><pre>[ 0.095857024, -0.201278 ]</pre></div></td></tr><tr><td>Similarity</td><td><div class=\"dni-plaintext\"><pre>0.66102695</pre></div></td></tr></tbody></table></div></details></td></tr><tr><td>1</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>{ Title = Shrek, Embedding = System.Single[], Similarity = 0.6457999 }</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Title</td><td><div class=\"dni-plaintext\"><pre>Shrek</pre></div></td></tr><tr><td>Embedding</td><td><div class=\"dni-plaintext\"><pre>[ 0.09568083, -0.21177962 ]</pre></div></td></tr><tr><td>Similarity</td><td><div class=\"dni-plaintext\"><pre>0.6457999</pre></div></td></tr></tbody></table></div></details></td></tr><tr><td>2</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>{ Title = The Lion King, Embedding = System.Single[], Similarity = 0.62360466 }</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Title</td><td><div class=\"dni-plaintext\"><pre>The Lion King</pre></div></td></tr><tr><td>Embedding</td><td><div class=\"dni-plaintext\"><pre>[ 0.10022575, -0.23998135 ]</pre></div></td></tr><tr><td>Similarity</td><td><div class=\"dni-plaintext\"><pre>0.62360466</pre></div></td></tr></tbody></table></div></details></td></tr></tbody></table></td></tr></tbody></table></div></details><style>\r\n", | |
".dni-code-hint {\r\n", | |
" font-style: italic;\r\n", | |
" overflow: hidden;\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview {\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview td {\r\n", | |
" vertical-align: top;\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"details.dni-treeview {\r\n", | |
" padding-left: 1em;\r\n", | |
"}\r\n", | |
"table td {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"table tr { \r\n", | |
" vertical-align: top; \r\n", | |
" margin: 0em 0px;\r\n", | |
"}\r\n", | |
"table tr td pre \r\n", | |
"{ \r\n", | |
" vertical-align: top !important; \r\n", | |
" margin: 0em 0px !important;\r\n", | |
"} \r\n", | |
"table th {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"</style>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"top3MoviesTorchSharp" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Custom Implementation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"public float CosineSimilarityCustom(ReadOnlySpan<float> vector1, ReadOnlySpan<float> vector2)\n", | |
"{\n", | |
" if (vector1.Length != vector2.Length)\n", | |
" throw new ArgumentException(\"Vectors must have the same length\");\n", | |
"\n", | |
" float dotProduct = 0f;\n", | |
" float magnitude1 = 0f;\n", | |
" float magnitude2 = 0f;\n", | |
"\n", | |
" for (int i = 0; i < vector1.Length; i++)\n", | |
" {\n", | |
" dotProduct += vector1[i] * vector2[i];\n", | |
" magnitude1 += vector1[i] * vector1[i];\n", | |
" magnitude2 += vector2[i] * vector2[i];\n", | |
" }\n", | |
"\n", | |
" magnitude1 = MathF.Sqrt(magnitude1);\n", | |
" magnitude2 = MathF.Sqrt(magnitude2);\n", | |
"\n", | |
" if (magnitude1 == 0 || magnitude2 == 0)\n", | |
" return 0; // handle the case where one or both vectors have zero magnitude\n", | |
"\n", | |
" return dotProduct / (magnitude1 * magnitude2);\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var top3MoviesNaive = \n", | |
" movies\n", | |
" .Select(movie => \n", | |
" new {\n", | |
" Title=movie.Title, \n", | |
" Embedding=movie.Embedding, \n", | |
" Similarity=CosineSimilarityCustom(queryEmbedding.AsSpan(),movie.Embedding.AsSpan())})\n", | |
" .OrderByDescending(movies => movies.Similarity)\n", | |
" .Take(3);\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<details open=\"open\" class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>[ { Title = Toy Story, Embedding = System.Single[], Similarity = 0.66102695 }, { Title = Shrek, Embedding = System.Single[], Similarity = 0.64579993 }, { Title = The Lion King, Embedding = System.Single[], Similarity = 0.6236047 } ]</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td><i>(values)</i></td><td><table><thead><tr><th><i>index</i></th><th>value</th></tr></thead><tbody><tr><td>0</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>{ Title = Toy Story, Embedding = System.Single[], Similarity = 0.66102695 }</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Title</td><td><div class=\"dni-plaintext\"><pre>Toy Story</pre></div></td></tr><tr><td>Embedding</td><td><div class=\"dni-plaintext\"><pre>[ 0.095857024, -0.201278 ]</pre></div></td></tr><tr><td>Similarity</td><td><div class=\"dni-plaintext\"><pre>0.66102695</pre></div></td></tr></tbody></table></div></details></td></tr><tr><td>1</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>{ Title = Shrek, Embedding = System.Single[], Similarity = 0.64579993 }</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Title</td><td><div class=\"dni-plaintext\"><pre>Shrek</pre></div></td></tr><tr><td>Embedding</td><td><div class=\"dni-plaintext\"><pre>[ 0.09568083, -0.21177962 ]</pre></div></td></tr><tr><td>Similarity</td><td><div class=\"dni-plaintext\"><pre>0.64579993</pre></div></td></tr></tbody></table></div></details></td></tr><tr><td>2</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>{ Title = The Lion King, Embedding = System.Single[], Similarity = 0.6236047 }</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Title</td><td><div class=\"dni-plaintext\"><pre>The Lion King</pre></div></td></tr><tr><td>Embedding</td><td><div class=\"dni-plaintext\"><pre>[ 0.10022575, -0.23998135 ]</pre></div></td></tr><tr><td>Similarity</td><td><div class=\"dni-plaintext\"><pre>0.6236047</pre></div></td></tr></tbody></table></div></details></td></tr></tbody></table></td></tr></tbody></table></div></details><style>\r\n", | |
".dni-code-hint {\r\n", | |
" font-style: italic;\r\n", | |
" overflow: hidden;\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview {\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview td {\r\n", | |
" vertical-align: top;\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"details.dni-treeview {\r\n", | |
" padding-left: 1em;\r\n", | |
"}\r\n", | |
"table td {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"table tr { \r\n", | |
" vertical-align: top; \r\n", | |
" margin: 0em 0px;\r\n", | |
"}\r\n", | |
"table tr td pre \r\n", | |
"{ \r\n", | |
" vertical-align: top !important; \r\n", | |
" margin: 0em 0px !important;\r\n", | |
"} \r\n", | |
"table th {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"</style>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"top3MoviesNaive" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Tensor Primitives" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"#r \"C:\\Users\\luquinta\\Downloads\\System.Numerics.Tensors\\netstandard\\System.Numerics.Tensors.dll\"\n", | |
"#r \"C:\\Users\\luquinta\\Downloads\\Microsoft.Bcl.Numerics\\Microsoft.Bcl.Numerics.dll\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"using System.Numerics.Tensors;" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"var top3MoviesNaive = \n", | |
" movies\n", | |
" .Select(movie => \n", | |
" new {\n", | |
" Title=movie.Title, \n", | |
" Embedding=movie.Embedding, \n", | |
" Similarity=TensorPrimitives.CosineSimilarity(queryEmbedding.AsSpan(),movie.Embedding.AsSpan())})\n", | |
" .OrderByDescending(movies => movies.Similarity)\n", | |
" .Take(3);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"dotnet_interactive": { | |
"language": "csharp" | |
}, | |
"polyglot_notebook": { | |
"kernelName": "csharp" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<details open=\"open\" class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>[ { Title = Toy Story, Embedding = System.Single[], Similarity = 0.66102695 }, { Title = Shrek, Embedding = System.Single[], Similarity = 0.64579993 }, { Title = The Lion King, Embedding = System.Single[], Similarity = 0.6236047 } ]</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td><i>(values)</i></td><td><table><thead><tr><th><i>index</i></th><th>value</th></tr></thead><tbody><tr><td>0</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>{ Title = Toy Story, Embedding = System.Single[], Similarity = 0.66102695 }</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Title</td><td><div class=\"dni-plaintext\"><pre>Toy Story</pre></div></td></tr><tr><td>Embedding</td><td><div class=\"dni-plaintext\"><pre>[ 0.095857024, -0.201278 ]</pre></div></td></tr><tr><td>Similarity</td><td><div class=\"dni-plaintext\"><pre>0.66102695</pre></div></td></tr></tbody></table></div></details></td></tr><tr><td>1</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>{ Title = Shrek, Embedding = System.Single[], Similarity = 0.64579993 }</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Title</td><td><div class=\"dni-plaintext\"><pre>Shrek</pre></div></td></tr><tr><td>Embedding</td><td><div class=\"dni-plaintext\"><pre>[ 0.09568083, -0.21177962 ]</pre></div></td></tr><tr><td>Similarity</td><td><div class=\"dni-plaintext\"><pre>0.64579993</pre></div></td></tr></tbody></table></div></details></td></tr><tr><td>2</td><td><details class=\"dni-treeview\"><summary><span class=\"dni-code-hint\"><code>{ Title = The Lion King, Embedding = System.Single[], Similarity = 0.6236047 }</code></span></summary><div><table><thead><tr></tr></thead><tbody><tr><td>Title</td><td><div class=\"dni-plaintext\"><pre>The Lion King</pre></div></td></tr><tr><td>Embedding</td><td><div class=\"dni-plaintext\"><pre>[ 0.10022575, -0.23998135 ]</pre></div></td></tr><tr><td>Similarity</td><td><div class=\"dni-plaintext\"><pre>0.6236047</pre></div></td></tr></tbody></table></div></details></td></tr></tbody></table></td></tr></tbody></table></div></details><style>\r\n", | |
".dni-code-hint {\r\n", | |
" font-style: italic;\r\n", | |
" overflow: hidden;\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview {\r\n", | |
" white-space: nowrap;\r\n", | |
"}\r\n", | |
".dni-treeview td {\r\n", | |
" vertical-align: top;\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"details.dni-treeview {\r\n", | |
" padding-left: 1em;\r\n", | |
"}\r\n", | |
"table td {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"table tr { \r\n", | |
" vertical-align: top; \r\n", | |
" margin: 0em 0px;\r\n", | |
"}\r\n", | |
"table tr td pre \r\n", | |
"{ \r\n", | |
" vertical-align: top !important; \r\n", | |
" margin: 0em 0px !important;\r\n", | |
"} \r\n", | |
"table th {\r\n", | |
" text-align: start;\r\n", | |
"}\r\n", | |
"</style>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"top3MoviesNaive" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": ".NET (C#)", | |
"language": "C#", | |
"name": ".net-csharp" | |
}, | |
"language_info": { | |
"name": "polyglot-notebook" | |
}, | |
"polyglot_notebook": { | |
"kernelInfo": { | |
"defaultKernelName": "csharp", | |
"items": [ | |
{ | |
"aliases": [], | |
"languageName": "csharp", | |
"name": "csharp" | |
} | |
] | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment