Last active
May 29, 2021 12:09
-
-
Save genkuroki/6123aef79488bc20b52047656fc6f015 to your computer and use it in GitHub Desktop.
Octavian!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
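"source": "Benchmarks of 8×8 matrix multiplication comparing `LinearAlgebra.mul!`, MKL `dgemm` / `dgemm_direct`, and Octavian.jl `matmul!`, including runs on StaticArrays `MMatrix` arguments.\n\nRelated discussion: https://discourse.julialang.org/t/intel-c-c-compiler-performance-versus-julia/61929/18" | |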
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "versioninfo()", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "Julia Version 1.7.0-DEV.1129\nCommit 9117b4d6d6 (2021-05-20 16:42 UTC)\nPlatform Info:\n OS: Windows (x86_64-w64-mingw32)\n CPU: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz\n WORD_SIZE: 64\n LIBM: libopenlibm\n LLVM: libLLVM-11.0.1 (ORCJIT, skylake)\nEnvironment:\n JULIA_NUM_THREADS = 12\n JULIA_PYTHONCALL_EXE = C:\\Users\\genkuroki\\.julia\\conda\\3\\python.exe\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "using LinearAlgebra\nusing BLASBenchmarksCPU\nusing Octavian\nusing BenchmarkHistograms", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "M = K = N = 8\nA = rand(M, K)\nB = rand(K, N)\nC1 = @time(A * B)\nC0 = similar(C1);", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 0.611613 seconds (2.53 M allocations: 134.128 MiB, 7.43% gc time, 99.95% compilation time)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark mul!($C0, $A, $B)", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 4, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 714; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (177.0 - 197.0 ] \u001b[32m██████████████████████████████\u001b[39m7525\n (197.0 - 217.0 ] \u001b[32m██▊\u001b[39m677\n (217.0 - 237.0 ] \u001b[32m███▋\u001b[39m883\n (237.0 - 257.0 ] \u001b[32m█▍\u001b[39m318\n (257.0 - 277.0 ] \u001b[32m▋\u001b[39m145\n (277.0 - 297.0 ] \u001b[32m▊\u001b[39m163\n (297.0 - 317.0 ] \u001b[32m▍\u001b[39m92\n (317.0 - 337.0 ] \u001b[32m▎\u001b[39m58\n (337.0 - 357.0 ] \u001b[32m▎\u001b[39m42\n (357.0 - 377.0 ] \u001b[32m▏\u001b[39m31\n (377.0 - 397.0 ] \u001b[32m▏\u001b[39m30\n (397.0 - 417.0 ] \u001b[32m▏\u001b[39m7\n (417.0 - 437.0 ] \u001b[32m▏\u001b[39m11\n (437.0 - 457.0 ] \u001b[32m▏\u001b[39m8\n (457.0 - 1136.0] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 177.171 ns (0.00% GC); mean: 201.009 ns (0.00% GC); median: 189.636 ns (0.00% GC); max: 1.136 μs (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# MKL dgemm\n@benchmark gemmmkl!($C0, $A, $B)", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 516; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (173.0 - 191.0] \u001b[32m██████████████████████████████ \u001b[39m8585\n (191.0 - 209.0] \u001b[32m█▊\u001b[39m499\n (209.0 - 226.0] \u001b[32m█▏\u001b[39m287\n (226.0 - 244.0] \u001b[32m▌\u001b[39m140\n (244.0 - 262.0] \u001b[32m▋\u001b[39m162\n (262.0 - 280.0] \u001b[32m▍\u001b[39m100\n (280.0 - 297.0] \u001b[32m▎\u001b[39m60\n (297.0 - 315.0] \u001b[32m▎\u001b[39m54\n (315.0 - 333.0] \u001b[32m▎\u001b[39m42\n (333.0 - 351.0] \u001b[32m▏\u001b[39m21\n (351.0 - 369.0] \u001b[32m▏\u001b[39m17\n (369.0 - 386.0] \u001b[32m▏\u001b[39m10\n (386.0 - 404.0] \u001b[32m▏\u001b[39m8\n (404.0 - 422.0] \u001b[32m▏\u001b[39m5\n (422.0 - 795.0] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 173.062 ns (0.00% GC); mean: 190.630 ns (0.00% GC); median: 183.721 ns (0.00% GC); max: 794.961 ns (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# MKL dgemm_direct\n@benchmark gemmmkl_direct!($C0, $A, $B)", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 6, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 943; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (101.0 - 112.0] \u001b[32m██████████████████████████████▏\u001b[39m8682\n (112.0 - 122.0] \u001b[32m█\u001b[39m282\n (122.0 - 133.0] \u001b[32m█▏\u001b[39m300\n (133.0 - 144.0] \u001b[32m▋\u001b[39m149\n (144.0 - 154.0] \u001b[32m▊\u001b[39m214\n (154.0 - 165.0] \u001b[32m▍\u001b[39m98\n (165.0 - 176.0] \u001b[32m▍\u001b[39m76\n (176.0 - 187.0] \u001b[32m▎\u001b[39m49\n (187.0 - 197.0] \u001b[32m▎\u001b[39m52\n (197.0 - 208.0] \u001b[32m▏\u001b[39m36\n (208.0 - 219.0] \u001b[32m▏\u001b[39m15\n (219.0 - 229.0] \u001b[32m▏\u001b[39m25\n (229.0 - 240.0] \u001b[32m▏\u001b[39m9\n (240.0 - 251.0] \u001b[32m▏\u001b[39m3\n (251.0 - 342.0] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 100.848 ns (0.00% GC); mean: 111.745 ns (0.00% GC); median: 106.469 ns (0.00% GC); max: 342.312 ns (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Octavian.jl\n@benchmark matmul!($C0, $A, $B)", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 7, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 992; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (37.8 - 43.9 ] \u001b[32m██████████████████████████████ \u001b[39m9202\n (43.9 - 50.0 ] \u001b[32m█\u001b[39m270\n (50.0 - 56.1 ] \u001b[32m▍\u001b[39m113\n (56.1 - 62.1 ] \u001b[32m▍\u001b[39m92\n (62.1 - 68.2 ] \u001b[32m▎\u001b[39m69\n (68.2 - 74.3 ] \u001b[32m▎\u001b[39m46\n (74.3 - 80.4 ] \u001b[32m▎\u001b[39m48\n (80.4 - 86.5 ] \u001b[32m▎\u001b[39m49\n (86.5 - 92.6 ] \u001b[32m▏\u001b[39m33\n (92.6 - 98.6 ] \u001b[32m▏\u001b[39m24\n (98.6 - 104.7] \u001b[32m▏\u001b[39m18\n (104.7 - 110.8] \u001b[32m▏\u001b[39m13\n (110.8 - 116.9] \u001b[32m▏\u001b[39m4\n (116.9 - 123.0] \u001b[32m▏\u001b[39m9\n (123.0 - 153.2] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 37.802 ns (0.00% GC); mean: 40.487 ns (0.00% GC); median: 38.105 ns (0.00% GC); max: 153.226 ns (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "using StaticArrays\nAm = MMatrix{M, K}(A)\nBm = MMatrix{K, N}(B)\nCm = similar(Am);", | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark mul!($Cm, $Am, $Bm)", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 9, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 974; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (67.0 - 75.0 ] \u001b[32m██████████████████████████████ \u001b[39m9075\n (75.0 - 84.0 ] \u001b[32m▌\u001b[39m131\n (84.0 - 92.0 ] \u001b[32m▋\u001b[39m168\n (92.0 - 100.0] \u001b[32m▎\u001b[39m69\n (100.0 - 108.0] \u001b[32m▍\u001b[39m79\n (108.0 - 116.0] \u001b[32m▊\u001b[39m221\n (116.0 - 125.0] \u001b[32m▎\u001b[39m50\n (125.0 - 133.0] \u001b[32m▎\u001b[39m44\n (133.0 - 141.0] \u001b[32m▎\u001b[39m44\n (141.0 - 149.0] \u001b[32m▏\u001b[39m33\n (149.0 - 157.0] \u001b[32m▏\u001b[39m34\n (157.0 - 165.0] \u001b[32m▏\u001b[39m15\n (165.0 - 174.0] \u001b[32m▏\u001b[39m17\n (174.0 - 182.0] \u001b[32m▏\u001b[39m10\n (182.0 - 373.0] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 67.248 ns (0.00% GC); mean: 72.011 ns (0.00% GC); median: 67.967 ns (0.00% GC); max: 372.793 ns (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Octavian.jl on StaticArrays.MMatrix arguments, which are statically sized\n@benchmark matmul!($Cm, $Am, $Bm)", | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 10, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 997; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (18.1 - 21.5] \u001b[32m██████████████████████████████\u001b[39m9264\n (21.5 - 25.0] \u001b[32m█▌\u001b[39m435\n (25.0 - 28.5] \u001b[32m▏\u001b[39m21\n (28.5 - 32.0] \u001b[32m▏\u001b[39m7\n (32.0 - 35.5] \u001b[32m▎\u001b[39m66\n (35.5 - 38.9] \u001b[32m▏\u001b[39m38\n (38.9 - 42.4] \u001b[32m▎\u001b[39m45\n (42.4 - 45.9] \u001b[32m▏\u001b[39m23\n (45.9 - 49.4] \u001b[32m▏\u001b[39m22\n (49.4 - 52.9] \u001b[32m▏\u001b[39m20\n (52.9 - 56.4] \u001b[32m▏\u001b[39m16\n (56.4 - 59.8] \u001b[32m▏\u001b[39m10\n (59.8 - 63.3] \u001b[32m▏\u001b[39m14\n (63.3 - 66.8] \u001b[32m▏\u001b[39m9\n (66.8 - 86.4] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 18.054 ns (0.00% GC); mean: 19.466 ns (0.00% GC); median: 18.255 ns (0.00% GC); max: 86.359 ns (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "julia-1.7-depwarn-o3", | |
"display_name": "Julia 1.7.0-DEV depwarn -O3", | |
"language": "julia" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"name": "julia", | |
"mimetype": "application/julia", | |
"version": "1.7.0" | |
}, | |
"@webio": { | |
"lastKernelId": null, | |
"lastCommId": null | |
}, | |
"gist": { | |
"id": "6123aef79488bc20b52047656fc6f015", | |
"data": { | |
"description": "Octavian!", | |
"public": true | |
} | |
}, | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/6123aef79488bc20b52047656fc6f015" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment