Last active
May 29, 2021 12:09
-
-
Save genkuroki/6123aef79488bc20b52047656fc6f015 to your computer and use it in GitHub Desktop.
Octavian!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "https://discourse.julialang.org/t/intel-c-c-compiler-performance-versus-julia/61929/18" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "versioninfo()", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "Julia Version 1.7.0-DEV.1129\nCommit 9117b4d6d6 (2021-05-20 16:42 UTC)\nPlatform Info:\n OS: Windows (x86_64-w64-mingw32)\n CPU: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz\n WORD_SIZE: 64\n LIBM: libopenlibm\n LLVM: libLLVM-11.0.1 (ORCJIT, skylake)\nEnvironment:\n JULIA_NUM_THREADS = 12\n JULIA_PYTHONCALL_EXE = C:\\Users\\genkuroki\\.julia\\conda\\3\\python.exe\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "using LinearAlgebra\nusing BLASBenchmarksCPU\nusing Octavian\nusing BenchmarkHistograms", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "M = K = N = 32\nA = rand(M, K)\nB = rand(K, N)\nC1 = @time(A * B)\nC0 = similar(C1);", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 0.539203 seconds (2.53 M allocations: 134.135 MiB, 7.92% gc time, 99.95% compilation time)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark mul!($C0, $A, $B)", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 4, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 9; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (2290.0 - 2700.0 ] \u001b[32m██████████████████████████████ \u001b[39m9341\n (2700.0 - 3100.0 ] \u001b[32m█▏\u001b[39m350\n (3100.0 - 3510.0 ] \u001b[32m▏\u001b[39m8\n (3510.0 - 3920.0 ] \u001b[32m▏\u001b[39m19\n (3920.0 - 4330.0 ] \u001b[32m▍\u001b[39m79\n (4330.0 - 4740.0 ] \u001b[32m▎\u001b[39m51\n (4740.0 - 5140.0 ] \u001b[32m▏\u001b[39m38\n (5140.0 - 5550.0 ] \u001b[32m▏\u001b[39m16\n (5550.0 - 5960.0 ] \u001b[32m▏\u001b[39m31\n (5960.0 - 6370.0 ] \u001b[32m▏\u001b[39m20\n (6370.0 - 6780.0 ] \u001b[32m▏\u001b[39m14\n (6780.0 - 7180.0 ] \u001b[32m▏\u001b[39m15\n (7180.0 - 7590.0 ] \u001b[32m▏\u001b[39m5\n (7590.0 - 8000.0 ] \u001b[32m▏\u001b[39m3\n (8000.0 - 10600.0] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 2.289 μs (0.00% GC); mean: 2.426 μs (0.00% GC); median: 2.311 μs (0.00% GC); max: 10.600 μs (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# MKL dgemm\n@benchmark gemmmkl!($C0, $A, $B)", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 10; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (1290.0 - 1600.0] \u001b[32m██████████████████████████████ \u001b[39m9490\n (1600.0 - 1910.0] \u001b[32m█▎\u001b[39m356\n (1910.0 - 2220.0] \u001b[32m▏\u001b[39m4\n (2220.0 - 2530.0] \u001b[32m▏\u001b[39m2\n (2530.0 - 2840.0] \u001b[32m▏\u001b[39m17\n (2840.0 - 3150.0] \u001b[32m▎\u001b[39m40\n (3150.0 - 3470.0] \u001b[32m▏\u001b[39m11\n (3470.0 - 3780.0] \u001b[32m▏\u001b[39m16\n (3780.0 - 4090.0] \u001b[32m▏\u001b[39m6\n (4090.0 - 4400.0] \u001b[32m▏\u001b[39m10\n (4400.0 - 4710.0] \u001b[32m▏\u001b[39m14\n (4710.0 - 5020.0] \u001b[32m▏\u001b[39m8\n (5020.0 - 5330.0] \u001b[32m▏\u001b[39m9\n (5330.0 - 5640.0] \u001b[32m▏\u001b[39m7\n (5640.0 - 6950.0] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 1.290 μs (0.00% GC); mean: 1.370 μs (0.00% GC); median: 1.300 μs (0.00% GC); max: 6.950 μs (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# MKL dgemm_direct\n@benchmark gemmmkl_direct!($C0, $A, $B)", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 6, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 10; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (1200.0 - 1520.0] \u001b[32m██████████████████████████████\u001b[39m9321\n (1520.0 - 1830.0] \u001b[32m█▌\u001b[39m430\n (1830.0 - 2150.0] \u001b[32m▏\u001b[39m19\n (2150.0 - 2470.0] \u001b[32m▏\u001b[39m25\n (2470.0 - 2780.0] \u001b[32m▏\u001b[39m34\n (2780.0 - 3100.0] \u001b[32m▎\u001b[39m47\n (3100.0 - 3420.0] \u001b[32m▏\u001b[39m29\n (3420.0 - 3730.0] \u001b[32m▏\u001b[39m31\n (3730.0 - 4050.0] \u001b[32m▏\u001b[39m14\n (4050.0 - 4360.0] \u001b[32m▏\u001b[39m7\n (4360.0 - 4680.0] \u001b[32m▏\u001b[39m7\n (4680.0 - 5000.0] \u001b[32m▏\u001b[39m8\n (5000.0 - 5310.0] \u001b[32m▏\u001b[39m10\n (5310.0 - 5630.0] \u001b[32m▏\u001b[39m8\n (5630.0 - 7420.0] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 1.200 μs (0.00% GC); mean: 1.302 μs (0.00% GC); median: 1.220 μs (0.00% GC); max: 7.420 μs (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Octavian.jl\n@benchmark matmul!($C0, $A, $B)", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 7, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 142; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (700.0 - 790.0 ] \u001b[32m██████████████████████████████ \u001b[39m8213\n (790.0 - 870.0 ] \u001b[32m██▌\u001b[39m661\n (870.0 - 960.0 ] \u001b[32m█\u001b[39m249\n (960.0 - 1040.0] \u001b[32m█▏\u001b[39m283\n (1040.0 - 1130.0] \u001b[32m▋\u001b[39m152\n (1130.0 - 1210.0] \u001b[32m▋\u001b[39m137\n (1210.0 - 1300.0] \u001b[32m▍\u001b[39m82\n (1300.0 - 1380.0] \u001b[32m▍\u001b[39m78\n (1380.0 - 1470.0] \u001b[32m▎\u001b[39m48\n (1470.0 - 1550.0] \u001b[32m▏\u001b[39m31\n (1550.0 - 1640.0] \u001b[32m▏\u001b[39m10\n (1640.0 - 1720.0] \u001b[32m▏\u001b[39m13\n (1720.0 - 1800.0] \u001b[32m▏\u001b[39m20\n (1800.0 - 1890.0] \u001b[32m▏\u001b[39m13\n (1890.0 - 2380.0] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 704.225 ns (0.00% GC); mean: 781.582 ns (0.00% GC); median: 727.465 ns (0.00% GC); max: 2.385 μs (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "using StaticArrays\nAm = MMatrix{M, K}(A)\nBm = MMatrix{K, N}(B)\nCm = similar(Am);", | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark mul!($Cm, $Am, $Bm)", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 9, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 9; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (2270.0 - 2700.0 ] \u001b[32m██████████████████████████████▏\u001b[39m7222\n (2700.0 - 3130.0 ] \u001b[32m███▌\u001b[39m826\n (3130.0 - 3570.0 ] \u001b[32m█▍\u001b[39m329\n (3570.0 - 4000.0 ] \u001b[32m███▉\u001b[39m913\n (4000.0 - 4440.0 ] \u001b[32m█▋\u001b[39m362\n (4440.0 - 4870.0 ] \u001b[32m▋\u001b[39m123\n (4870.0 - 5310.0 ] \u001b[32m▎\u001b[39m52\n (5310.0 - 5740.0 ] \u001b[32m▎\u001b[39m32\n (5740.0 - 6170.0 ] \u001b[32m▏\u001b[39m27\n (6170.0 - 6610.0 ] \u001b[32m▎\u001b[39m31\n (6610.0 - 7040.0 ] \u001b[32m▏\u001b[39m23\n (7040.0 - 7480.0 ] \u001b[32m▏\u001b[39m11\n (7480.0 - 7910.0 ] \u001b[32m▏\u001b[39m25\n (7910.0 - 8340.0 ] \u001b[32m▏\u001b[39m14\n (8340.0 - 33570.0] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 2.267 μs (0.00% GC); mean: 2.850 μs (0.00% GC); median: 2.511 μs (0.00% GC); max: 33.567 μs (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# StaticArrays.MMatrix are statically sized\n@benchmark matmul!($Cm, $Am, $Bm)", | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 10, | |
"data": { | |
"text/plain": "samples: 10000; evals/sample: 157; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (670.0 - 760.0 ] \u001b[32m██████████████████████████████ \u001b[39m7928\n (760.0 - 850.0 ] \u001b[32m█▍\u001b[39m360\n (850.0 - 940.0 ] \u001b[32m█▏\u001b[39m277\n (940.0 - 1030.0] \u001b[32m█▊\u001b[39m441\n (1030.0 - 1130.0] \u001b[32m█\u001b[39m257\n (1130.0 - 1220.0] \u001b[32m█▋\u001b[39m417\n (1220.0 - 1310.0] \u001b[32m▌\u001b[39m119\n (1310.0 - 1400.0] \u001b[32m▍\u001b[39m71\n (1400.0 - 1490.0] \u001b[32m▎\u001b[39m55\n (1490.0 - 1580.0] \u001b[32m▏\u001b[39m23\n (1580.0 - 1680.0] \u001b[32m▏\u001b[39m17\n (1680.0 - 1770.0] \u001b[32m▏\u001b[39m14\n (1770.0 - 1860.0] \u001b[32m▏\u001b[39m7\n (1860.0 - 1950.0] \u001b[32m▏\u001b[39m4\n (1950.0 - 2730.0] \u001b[32m▏\u001b[39m10\n\n Counts\n\nmin: 668.790 ns (0.00% GC); mean: 785.391 ns (0.00% GC); median: 714.013 ns (0.00% GC); max: 2.734 μs (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"@webio": { | |
"lastKernelId": null, | |
"lastCommId": null | |
}, | |
"kernelspec": { | |
"name": "julia-1.7-depwarn-o3", | |
"display_name": "Julia 1.7.0-DEV depwarn -O3", | |
"language": "julia" | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"name": "julia", | |
"mimetype": "application/julia", | |
"version": "1.7.0" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
}, | |
"gist": { | |
"id": "6123aef79488bc20b52047656fc6f015", | |
"data": { | |
"description": "Octavian", | |
"public": true | |
} | |
}, | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/6123aef79488bc20b52047656fc6f015" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment