Last active
May 21, 2021 03:17
-
-
Save genkuroki/e73671622c6ee038286ce0fdb18a7407 to your computer and use it in GitHub Desktop.
sum of sin(n)/n
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "versioninfo()", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "Julia Version 1.7.0-DEV.1112\nCommit 8a0a9a7388 (2021-05-18 18:42 UTC)\nPlatform Info:\n OS: Windows (x86_64-w64-mingw32)\n CPU: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz\n WORD_SIZE: 64\n LIBM: libopenlibm\n LLVM: libLLVM-11.0.1 (ORCJIT, skylake)\nEnvironment:\n JULIA_NUM_THREADS = 12\n JULIA_PYTHONCALL_EXE = C:\\Users\\genkuroki\\.julia\\conda\\3\\python.exe\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "using LoopVectorization, SLEEFPirates, BenchmarkTools", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "sin_ccall(x::Float64) = ccall(:sin, Float64, (Float64,), x)\n\nfunction f_ccall(N)\n y = 0.0\n for n in 1:N\n y += sin_ccall(Float64(n))/n\n end\n 2y + 1\nend\n\n@btime f_ccall(10^6)", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 31.395 ms (0 allocations: 0 bytes)\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 3, | |
"data": { | |
"text/plain": "3.141590588883843" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "function f_naive(N)\n y = 0.0\n for n in 1:N\n y += sin(n)/n\n end\n 2y + 1\nend\n\n@btime f_naive(10^6)", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 11.739 ms (0 allocations: 0 bytes)\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 4, | |
"data": { | |
"text/plain": "3.141590588883843" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "function f_simd(N)\n y = 0.0\n @simd for n in 1:N\n y += sin(n)/n\n end\n 2y + 1\nend\n\n@btime f_simd(10^6)", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 11.932 ms (0 allocations: 0 bytes)\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": "3.141590588883843" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "function f_sleef(N)\n y = 0.0\n for n in 1:N\n y += SLEEFPirates.sin_fast(float(n))/n\n end\n 2y + 1\nend\n\n@btime f_sleef(10^6)", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 6.512 ms (0 allocations: 0 bytes)\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 6, | |
"data": { | |
"text/plain": "3.1415905888838433" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "function f_simd_sleef(N)\n y = 0.0\n @simd for n in 1:N\n y += SLEEFPirates.sin_fast(float(n))/n\n end\n 2y + 1\nend\n\n@btime f_simd_sleef(10^6)", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 3.026 ms (0 allocations: 0 bytes)\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 7, | |
"data": { | |
"text/plain": "3.141590588883785" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "function f_avx(N)\n y = 0.0\n @avx for n in 1:N\n y += sin(n)/n\n end\n 2y + 1\nend\n\n@btime f_avx(10^6)", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 1.609 ms (0 allocations: 0 bytes)\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 8, | |
"data": { | |
"text/plain": "3.1415905888837843" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "function f_avx_sleef(N)\n y = 0.0\n @avx for n in 1:N\n y += SLEEFPirates.sin_fast(float(n))/n\n end\n 2y + 1\nend\n\n@btime f_avx_sleef(10^6)", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 2.347 ms (0 allocations: 0 bytes)\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 9, | |
"data": { | |
"text/plain": "3.141590588883785" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "function f_avxt(N)\n y = 0.0\n @avxt for n in 1:N\n y += sin(n)/n\n end\n 2y + 1\nend\n\n@btime f_avxt(10^6)", | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 375.300 μs (24 allocations: 464 bytes)\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 10, | |
"data": { | |
"text/plain": "3.141590588883795" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "function f_avxt_sleef(N)\n y = 0.0\n @avxt for n in 1:N\n y += SLEEFPirates.sin_fast(float(n))/n\n end\n 2y + 1\nend\n\n@btime f_avxt_sleef(10^6)", | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 476.200 μs (24 allocations: 464 bytes)\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 11, | |
"data": { | |
"text/plain": "3.14159058888379" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "print(\"f_ccall(10^6): \")\n@btime f_ccall(10^6)\nprint(\"f_naive(10^6): \")\n@btime f_naive(10^6)\nprint(\"f_simd(10^6): \")\n@btime f_simd(10^6)\nprint(\"f_sleef(10^6): \")\n@btime f_sleef(10^6)\nprint(\"f_simd_sleef(10^6): \")\n@btime f_simd_sleef(10^6)\nprint(\"f_avx(10^6): \")\n@btime f_avx(10^6)\nprint(\"f_avx_sleef(10^6): \")\n@btime f_avx_sleef(10^6)\nprint(\"f_avxt(10^6): \")\n@btime f_avxt(10^6)\nprint(\"f_avxt_sleef(10^6): \")\n@btime f_avxt_sleef(10^6);", | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "f_ccall(10^6): 31.138 ms (0 allocations: 0 bytes)\nf_naive(10^6): 11.924 ms (0 allocations: 0 bytes)\nf_simd(10^6): 11.913 ms (0 allocations: 0 bytes)\nf_sleef(10^6): 6.439 ms (0 allocations: 0 bytes)\nf_simd_sleef(10^6): 2.974 ms (0 allocations: 0 bytes)\nf_avx(10^6): 1.613 ms (0 allocations: 0 bytes)\nf_avx_sleef(10^6): 2.349 ms (0 allocations: 0 bytes)\nf_avxt(10^6): 353.200 μs (24 allocations: 464 bytes)\nf_avxt_sleef(10^6): 566.400 μs (24 allocations: 464 bytes)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "log_ccall(x::Float64) = ccall(:log, Float64, (Float64,), x)\n\nfunction g_naive(N)\n y = 0.0\n for n in 1:N\n y += log(n)\n end\n y\nend\n\nfunction g_ccall(N)\n y = 0.0\n for n in 1:N\n y += log_ccall(float(n))\n end\n y\nend\n\nfunction g_sleef(N)\n y = 0.0\n for n in 1:N\n y += SLEEFPirates.log_fast(float(n))\n end\n y\nend\n\nfunction g_simd_sleef(N)\n y = 0.0\n @simd for n in 1:N\n y += SLEEFPirates.log_fast(float(n))\n end\n y\nend\n\nfunction g_avx(N)\n y = 0.0\n @avx for n in 1:N\n y += log(n)\n end\n y\nend\n\nfunction g_avx_sleef(N)\n y = 0.0\n @avx for n in 1:N\n y += SLEEFPirates.log_fast(float(n))\n end\n y\nend\n\nfunction g_avxt(N)\n y = 0.0\n @avxt for n in 1:N\n y += log(n)\n end\n y\nend\n\nfunction g_avxt_sleef(N)\n y = 0.0\n @avxt for n in 1:N\n y += SLEEFPirates.log_fast(float(n))\n end\n y\nend\n\ng_stirling(N) = N*log(N) - N + log(N)/2 + log(2π)/2 + 1/(12N)\n\n@show g_naive(10^6)\n@show g_ccall(10^6)\n@show g_sleef(10^6)\n@show g_simd_sleef(10^6)\n@show g_avx(10^6)\n@show g_avx_sleef(10^6)\n@show g_avxt(10^6)\n@show g_avxt_sleef(10^6)\n@show g_stirling(10^6)\nprintln()\n\nprint(\"g_naive(10^6): \")\n@btime g_naive(10^6)\nprint(\"g_ccall(10^6): \")\n@btime g_ccall(10^6)\nprint(\"g_sleef(10^6): \")\n@btime g_sleef(10^6)\nprint(\"g_simd_sleef(10^6): \")\n@btime g_simd_sleef(10^6)\nprint(\"g_avx(10^6): \")\n@btime g_avx(10^6)\nprint(\"g_avx_sleef(10^6): \")\n@btime g_avx_sleef(10^6)\nprint(\"g_avxt(10^6): \")\n@btime g_avxt(10^6)\nprint(\"g_avxt_sleef(10^6): \")\n@btime g_avxt_sleef(10^6)\nprint(\"g_stirling(10^6): \")\n@btime g_stirling(10^6);", | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "g_naive(10 ^ 6) = 1.2815518384657895e7\ng_ccall(10 ^ 6) = 1.2815518384657895e7\ng_sleef(10 ^ 6) = 1.2815518384657895e7\ng_simd_sleef(10 ^ 6) = 1.2815518384658162e7\ng_avx(10 ^ 6) = 1.281551838465816e7\ng_avx_sleef(10 ^ 6) = 1.2815518384658225e7\ng_avxt(10 ^ 6) = 1.2815518384658162e7\ng_avxt_sleef(10 ^ 6) = 1.2815518384658169e7\ng_stirling(10 ^ 6) = 1.2815518384658167e7\n\ng_naive(10^6): 10.776 ms (0 allocations: 0 bytes)\ng_ccall(10^6): 8.716 ms (0 allocations: 0 bytes)\ng_sleef(10^6): 4.236 ms (0 allocations: 0 bytes)\ng_simd_sleef(10^6): 1.539 ms (0 allocations: 0 bytes)\ng_avx(10^6): 2.289 ms (0 allocations: 0 bytes)\ng_avx_sleef(10^6): 1.859 ms (0 allocations: 0 bytes)\ng_avxt(10^6): 464.400 μs (24 allocations: 464 bytes)\ng_avxt_sleef(10^6): 346.100 μs (24 allocations: 464 bytes)\ng_stirling(10^6): 22.992 ns (0 allocations: 0 bytes)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"@webio": { | |
"lastKernelId": null, | |
"lastCommId": null | |
}, | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/e73671622c6ee038286ce0fdb18a7407" | |
}, | |
"gist": { | |
"id": "e73671622c6ee038286ce0fdb18a7407", | |
"data": { | |
"description": "sum of sin(x)/n", | |
"public": true | |
} | |
}, | |
"kernelspec": { | |
"name": "julia-1.7-depwarn-o3", | |
"display_name": "Julia 1.7.0-DEV depwarn -O3", | |
"language": "julia" | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"name": "julia", | |
"mimetype": "application/julia", | |
"version": "1.7.0" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment