Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save genkuroki/429f47a458e67e2e4d5a49d17f1c64bd to your computer and use it in GitHub Desktop.
Save genkuroki/429f47a458e67e2e4d5a49d17f1c64bd to your computer and use it in GitHub Desktop.
invsqrt_reinterpret - fast inverse square root
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "For details, see \n\n* https://en.m.wikipedia.org/wiki/Fast_inverse_square_root\n* https://stackoverflow.com/questions/11644441/fast-inverse-square-root-on-x64/11644533"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "using BenchmarkTools\n\nsqrt_julia(x) = √x\n\nsqrt_newton(x, y) = (y + x/y)/2\nsqrt_newton2(x, y) = (y = sqrt_newton(x, y); sqrt_newton(x, y))\nsqrt_newton3(x, y) = (y = sqrt_newton2(x, y); sqrt_newton(x, y))\nsqrt_newton4(x, y) = (y = sqrt_newton3(x, y); sqrt_newton(x, y))\n\nfunction invsqrt_reinterpret(x::Float32)\n X = reinterpret(UInt32, x)\n Y = 0x5F3759DF - (X >> 1)\n y = reinterpret(Float32, Y)\nend\n\nfunction sqrt_fast(x::Float32)\n y = 1/invsqrt_reinterpret(x)\n y = sqrt_newton3(x, y)\nend\n\nfunction invsqrt_reinterpret(x::Float64)\n X = reinterpret(UInt64, x)\n Y = 0x5fe6eb50c7b537a9 - (X >> 1)\n y = reinterpret(Float64, Y)\nend\n\nfunction sqrt_fast(x::Float64)\n y = 1/invsqrt_reinterpret(x)\n y = sqrt_newton4(x, y)\nend",
"execution_count": 1,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 1,
"data": {
"text/plain": "sqrt_fast (generic function with 2 methods)"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "@show eps(Float32)\n\nx = Float32.(1:10^6)\nx = [inv.(reverse(x)[begin:end-1]); x]\n\na = sqrt_julia.(x)\nb = 1 ./ invsqrt_reinterpret.(x)\nc = sqrt_fast.(x)\n@show extrema(b ./ a .- 1)\n@show extrema(c ./ a .- 1)\nprintln()\n\n@btime sqrt_julia.($x)\n@btime 1 ./ invsqrt_reinterpret.($x)\n@btime sqrt_fast.($x);",
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": "eps(Float32) = 1.1920929f-7\nextrema(b ./ a .- 1) = (-0.0328449f0, 0.03559959f0)\nextrema(c ./ a .- 1) = (-1.1920929f-7, 1.1920929f-7)\n\n 2.225 ms (2 allocations: 7.63 MiB)\n 1.493 ms (2 allocations: 7.63 MiB)\n 7.749 ms (2 allocations: 7.63 MiB)\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "@show eps(Float64)\n\nx = Float64.(1:10^6)\nx = [inv.(reverse(x)[begin:end-1]); x]\n\na = sqrt_julia.(x)\nb = 1 ./ invsqrt_reinterpret.(x)\nc = sqrt_fast.(x)\n@show extrema(b ./ a .- 1)\n@show extrema(c ./ a .- 1)\nprintln()\n\n@btime sqrt_julia.($x)\n@btime 1 ./ invsqrt_reinterpret.($x)\n@btime sqrt_fast.($x);",
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": "eps(Float64) = 2.220446049250313e-16\nextrema(b ./ a .- 1) = (-0.03285974749257892, 0.035588455345643366)\nextrema(c ./ a .- 1) = (-2.220446049250313e-16, 2.220446049250313e-16)\n\n 4.480 ms (2 allocations: 15.26 MiB)\n 3.151 ms (2 allocations: 15.26 MiB)\n 12.049 ms (2 allocations: 15.26 MiB)\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "invsqrt_newton(x, y) = y*(3 - x*y^2)/2\ninvsqrt_newton2(x, y) = (y = invsqrt_newton(x, y); invsqrt_newton(x, y))\ninvsqrt_newton3(x, y) = (y = invsqrt_newton2(x, y); invsqrt_newton(x, y))\ninvsqrt_newton4(x, y) = (y = invsqrt_newton3(x, y); invsqrt_newton(x, y))\n\nfunction invsqrt_fast1(x::Float64)\n y = invsqrt_reinterpret(x)\n y = invsqrt_newton(x, y)\nend\n\nfunction sqrt_fast1(x::Float64)\n y = 1/invsqrt_reinterpret(x)\n y = sqrt_newton(x, y)\nend\n\nfunction invsqrt_fast2(x::Float64)\n y = invsqrt_reinterpret(x)\n y = invsqrt_newton2(x, y)\nend\n\nfunction sqrt_fast2(x::Float64)\n y = 1/invsqrt_reinterpret(x)\n y = sqrt_newton2(x, y)\nend\n\nfunction invsqrt_fast3(x::Float64)\n y = invsqrt_reinterpret(x)\n y = invsqrt_newton3(x, y)\nend\n\nfunction sqrt_fast3(x::Float64)\n y = 1/invsqrt_reinterpret(x)\n y = sqrt_newton3(x, y)\nend\n\nfunction invsqrt_fast4(x::Float64)\n y = invsqrt_reinterpret(x)\n y = invsqrt_newton4(x, y)\nend\n\nfunction sqrt_fast4(x::Float64)\n y = 1/invsqrt_reinterpret(x)\n y = sqrt_newton4(x, y)\nend\n\nx = Float64.(1:10^6)\nx = [inv.(reverse(x)[begin:end-1]); x]\n\na = sqrt_julia.(x)\n\nb1 = inv.(invsqrt_fast1.(x))\nc1 = sqrt_fast1.(x)\n@show mean(abs, b1 ./ a .- 1)\n@show mean(abs, c1 ./ a .- 1);\n\nb2 = inv.(invsqrt_fast2.(x))\nc2 = sqrt_fast2.(x)\n@show mean(abs, b2 ./ a .- 1)\n@show mean(abs, c2 ./ a .- 1);\n\nb3 = inv.(invsqrt_fast3.(x))\nc3 = sqrt_fast3.(x)\n@show mean(abs, b3 ./ a .- 1)\n@show mean(abs, c3 ./ a .- 1);\n\nb4 = inv.(invsqrt_fast4.(x))\nc4 = sqrt_fast4.(x)\n@show mean(abs, b4 ./ a .- 1)\n@show mean(abs, c4 ./ a .- 1);",
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": "mean(abs, b1 ./ a .- 1) = 0.0009016601403790209\nmean(abs, c1 ./ a .- 1) = 0.000292785109371075\nmean(abs, b2 ./ a .- 1) = 1.7285656814890923e-6\nmean(abs, c2 ./ a .- 1) = 6.079679525268528e-8\nmean(abs, b3 ./ a .- 1) = 8.258291374023448e-12\nmean(abs, c3 ./ a .- 1) = 3.4384236552757456e-15\nmean(abs, b4 ./ a .- 1) = 8.285998256065536e-17\nmean(abs, c4 ./ a .- 1) = 4.7287419864661836e-17\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"@webio": {
"lastKernelId": null,
"lastCommId": null
},
"_draft": {
"nbviewer_url": "https://gist.github.com/429f47a458e67e2e4d5a49d17f1c64bd"
},
"gist": {
"id": "429f47a458e67e2e4d5a49d17f1c64bd",
"data": {
"description": "invsqrt_reinterpret - fast inverse square root",
"public": true
}
},
"kernelspec": {
"name": "julia-1.7-depwarn-o3",
"display_name": "Julia 1.7.0-DEV depwarn -O3",
"language": "julia"
},
"language_info": {
"file_extension": ".jl",
"name": "julia",
"mimetype": "application/julia",
"version": "1.7.0"
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"base_numbering": 1,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment