Last active
August 29, 2015 14:14
-
-
Save simonbyrne/62f25608aaf831de5996 to your computer and use it in GitHub Desktop.
Comparison of methods for zeroing out lower order bits of a floating point number
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# use floating point AND instructions (andps/andpd)
# https://github.com/JuliaLang/julia/issues/9868
"""
    and_float(x::Float64, y::Float64)

Return the `Float64` whose bit pattern is the bitwise AND of the bit patterns of
`x` and `y`.

Each operand is inserted into lane 0 of a `<2 x double>` vector, the vectors are
bitcast to `<2 x i64>` and ANDed, and lane 0 of the result is extracted. The
vector form is used so that the AND can be performed with the floating point
AND instructions instead of going through integer registers.
"""
function and_float(x::Float64, y::Float64)
    return Base.llvmcall(
        """
        %av = insertelement <2 x double> undef, double %0, i32 0
        %bv = insertelement <2 x double> undef, double %1, i32 0
        %ai = bitcast <2 x double> %av to <2 x i64>
        %bi = bitcast <2 x double> %bv to <2 x i64>
        %and.i = and <2 x i64> %ai, %bi
        %cf = bitcast <2 x i64> %and.i to <2 x double>
        %cfe = extractelement <2 x double> %cf, i32 0
        ret double %cfe
        """,
        Float64, Tuple{Float64,Float64}, x, y)
end
# 1) convert to and from float32
"""
    t1(x::Float64)

Drop the low-order bits of `x` by rounding it to `Float32` and widening the
result back to `Float64`.

The narrowing step rounds rather than truncates, and values outside the
`Float32` range do not survive the round trip unchanged.
"""
t1(x::Float64) = Float64(Float32(x))
# 2) Dekker (1971) trick: known as Veltkamp splitting
# Splitter constant, equal to 2^27 + 1.
const half64 = 1.34217729e8

"""
    t2(x::Float64)

Drop the low-order bits of `x` using the Dekker/Veltkamp splitting trick:
multiply by the splitter `half64`, then compute `(x - p) + p`, which leaves
only the high-order part of `x`.

The intermediate product `x * half64` overflows for very large `abs(x)`, in
which case the result is not finite.
"""
function t2(x::Float64)
    p = x * half64
    return (x - p) + p
end
# 3) AND out bits
"""
    t3(x::Float64)

Drop the low-order bits of `x` by reinterpreting it as a `UInt64`, clearing the
low 32 bits with a mask, and reinterpreting the result as a `Float64`.
"""
function t3(x::Float64)
    bits = reinterpret(UInt64, x)
    return reinterpret(Float64, bits & 0xffff_ffff_0000_0000)
end
# 4) AND out bits using and_float
"""
    t4(x::Float64)

Drop the low-order bits of `x` by ANDing it, via `and_float`, with a `Float64`
whose bit pattern is `0xffff_ffff_0000_0000` (low 32 bits clear).
"""
t4(x::Float64) = and_float(x, reinterpret(Float64, 0xffff_ffff_0000_0000))
"""
    test1(N)

Benchmark kernel for `t1`: starting from `0.0`, repeat `N` times "add `1.323`,
then reduce the precision of the running sum with `t1`". Return the final sum.
"""
function test1(N)
    acc = 0.0
    for _ in 1:N
        acc = t1(acc + 1.323)
    end
    return acc
end
"""
    test2(N)

Benchmark kernel for `t2`: starting from `0.0`, repeat `N` times "add `1.323`,
then reduce the precision of the running sum with `t2`". Return the final sum.
"""
function test2(N)
    acc = 0.0
    for _ in 1:N
        acc = t2(acc + 1.323)
    end
    return acc
end
"""
    test3(N)

Benchmark kernel for `t3`: starting from `0.0`, repeat `N` times "add `1.323`,
then reduce the precision of the running sum with `t3`". Return the final sum.
"""
function test3(N)
    acc = 0.0
    for _ in 1:N
        acc = t3(acc + 1.323)
    end
    return acc
end
"""
    test4(N)

Benchmark kernel for `t4`: starting from `0.0`, repeat `N` times "add `1.323`,
then reduce the precision of the running sum with `t4`". Return the final sum.
"""
function test4(N)
    acc = 0.0
    for _ in 1:N
        acc = t4(acc + 1.323)
    end
    return acc
end
# Warm up each kernel once so the timings below exclude JIT compilation.
test1(1); test2(1); test3(1); test4(1);

# Time each method over the same number of iterations.
@time test1(100_000_000);
@time test2(100_000_000);
@time test3(100_000_000);
@time test4(100_000_000);
# after warmup:
# julia> @time test1(100_000_000);
# elapsed time: 0.324144731 seconds (96 bytes allocated)
# julia> @time test2(100_000_000);
# elapsed time: 0.525205335 seconds (96 bytes allocated)
# julia> @time test3(100_000_000);
# elapsed time: 0.220910499 seconds (96 bytes allocated)
# julia> @time test4(100_000_000);
# elapsed time: 0.14726621 seconds (96 bytes allocated)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment