Skip to content

Instantly share code, notes, and snippets.

@simonbyrne
Last active August 29, 2015 14:14
Show Gist options
  • Save simonbyrne/62f25608aaf831de5996 to your computer and use it in GitHub Desktop.
Save simonbyrne/62f25608aaf831de5996 to your computer and use it in GitHub Desktop.
Comparison of methods for zeroing out the low-order bits of a floating-point number
# use floating point AND instructions (andps/andpd)
# https://github.com/JuliaLang/julia/issues/9868
# Bitwise AND of two Float64 values, returned as a Float64.
#
# The body is inline LLVM IR handed to `Base.llvmcall`; `%0` and `%1` are the
# arguments `x` and `y`. The IR performs these steps:
#   1. insert each scalar into element 0 of a <2 x double> vector (element 1 is undef),
#   2. bitcast both vectors to <2 x i64> and AND them,
#   3. bitcast the result back to <2 x double> and extract element 0.
# Routing the operation through vector types is presumably what steers LLVM toward
# the packed floating-point AND instructions (andps/andpd) rather than an integer
# AND -- confirm against the generated code with `@code_native`.
function and_float(x::Float64,y::Float64)
# NOTE: the text inside the triple quotes is LLVM IR, not Julia; it is passed
# verbatim, followed by the return type, the argument-type tuple and the arguments.
Base.llvmcall("""%av = insertelement<2 x double> undef, double %0, i32 0
%bv = insertelement<2 x double> undef, double %1, i32 0
%ai = bitcast <2 x double> %av to <2 x i64>
%bi = bitcast <2 x double> %bv to <2 x i64>
%and.i = and <2 x i64> %ai, %bi
%cf = bitcast <2 x i64> %and.i to <2 x double>
%cfe = extractelement<2 x double> %cf, i32 0
ret double %cfe""",Float64,(Float64,Float64),x,y)
end
# 1) convert to and from float32
# Round `x` to single precision and widen it back to Float64.
# Widening is exact, so the result has at most 24 significant bits. The narrowing
# step rounds to nearest (it does not truncate) and obeys the Float32 range, so
# magnitudes beyond the Float32 maximum become +/-Inf.
# The `Float32`/`Float64` constructors are used because the lowercase
# `float32`/`float64` conversion functions were deprecated in Julia 0.4 and
# subsequently removed.
t1(x::Float64) = Float64(Float32(x))
# 2) Dekker (1971) trick: known as Veltkamp splitting
# Splitting constant for the Veltkamp split: 1.34217729e8 == 2^27 + 1.
const half64 = 1.34217729e8

# Return the high-order part of `x` obtained by Veltkamp splitting with `half64`.
# The scaled product is formed first, then subtracted from and added back to `x`;
# the rounding in those steps discards the low-order bits of the significand.
function t2(x::Float64)
    scaled = x * half64
    residual = x - scaled
    return residual + scaled
end
# 3) AND out bits
# Clear the low 32 bits of the bit pattern of `x` with an integer AND.
# The value is reinterpreted as UInt64, masked so that only the upper 32 bits
# survive, and reinterpreted back as Float64.
function t3(x::Float64)
    bits = reinterpret(UInt64, x)
    kept = bits & 0xffff_ffff_0000_0000
    return reinterpret(Float64, kept)
end
# 4) AND out bits using and_float
# Clear the low 32 bits of the bit pattern of `x` using `and_float`.
# The mask is the Float64 whose bit pattern is 0xffff_ffff_0000_0000, so the AND
# keeps only the upper 32 bits of `x`.
function t4(x::Float64)
    mask = reinterpret(Float64, 0xffff_ffff_0000_0000)
    return and_float(x, mask)
end
# Benchmark kernel for `t1`: starting from 0.0, add 1.323 and apply `t1` to the
# running value `N` times, then return the final value.
function test1(N)
    acc = 0.0
    for _ in 1:N
        acc = t1(acc + 1.323)
    end
    return acc
end
# Benchmark kernel for `t2`: starting from 0.0, add 1.323 and apply `t2` to the
# running value `N` times, then return the final value.
function test2(N)
    acc = 0.0
    for _ in 1:N
        acc = t2(acc + 1.323)
    end
    return acc
end
# Benchmark kernel for `t3`: starting from 0.0, add 1.323 and apply `t3` to the
# running value `N` times, then return the final value.
function test3(N)
    acc = 0.0
    for _ in 1:N
        acc = t3(acc + 1.323)
    end
    return acc
end
# Benchmark kernel for `t4`: starting from 0.0, add 1.323 and apply `t4` to the
# running value `N` times, then return the final value.
function test4(N)
    acc = 0.0
    for _ in 1:N
        acc = t4(acc + 1.323)
    end
    return acc
end
# Warm-up: call every benchmark once with a tiny trip count so each method is
# compiled before it is timed. `@time` on a first call would otherwise include
# JIT compilation, and the figures would not be comparable across the variants.
test1(1); test2(1); test3(1); test4(1)

# Timed runs: 100 million iterations per variant.
@time test1(100_000_000);
@time test2(100_000_000);
@time test3(100_000_000);
@time test4(100_000_000);
# after warmup:
# julia> @time test1(100_000_000);
# elapsed time: 0.324144731 seconds (96 bytes allocated)
# julia> @time test2(100_000_000);
# elapsed time: 0.525205335 seconds (96 bytes allocated)
# julia> @time test3(100_000_000);
# elapsed time: 0.220910499 seconds (96 bytes allocated)
# julia> @time test4(100_000_000);
# elapsed time: 0.14726621 seconds (96 bytes allocated)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment