Last active
August 16, 2017 02:22
-
-
Save musm/7b51c83e56bfc1a4de58df23be3db3f0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
julia> versioninfo() | |
Julia Version 0.7.0-DEV.1208 | |
Commit 952dc93489* (2017-08-02 23:54 UTC) | |
Platform Info: | |
OS: Linux (x86_64-linux-gnu) | |
CPU: Intel(R) Core(TM) i7-4510U CPU @ 2.00GHz | |
WORD_SIZE: 64 | |
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell) | |
LAPACK: libopenblas64_ | |
LIBM: libopenlibm | |
LLVM: libLLVM-3.9.1 (ORCJIT, haswell) | |
Environment: | |
using Base.Math.@horner, Base.sqrt_llvm | |
# asin methods | |
ASIN_X_MIN_THRESHOLD(::Type{Float32}) = 2.0f0^-12 | |
ASIN_X_MIN_THRESHOLD(::Type{Float64}) = sqrt(eps(Float64)) | |
asin_p(t::Float64) = | |
t*@horner(t, | |
1.66666666666666657415e-01, | |
-3.25565818622400915405e-01, | |
2.01212532134862925881e-01, | |
-4.00555345006794114027e-02, | |
7.91534994289814532176e-04, | |
3.47933107596021167570e-05) | |
asin_q(t::Float64) = | |
@horner(t, | |
1.0, | |
-2.40339491173441421878e+00, | |
2.02094576023350569471e+00, | |
-6.88283971605453293030e-01, | |
7.70381505559019352791e-02) | |
asin_p(t::Float32) = | |
t*@horner(t, | |
1.6666586697f-01, | |
-4.2743422091f-02, | |
-8.6563630030f-03) | |
asin_q(t::Float32) = @horner(t, 1.0f0, -7.0662963390f-01) | |
@inline function asin_kernel(t::Float64, x::Float64) | |
pio2_lo = 6.12323399573676603587e-17 | |
s = sqrt_llvm(t) | |
p = asin_p(t) # numerator polynomial | |
q = asin_q(t) # denominator polynomial | |
if abs(x) >= 0.975 # |x| > 0.975 | |
Rx = p/q | |
return flipsign(pi/2 - (2.0*(s + s*Rx) - pio2_lo), x) | |
else | |
s0 = reinterpret(Float64, (reinterpret(UInt64, s) >> 32) << 32) | |
c = (t - s0*s0)/(s + s0) | |
Rx = p/q | |
p = 2.0*s*Rx - (pio2_lo - 2.0*c) | |
q = pi/4 - 2.0*s0 | |
return flipsign(pi/4 - (p-q), x) | |
end | |
end | |
@inline function asin_kernel(t::Float32, x::Float32) | |
s = sqrt_llvm(Float64(t)) | |
p = asin_p(t) # numerator polynomial | |
q = asin_q(t) # denominator polynomial | |
Rx = p/q # rational approximation | |
flipsign(Float32(pi/2 - 2*(s + s*Rx)), x) | |
end | |
@noinline asin_domain_error(x) = throw(DomainError(x, "asin(x) is not defined for |x|>1.")) | |
function asin1(x::T) where T<:Union{Float32, Float64} | |
absx = abs(x) | |
if absx >= T(1.0) # |x|>= 1 | |
if absx == T(1.0) | |
return flipsign(T(pi)/2, x) | |
end | |
asin_domain_error(x) | |
elseif absx < T(1.0)/2 | |
# if |x| sufficiently small, |x| is a good approximation | |
if absx < ASIN_X_MIN_THRESHOLD(T) | |
return x | |
end | |
# else if |x|<0.5 we use a rational approximation R(x)=p(x)/q(x) such that | |
# tan(x) ≈ x+x*R(x) | |
x² = x*x | |
Rx = asin_p(x²)/asin_q(x²) # rational approximation | |
return muladd(x, Rx, x) | |
end | |
# else 1/2 <= |x| < 1 | |
t = (T(1.0) - absx)/2 | |
return asin_kernel(t, x) | |
end | |
function asin2(x::T) where T<:Union{Float32, Float64} | |
absx = abs(x) | |
if absx >= T(1.0) # |x|>= 1 | |
if absx == T(1.0) | |
return flipsign(T(pi)/2, x) | |
end | |
throw(DomainError("SDF")) | |
elseif absx < T(1.0)/2 | |
# if |x| sufficiently small, |x| is a good approximation | |
if absx < ASIN_X_MIN_THRESHOLD(T) | |
return x | |
end | |
# else if |x|<0.5 we use a rational approximation R(x)=p(x)/q(x) such that | |
# tan(x) ≈ x+x*R(x) | |
x² = x*x | |
Rx = asin_p(x²)/asin_q(x²) # rational approximation | |
return muladd(x, Rx, x) | |
end | |
# else 1/2 <= |x| < 1 | |
t = (T(1.0) - absx)/2 | |
return asin_kernel(t, x) | |
end | |
julia> @benchmark asin2(.532) | |
BenchmarkTools.Trial: | |
memory estimate: 0 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 14.000 ns (0.00% GC) | |
median time: 15.000 ns (0.00% GC) | |
mean time: 16.310 ns (0.00% GC) | |
maximum time: 67.000 ns (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1000 | |
julia> @benchmark asin1(.532) | |
BenchmarkTools.Trial: | |
memory estimate: 0 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 13.000 ns (0.00% GC) | |
median time: 15.000 ns (0.00% GC) | |
mean time: 15.408 ns (0.00% GC) | |
maximum time: 112.000 ns (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1000 | |
julia> @benchmark asin2(.532f0) | |
BenchmarkTools.Trial: | |
memory estimate: 0 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 9.000 ns (0.00% GC) | |
median time: 10.000 ns (0.00% GC) | |
mean time: 10.001 ns (0.00% GC) | |
maximum time: 98.000 ns (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1000 | |
julia> @benchmark asin1(.532f0) | |
BenchmarkTools.Trial: | |
memory estimate: 0 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 7.000 ns (0.00% GC) | |
median time: 8.000 ns (0.00% GC) | |
mean time: 8.361 ns (0.00% GC) | |
maximum time: 200.000 ns (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1000 | |
julia> @benchmark asin1(.9f0) | |
BenchmarkTools.Trial: | |
memory estimate: 0 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 7.000 ns (0.00% GC) | |
median time: 8.000 ns (0.00% GC) | |
mean time: 8.397 ns (0.00% GC) | |
maximum time: 47.000 ns (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1000 | |
julia> @benchmark asin2(.9f0) | |
BenchmarkTools.Trial: | |
memory estimate: 0 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 9.000 ns (0.00% GC) | |
median time: 10.000 ns (0.00% GC) | |
mean time: 10.607 ns (0.00% GC) | |
maximum time: 83.000 ns (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1000 | |
julia> @benchmark asin2(.9) | |
BenchmarkTools.Trial: | |
memory estimate: 0 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 15.000 ns (0.00% GC) | |
median time: 15.000 ns (0.00% GC) | |
mean time: 15.975 ns (0.00% GC) | |
maximum time: 57.000 ns (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1000 | |
julia> @benchmark asin1(.9) | |
BenchmarkTools.Trial: | |
memory estimate: 0 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 13.000 ns (0.00% GC) | |
median time: 14.000 ns (0.00% GC) | |
mean time: 15.044 ns (0.00% GC) | |
maximum time: 69.000 ns (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1000 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Different Machine