Created
July 10, 2020 08:46
-
-
Save ivan-pi/5cf86ba198bc497331fba3d3a1a07c59 to your computer and use it in GitHub Desktop.
Port of the libm cbrt function (https://svnweb.freebsd.org/base/head/lib/msun/src/s_cbrt.c?revision=342563&view=markup) from C to Fortran. This code has not been verified. Use at your own risk!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! @(#)s_cbrt.c 5.1 93/09/24 | |
! | |
! ==================================================== | |
! Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. | |
! | |
! Developed at SunPro, a Sun Microsystems, Inc. business. | |
! Permission to use, copy, modify, and distribute this | |
! software is freely granted, provided that this notice | |
! is preserved. | |
! ==================================================== | |
! | |
! Optimized by Bruce D. Evans. | |
! Ported to Fortran by Ivan Pribec. | |
module cbrt_libm | |
use iso_fortran_env, only: int32, int64, dp => real64 | |
implicit none | |
public | |
integer(int32), parameter :: B1 = 715094163 ! B1 = (1023-1023/3-0.03306235651)*2**20 | |
integer(int32), parameter :: B2 = 696219795 ! B2 = (1023-1023/3-54/3-0.03306235651)*2**20 | |
real(dp), parameter :: P0 = 1.87595182427177009643_dp ! 0x3ffe03e6, 0x0f61e692 | |
real(dp), parameter :: P1 = -1.88497979543377169875_dp ! 0xbffe28e0, 0x92f02420 | |
real(dp), parameter :: P2 = 1.621429720105354466140_dp ! 0x3ff9f160, 0x4a49d6c2 | |
real(dp), parameter :: P3 = -0.758397934778766047437_dp ! 0xbfe844cb, 0xbee751d9 | |
real(dp), parameter :: P4 = 0.145996192886612446982_dp ! 0x3fc2b000, 0xd4e4edd7 | |
contains | |
! cbrt(x) | |
! Return cube root of x | |
! | |
elemental function cbrt(x) result(y) | |
real(dp), intent(in) :: x | |
real(dp) :: y | |
real(dp) :: r, s, t, w | |
integer(int32) :: hx, sign | |
integer(int32) :: high, low | |
integer(int64) :: ix | |
real(dp) :: uvalue | |
integer(int64) :: ubits | |
sign = 0 | |
high = 0 | |
low = 0 | |
hx = 0 | |
ubits = 0 | |
t = 0.0_dp | |
ix = transfer(x,ix) | |
low = ibits(ix,0,32) ! get first word | |
hx = ibits(ix,32,32) ! get second word | |
! write(*,'(B64)') x | |
! write(*,'(B32,B32)') hx, low | |
sign = iand(hx,int(z'80000000',int32)) ! sign = sign(x) | |
! write(*,'(A,I0,X,B32)') "sign = ", sign, sign | |
hx = ieor(hx,sign) | |
if (hx >= int(z'7ff00000',int32)) then ! cbrt(NaN,INF) is itself | |
y = x | |
return | |
end if | |
! | |
! Rough cbrt to 5 bits: | |
! cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3) | |
! where e is integral and >= 0, m is real and in [0, 1), and "/" and | |
! "%" are integer division and modulus with rounding towards minus | |
! infinity. The RHS is always >= the LHS and has a maximum relative | |
! error of about 1 in 16. Adding a bias of -0.03306235651 to the | |
! (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE | |
! floating point representation, for finite positive normal values, | |
! ordinary integer division of the value in bits magically gives | |
! almost exactly the RHS of the above provided we first subtract the | |
! exponent bias (1023 for doubles) and later add it back. We do the | |
! subtraction virtually to keep e >= 0 so that ordinary integer | |
! division rounds towards minus infinity; this is also efficient. | |
! | |
if (hx < int(z'00100000',int32)) then ! zero or subnormal? | |
if (ior(hx,low) == 0) then | |
y = x | |
return ! cbrt(0) is itself | |
end if | |
ix = z'43500000' ! set t = 2**54 | |
! write(*,'(A,B64)') "ix = ", ix | |
ix = ishft(ix,32) | |
! write(*,'(A,B64)') "ix = ", ix | |
t = transfer(ix,t) | |
t = t*x | |
ix = transfer(t,ix) | |
high = ibits(ix,32,32) | |
ix = ior(sign,iand(high,int(z'7fffffff',int32))/3 + B2) | |
ix = ishft(ix,32) | |
t = transfer(ix,t) | |
else | |
ix = ior(sign,(hx/3+B1)) | |
ix = ishft(ix,32) | |
t = transfer(ix,t) | |
end if | |
! | |
! New cbrt to 23 bits: | |
! cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x) | |
! where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r) | |
! to within 2**-23.5 when |r - 1| < 1/10. The rough approximation | |
! has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this | |
! gives us bounds for r = t**3/x. | |
! | |
! Try to optimize for parallel evaluation as in k_tanf.c. | |
! | |
r = (t*t)*(t/x) | |
t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4)) | |
! | |
! Round t away from zero to 23 bits (sloppily except for ensuring that | |
! the result is larger in magnitude than cbrt(x) but not much more than | |
! 2 23-bit ulps larger). With rounding towards zero, the error bound | |
! would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps | |
! in the rounded t, the infinite-precision error in the Newton | |
! approximation barely affects third digit in the final error | |
! 0.667; the error in the rounded t can be up to about 3 23-bit ulps | |
! before the final error is larger than 0.667 ulps. | |
! | |
uvalue = t | |
ubits = iand((ubits + int(z'80000000',int64)),& | |
int(z'ffffffffc0000000',int64)) | |
t = uvalue | |
! one step Newton iteration to 53 bits with error < 0.667 ulps | |
s = t*t | |
r = x/s | |
w = t + t | |
r = (r-t)/(w + r) | |
t = t + t*r | |
y = t | |
end function | |
end module | |
program main | |
use cbrt_libm | |
use ieee_arithmetic | |
implicit none | |
real(dp) :: nan, inf, x, y | |
integer(int64) :: ix | |
real(dp) :: a(1000000), b(1000000) | |
x = -1._dp | |
nan = sqrt(x) | |
IF (ieee_support_inf(inf)) THEN | |
inf = ieee_value(inf, ieee_positive_inf) | |
END IF | |
print *, "Not a number" | |
y = cbrt(nan) | |
print *, "y = ", y | |
print *, "Infinity" | |
y = cbrt(inf) | |
print *, "y = ", y | |
y = cbrt(-inf) | |
print *, "y = ", y | |
print *, "Zero" | |
y = cbrt(0.0_dp) | |
print *, "y = ", y | |
y = cbrt(-0.0_dp) | |
print *, "y = ", y | |
print *, "Denormal" | |
ix = 0 | |
ix = ibset(ix,0) | |
x = transfer(ix,x) | |
print *, x | |
y = cbrt(x) | |
print *, "y = ", y | |
print *, "Regular values" | |
y = cbrt(27._dp) | |
print *, "y = ", y, (27._dp)**(1._dp/3._dp) | |
y = cbrt(-27._dp) | |
print *, "y = ", y | |
call random_number(a) | |
! a = 54*a - 27 | |
b = cbrt(a) | |
print *, maxval(abs(b - a**(1._dp/3))), sum(abs(b - a**(1._dp/3))) | |
end program |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment