ivan-pi · July 10, 2020 08:46
diff --git a/cbrt_libm_example.f90 b/cbrt_libm_example.f90
 ! @(#)s_cbrt.c 5.1 93/09/24
 !
 ! ====================================================
 ! Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 !
 ! Developed at SunPro, a Sun Microsystems, Inc. business.
 ! Permission to use, copy, modify, and distribute this
 ! software is freely granted, provided that this notice
 ! is preserved.
 ! ====================================================
 !
 ! Optimized by Bruce D. Evans.
 ! Ported to Fortran by Ivan Pribec.

 module cbrt_libm
  use iso_fortran_env, only: int32, int64, dp => real64
  implicit none
  public 

  integer(int32), parameter :: B1 = 715094163 ! B1 = (1023-1023/3-0.03306235651)*2**20
  integer(int32), parameter :: B2 = 696219795 ! B2 = (1023-1023/3-54/3-0.03306235651)*2**20

  real(dp), parameter :: P0 =  1.87595182427177009643_dp  ! 0x3ffe03e6, 0x0f61e692
  real(dp), parameter :: P1 = -1.88497979543377169875_dp  ! 0xbffe28e0, 0x92f02420
  real(dp), parameter :: P2 =  1.621429720105354466140_dp ! 0x3ff9f160, 0x4a49d6c2
  real(dp), parameter :: P3 = -0.758397934778766047437_dp ! 0xbfe844cb, 0xbee751d9
  real(dp), parameter :: P4 =  0.145996192886612446982_dp ! 0x3fc2b000, 0xd4e4edd7

 contains


  ! cbrt(x)
  ! Return cube root of x
  ! 
  elemental function cbrt(x) result(y)
    real(dp), intent(in) :: x
    real(dp) :: y

    real(dp) :: r, s, t, w
    integer(int32) :: hx, sign
    integer(int32) :: high, low
    integer(int64) :: ix
    real(dp) :: uvalue
    integer(int64) :: ubits

    sign = 0
    high = 0
    low = 0
    hx = 0
    ubits = 0
    t = 0.0_dp

    ix = transfer(x,ix)
    low = ibits(ix,0,32) ! get first word
    hx = ibits(ix,32,32) ! get second word

    ! write(*,'(B64)') x
    ! write(*,'(B32,B32)') hx, low

    sign = iand(hx,int(z'80000000',int32)) ! sign = sign(x)
    ! write(*,'(A,I0,X,B32)') "sign = ", sign, sign
    hx = ieor(hx,sign)
    if (hx >= int(z'7ff00000',int32)) then ! cbrt(NaN,INF) is itself
      y = x
      return
    end if
  !
  ! Rough cbrt to 5 bits:
  !    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
  ! where e is integral and >= 0, m is real and in [0, 1), and "/" and
  ! "%" are integer division and modulus with rounding towards minus
  ! infinity.  The RHS is always >= the LHS and has a maximum relative
  ! error of about 1 in 16.  Adding a bias of -0.03306235651 to the
  ! (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
  ! floating point representation, for finite positive normal values,
  ! ordinary integer division of the value in bits magically gives
  ! almost exactly the RHS of the above provided we first subtract the
  ! exponent bias (1023 for doubles) and later add it back.  We do the
  ! subtraction virtually to keep e >= 0 so that ordinary integer
  ! division rounds towards minus infinity; this is also efficient.
  !
    if (hx < int(z'00100000',int32)) then ! zero or subnormal?
      if (ior(hx,low) == 0) then
        y = x 
        return ! cbrt(0) is itself
      end if
      ix = z'43500000' ! set t = 2**54
      ! write(*,'(A,B64)') "ix = ", ix
      ix = ishft(ix,32)
      ! write(*,'(A,B64)') "ix = ", ix
      t = transfer(ix,t)
      t = t*x
      ix = transfer(t,ix)
      high = ibits(ix,32,32)
      ix = ior(sign,iand(high,int(z'7fffffff',int32))/3 + B2)
      ix = ishft(ix,32)
      t = transfer(ix,t)
    else
      ix = ior(sign,(hx/3+B1))
      ix = ishft(ix,32)
      t = transfer(ix,t)
    end if

  ! 
  ! New cbrt to 23 bits:
  !    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
  ! where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
  ! to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
  ! has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
  ! gives us bounds for r = t**3/x.
  ! 
  ! Try to optimize for parallel evaluation as in k_tanf.c.
  ! 
    r = (t*t)*(t/x)
    t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4))

  !
  ! Round t away from zero to 23 bits (sloppily except for ensuring that
  ! the result is larger in magnitude than cbrt(x) but not much more than
  ! 2 23-bit ulps larger).  With rounding towards zero, the error bound
  ! would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
  ! in the rounded t, the infinite-precision error in the Newton
  ! approximation barely affects third digit in the final error
  ! 0.667; the error in the rounded t can be up to about 3 23-bit ulps
  ! before the final error is larger than 0.667 ulps.
  ! 
    uvalue = t
    ubits = iand((ubits + int(z'80000000',int64)),&
      int(z'ffffffffc0000000',int64))
    t = uvalue

  ! one step Newton iteration to 53 bits with error < 0.667 ulps
    s = t*t
    r = x/s
    w = t + t
    r = (r-t)/(w + r)
    t = t + t*r

    y = t
  end function

 end module

 program main
  use cbrt_libm
  use ieee_arithmetic
  implicit none
  real(dp) :: nan, inf, x, y
  integer(int64) :: ix
  real(dp) :: a(1000000), b(1000000)

  x = -1._dp
  nan = sqrt(x)
  IF (ieee_support_inf(inf)) THEN
    inf = ieee_value(inf,  ieee_positive_inf)
  END IF

  print *, "Not a number"
  y = cbrt(nan)
  print *, "y = ", y

  print *, "Infinity"
  y = cbrt(inf)
  print *, "y = ", y
  y = cbrt(-inf)
  print *, "y = ", y

  print *, "Zero"
  y = cbrt(0.0_dp)
  print *, "y = ", y
  y = cbrt(-0.0_dp)
  print *, "y = ", y

  print *, "Denormal"
  ix = 0
  ix = ibset(ix,0)
  x = transfer(ix,x)
  print *, x
  y = cbrt(x)
  print *, "y = ", y

  print *, "Regular values"
  y = cbrt(27._dp)
  print *, "y = ", y, (27._dp)**(1._dp/3._dp)
  y = cbrt(-27._dp)
  print *, "y = ", y

  call random_number(a)
  ! a = 54*a - 27
  b = cbrt(a)
  print *, maxval(abs(b - a**(1._dp/3))), sum(abs(b - a**(1._dp/3)))
 end program
	! @(#)s_cbrt.c 5.1 93/09/24
	!
	! ====================================================
	! Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
	!
	! Developed at SunPro, a Sun Microsystems, Inc. business.
	! Permission to use, copy, modify, and distribute this
	! software is freely granted, provided that this notice
	! is preserved.
	! ====================================================
	!
	! Optimized by Bruce D. Evans.
	! Ported to Fortran by Ivan Pribec.

	module cbrt_libm
	use iso_fortran_env, only: int32, int64, dp => real64
	implicit none
	public

	integer(int32), parameter :: B1 = 715094163 ! B1 = (1023-1023/3-0.03306235651)2*20
	integer(int32), parameter :: B2 = 696219795 ! B2 = (1023-1023/3-54/3-0.03306235651)2*20

	real(dp), parameter :: P0 = 1.87595182427177009643_dp ! 0x3ffe03e6, 0x0f61e692
	real(dp), parameter :: P1 = -1.88497979543377169875_dp ! 0xbffe28e0, 0x92f02420
	real(dp), parameter :: P2 = 1.621429720105354466140_dp ! 0x3ff9f160, 0x4a49d6c2
	real(dp), parameter :: P3 = -0.758397934778766047437_dp ! 0xbfe844cb, 0xbee751d9
	real(dp), parameter :: P4 = 0.145996192886612446982_dp ! 0x3fc2b000, 0xd4e4edd7

	contains


	! cbrt(x)
	! Return cube root of x
	!
	elemental function cbrt(x) result(y)
	real(dp), intent(in) :: x
	real(dp) :: y

	real(dp) :: r, s, t, w
	integer(int32) :: hx, sign
	integer(int32) :: high, low
	integer(int64) :: ix
	real(dp) :: uvalue
	integer(int64) :: ubits

	sign = 0
	high = 0
	low = 0
	hx = 0
	ubits = 0
	t = 0.0_dp

	ix = transfer(x,ix)
	low = ibits(ix,0,32) ! get first word
	hx = ibits(ix,32,32) ! get second word

	! write(*,'(B64)') x
	! write(*,'(B32,B32)') hx, low

	sign = iand(hx,int(z'80000000',int32)) ! sign = sign(x)
	! write(*,'(A,I0,X,B32)') "sign = ", sign, sign
	hx = ieor(hx,sign)
	if (hx >= int(z'7ff00000',int32)) then ! cbrt(NaN,INF) is itself
	y = x
	return
	end if
	!
	! Rough cbrt to 5 bits:
	! cbrt(2*e(1+m) ~= 2*(e/3)(1+(e%3+m)/3)
	! where e is integral and >= 0, m is real and in [0, 1), and "/" and
	! "%" are integer division and modulus with rounding towards minus
	! infinity. The RHS is always >= the LHS and has a maximum relative
	! error of about 1 in 16. Adding a bias of -0.03306235651 to the
	! (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
	! floating point representation, for finite positive normal values,
	! ordinary integer division of the value in bits magically gives
	! almost exactly the RHS of the above provided we first subtract the
	! exponent bias (1023 for doubles) and later add it back. We do the
	! subtraction virtually to keep e >= 0 so that ordinary integer
	! division rounds towards minus infinity; this is also efficient.
	!
	if (hx < int(z'00100000',int32)) then ! zero or subnormal?
	if (ior(hx,low) == 0) then
	y = x
	return ! cbrt(0) is itself
	end if
	ix = z'43500000' ! set t = 2**54
	! write(*,'(A,B64)') "ix = ", ix
	ix = ishft(ix,32)
	! write(*,'(A,B64)') "ix = ", ix
	t = transfer(ix,t)
	t = t*x
	ix = transfer(t,ix)
	high = ibits(ix,32,32)
	ix = ior(sign,iand(high,int(z'7fffffff',int32))/3 + B2)
	ix = ishft(ix,32)
	t = transfer(ix,t)
	else
	ix = ior(sign,(hx/3+B1))
	ix = ishft(ix,32)
	t = transfer(ix,t)
	end if

	!
	! New cbrt to 23 bits:
	! cbrt(x) = tcbrt(x/t3) ~= tP(t**3/x)
	! where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
	! to within 2**-23.5 when \|r - 1\| < 1/10. The rough approximation
	! has produced t such than \|t/cbrt(x) - 1\| ~< 1/32, and cubing this
	! gives us bounds for r = t**3/x.
	!
	! Try to optimize for parallel evaluation as in k_tanf.c.
	!
	r = (tt)(t/x)
	t = t((P0+r(P1+rP2))+((rr)r)(P3+r*P4))

	!
	! Round t away from zero to 23 bits (sloppily except for ensuring that
	! the result is larger in magnitude than cbrt(x) but not much more than
	! 2 23-bit ulps larger). With rounding towards zero, the error bound
	! would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps
	! in the rounded t, the infinite-precision error in the Newton
	! approximation barely affects third digit in the final error
	! 0.667; the error in the rounded t can be up to about 3 23-bit ulps
	! before the final error is larger than 0.667 ulps.
	!
	uvalue = t
	ubits = iand((ubits + int(z'80000000',int64)),&
	int(z'ffffffffc0000000',int64))
	t = uvalue

	! one step Newton iteration to 53 bits with error < 0.667 ulps
	s = t*t
	r = x/s
	w = t + t
	r = (r-t)/(w + r)
	t = t + t*r

	y = t
	end function

	end module

	program main
	use cbrt_libm
	use ieee_arithmetic
	implicit none
	real(dp) :: nan, inf, x, y
	integer(int64) :: ix
	real(dp) :: a(1000000), b(1000000)

	x = -1._dp
	nan = sqrt(x)
	IF (ieee_support_inf(inf)) THEN
	inf = ieee_value(inf, ieee_positive_inf)
	END IF

	print *, "Not a number"
	y = cbrt(nan)
	print *, "y = ", y

	print *, "Infinity"
	y = cbrt(inf)
	print *, "y = ", y
	y = cbrt(-inf)
	print *, "y = ", y

	print *, "Zero"
	y = cbrt(0.0_dp)
	print *, "y = ", y
	y = cbrt(-0.0_dp)
	print *, "y = ", y

	print *, "Denormal"
	ix = 0
	ix = ibset(ix,0)
	x = transfer(ix,x)
	print *, x
	y = cbrt(x)
	print *, "y = ", y

	print *, "Regular values"
	y = cbrt(27._dp)
	print , "y = ", y, (27._dp)*(1._dp/3._dp)
	y = cbrt(-27._dp)
	print *, "y = ", y

	call random_number(a)
	! a = 54*a - 27
	b = cbrt(a)
	print , maxval(abs(b - a(1._dp/3))), sum(abs(b - a*(1._dp/3)))
	end program