Skip to content

Instantly share code, notes, and snippets.

@ivan-pi
Last active March 4, 2025 22:18
Show Gist options
  • Save ivan-pi/452111a31f056824f0af6d91f3c1bd2c to your computer and use it in GitHub Desktop.
Save ivan-pi/452111a31f056824f0af6d91f3c1bd2c to your computer and use it in GitHub Desktop.
Abstraction Penalty of procedure overloading and type composition in Fortran
! abstraction_penalty.F90 --
! A Fortran version of the Stepanov Abstraction Penalty Benchmark
!
! The aim of this benchmark is to verify how efficiently
! compilers can deal with Fortran array semantics and data abstraction.
!
! The structure of the benchmark is quite simple. It adds 2000
! doubles in an array 25000 times. Since arrays are first-class citizens
! in Fortran, it is expected that a compiler can simply create the
! an array descriptor without the creation of temporary array copies
!
! 0 - uses a simple loop and assumed-size arrays (F77-style)
! 1-9 use assumed-shape arrays of doubles
! 10-13 use assumed-shape arrays of type(dd) - double wrapped in derived type
! 14-15 use assumed-shape arrays of type(ddd) - type(dd) wrapped in a derived type
! 16-21 use arrays of type(ddi) - a child type of type(dd)
! 22-23 use pointer arrays of class(dd)
!
! Some of the variants use pointer arrays, some use component selection,
! and some use both. Note that within extended types, the parent is also
! a component.
!
! By default, the overloaded sums are computing using a naive sum algorithm.
! This can be modified using preprocessor options:
!
! -DUSE_INTRINSIC_SUM use the built-in sum() function
! -DUSE_INTRINSIC_REDUCE use the builtin reduce() function
! -DUSE_STRUCTURE_CONSTRUCTOR use of the structure constructor
! for the addition result
!
! The test result is only meaningful if the compiler didn't create a
! contiguous array copy. To check this use the flag
!
! -DCHECK_ARRAY_TEMP check if temporary arrays are used
!
! When checking array temporaries it is recommended to cross-check with
! the built-in compiler options such as,
!
! * -fcheck=array-temps (gfortran)
! * -check arg_temp_created (ifx/ifort)
!
module dd_mod
use, intrinsic :: iso_c_binding, only: c_double
implicit none
integer, parameter :: dp = c_double
! Double wrapper
type :: dd
real(dp) :: val
end type
! Double wrapper child with TBP
type, extends(dd) :: ddi
contains
procedure :: get => get_ddi_val
end type
! Double wrapper wrapper
type :: ddd
type(dd) :: val
end type
! Double wrapper wrapper child with TBP
type, extends(ddd) :: dddi
contains
procedure :: get => get_dddi_val
end type
interface operator(+)
module procedure dd_add
module procedure ddd_add
end interface
#if defined(_OPENMP) && _OPENMP >= 201811
#define USE_OMP_USER_DEFINED_REDUCTIONS 1
#else
#define USE_OMP_USER_DEFINED_REDUCTIONS 0
#endif
#if USE_OMP_USER_DEFINED_REDUCTION
!$omp declare reduction(+:dd,ddd:omp_out=omp_out+omp_in)
#endif
interface dsum
module procedure dp_sum
module procedure dd_sum
module procedure ddd_sum
end interface
#if defined(USE_INTRINSIC_SUM) && defined(USE_INTRINSIC_REDUCE)
#error "Use either sum or reduce, not both!"
#endif
interface operator(/=)
module procedure dd_neq
module procedure ddd_neq
end interface
contains
subroutine sum_info
#if USE_INTRINSIC_SUM
print '(A)', "[info] using intrinsic sum"
#elif USE_INTRINSIC_REDUCE
print '(A)', "[info] using intrinsic reduce"
#else
#if USE_OMP_USER_DEFINED_REDUCTIONS
print '(A)', "[info] using naive sum with OpenMP user-defined reduction"
#else
print '(A)', "[info] using naive sum"
#endif
#if USE_STRUCTURE_CONSTRUCTOR
print '(A)', "[info] using structure constructor within addition functions"
#endif
#endif
end subroutine
pure function dp_add(a,b) result(c)
real(dp), intent(in) :: a, b
real(dp) :: c
c = a + b
end function
pure function dp_sum(a) result(res)
real(dp), intent(in) :: a(:)
real(dp) :: res
#if USE_INTRINSIC_SUM
res = sum(a)
#elif USE_INTRINSIC_REDUCE
res = reduce(a,dp_add)
#else
integer :: i
res = 0.0_dp
!$omp simd reduction(+: res)
do i = 1, size(a)
res = res + a(i)
end do
#endif
end function
pure function dd_add(a,b) result(c)
type(dd), intent(in) :: a, b
type(dd) :: c
#if USE_STRUCTURE_CONSTRUCTOR
c = dd(a%val+b%val)
#else
c%val = a%val + b%val
#endif
end function
pure function dd_sum(a) result(res)
type(dd), intent(in) :: a(:)
type(dd) :: res
#if USE_INTRINSIC_SUM
#if USE_STRUCTURE_CONSTRUCTOR
res = dd(sum(a%val))
#else
res%val = sum(a%val)
#endif
#elif USE_INTRINSIC_REDUCE
res = reduce(a,dd_add)
#else
integer :: i
res = dd(0.0_dp)
#if USE_OMP_USER_DEFINED_REDUCTIONS
!$omp simd reduction(+: res)
#endif
do i = 1, size(a)
res = res + a(i)
end do
#endif
end function
elemental function dd_neq(a,b) result(res)
type(dd), intent(in) :: a
real(dp), intent(in) :: b
logical :: res
res = a%val /= b
end function
! --------
pure function ddd_add(a,b) result(c)
type(ddd), intent(in) :: a, b
type(ddd) :: c
#if USE_STRUCTURE_CONSTRUCTOR
c = ddd(dd(a%val%val + b%val%val))
#else
c%val%val = a%val%val + b%val%val
#endif
end function
pure function ddd_sum(a) result(res)
type(ddd), intent(in) :: a(:)
type(ddd) :: res
#if USE_INTRINSIC_SUM
#if USE_STRUCTURE_CONSTRUCTOR
res = ddd(dd(sum(a%val%val)))
#else
res%val%val = sum(a%val%val)
#endif
#elif USE_INTRINSIC_REDUCE
res = reduce(a,ddd_add)
#else
integer :: i
res = ddd(dd(0.0_dp))
#if USE_OMP_USER_DEFINED_REDUCTIONS
!$omp simd reduction(+:res)
#endif
do i = 1, size(a)
res = res + a(i)
end do
#endif
end function
elemental function ddd_neq(a,b) result(res)
type(ddd), intent(in) :: a
real(dp), intent(in) :: b
logical :: res
res = a%val%val /= b
end function
! We add some trivial getter procedures to
! the child type, just to see if it changes anything
pure function get_ddi_val(self) result(res)
class(ddi), intent(in) :: self
real(dp) :: res
res = self%val
end function
pure function get_dddi_val(self) result(res)
class(dddi), intent(in) :: self
real(dp) :: res
res = self%val%val
end function
end module
program abstraction_penalty
use, intrinsic :: iso_c_binding, only: c_loc, c_ptr, c_intptr_t
use, intrinsic :: iso_fortran_env, only: ik => int64, &
compiler_version, compiler_options
use dd_mod
implicit none
#define NITER 25000
integer :: niter = NITER
character(32) :: str
integer, parameter :: sz = 2000
! With 2000 elements, the size is roughly 16 KB
! and fits within the cache
real(dp), target :: data(sz)
type(dd), target :: dd_data(sz)
type(ddd), target :: ddd_data(sz)
type(ddi), target :: ddi_data(sz)
type(dddi), target :: dddi_data(sz)
real(dp), pointer :: ptr(:)
type(dd), pointer :: dd_ptr(:)
type(ddd), pointer :: ddd_ptr(:)
type(ddi), pointer :: ddi_ptr(:)
type(dddi), pointer :: dddi_ptr(:)
class(dd), pointer :: dd_poly_ptr(:)
class(ddd), pointer :: ddd_poly_ptr(:)
interface test
procedure :: test_dp
procedure :: test_dd
procedure :: test_ddd
end interface
integer, parameter :: TEST_SZ = 40
real(dp) :: result_times(0:TEST_SZ-1)
logical :: array_temp(0:TEST_SZ-1)
integer :: k, i, itest = 0
real(dp), parameter :: init_value = 3.0_dp
type(c_ptr) :: data_loc
! Assume no array temporaries are used
array_temp = .false.
if (command_argument_count() > 0) then
call get_command_argument(1,str)
read(str,*) niter
end if
print '(A)', "[info] compiler: "//compiler_version()
print '(A)', "[info] compiler options: "//compiler_options()
#if CHECK_ARRAY_TEMP
print '(A)', "[info] temporary array checking is ON"
#endif
! Information about the summation procedure
call sum_info
print '(A,I0)', "[info] number of iterations: ", niter
data = init_value
! Initialize via structure constructor (elemental)
dd_data = dd(init_value)
ddd_data = ddd(dd(init_value))
ddi_data = ddi(init_value)
dddi_data = dddi(dd(init_value))
! call test0(size(data),data) ! Cache warm-up
itest = 0
call test0(size(data),data) ! #0
! For temporary array checking we should
! pass a pointer to the first element
#if CHECK_ARRAY_TEMP
#define PTR(a) data_loc = c_loc(a)
#else
#define PTR(a)
#endif
#define PTR_AND_TEST(a1,array) \
PTR(a1); \
call test(array)
PTR_AND_TEST(data(1),data) ! #1
! --- Component selection
PTR_AND_TEST(dd_data(1),dd_data%val) ! #2
PTR_AND_TEST(ddd_data(1),ddd_data%val%val) ! #3
! --- Pointers (4-6)
ptr => data
PTR_AND_TEST(ptr(1),ptr)
ptr => dd_data%val
PTR_AND_TEST(ptr(1),ptr)
ptr => ddd_data%val%val
PTR_AND_TEST(ptr(1),ptr)
! --- Pointers and component selection (7-9)
dd_ptr => dd_data
PTR_AND_TEST(dd_ptr(1),dd_ptr%val)
dd_ptr => ddd_data%val
PTR_AND_TEST(dd_ptr(1),dd_ptr%val)
ddd_ptr => ddd_data
PTR_AND_TEST(ddd_ptr(1),ddd_ptr%val%val)
! --- Single composition (10-13)
PTR_AND_TEST(dd_data(1),dd_data)
PTR_AND_TEST(ddd_data(1),ddd_data%val)
dd_ptr => dd_data
PTR_AND_TEST(dd_ptr(1),dd_ptr)
dd_ptr => ddd_data%val
PTR_AND_TEST(dd_ptr(1),dd_ptr)
! --- Double composition (14-15)
PTR_AND_TEST(ddd_data(1),ddd_data)
ddd_ptr => ddd_data
PTR_AND_TEST(ddd_data(1),ddd_ptr)
! --- Child type of dd (16-18,19-21)
PTR_AND_TEST(ddi_data(1),ddi_data%val)
PTR_AND_TEST(ddi_data(1),ddi_data%dd%val)
PTR_AND_TEST(ddi_data(1),ddi_data%dd)
ddi_ptr => ddi_data
PTR_AND_TEST(ddi_ptr(1),ddi_ptr%val)
PTR_AND_TEST(ddi_ptr(1),ddi_ptr%dd%val)
PTR_AND_TEST(ddi_ptr(1),ddi_ptr%dd)
! --- Polymorphic variables (22-23)
do k = 1, 2
if (k == 1) dd_poly_ptr => dd_data
if (k == 2) dd_poly_ptr => ddi_data
select type(dd_poly_ptr)
type is (dd)
PTR_AND_TEST(dd_poly_ptr(1),dd_poly_ptr)
type is (ddi)
PTR_AND_TEST(dd_poly_ptr(1),dd_poly_ptr%dd)
class default
error stop "[error] Unexpected type."
end select
end do
! --- Child type of ddd (24-27)
PTR_AND_TEST(dddi_data(1),dddi_data%val%val)
PTR_AND_TEST(dddi_data(1),dddi_data%ddd%val%val)
dddi_ptr => dddi_data
PTR_AND_TEST(dddi_ptr(1),dddi_ptr%val%val)
PTR_AND_TEST(dddi_ptr(1),dddi_ptr%ddd%val%val)
! --- Polymorphic variables (28-29)
do k = 1, 2
if (k == 1) ddd_poly_ptr => ddd_data
if (k == 2) ddd_poly_ptr => dddi_data
select type(ddd_poly_ptr)
type is (ddd)
PTR_AND_TEST(ddd_poly_ptr(1),ddd_poly_ptr)
type is (dddi)
PTR_AND_TEST(ddd_poly_ptr(1),ddd_poly_ptr%ddd)
class default
error stop "[error] Unexpected type."
end select
end do
! *********************
#if CHECK_ARRAY_TEMP
if (any(array_temp)) then
print '(A,I0,"/",I0,A)', &
"[warn] potential use of temporary arrays in ", &
count(array_temp), itest, " tests"
print '(A,*(G0,:,", "))', "[warn] affected tests: ", &
pack([integer :: (i,i=0,itest-1)],mask=array_temp(0:itest-1))
end if
#endif
call summarize
contains
!> The reference case is the plain old Fortran case
impure subroutine test0(n,array)
integer, intent(in) :: n
real(dp), intent(in) :: array(*)
integer(ik) :: tstart, tend, clock_rate
real(dp) :: res
integer :: i, j
call system_clock(tstart)
do i = 1, niter
#if USE_INTRINSIC_SUM
res = sum(array(1:n))
#elif USE_INTRINSIC_REDUCE
res = reduce(array(1:n),dp_add)
#else
res = 0.0_dp
do j = 1, n
res = res + array(j)
end do
#endif
if (res /= sz*init_value) then
print '("test ",I0," failed")', itest
end if
end do
call system_clock(tend,clock_rate)
result_times(itest) = real(tend - tstart,dp)/real(clock_rate,dp)
itest = itest + 1
end subroutine
#if CHECK_ARRAY_TEMP
logical function check_loc(smth)
type(*), intent(in), target :: smth(:)
check_loc = transfer(data_loc,1_c_intptr_t) /= &
transfer(c_loc(smth),1_c_intptr_t)
end function
#endif
!> Check summation as an assumed shape array argument
!> N.b. some compilers create temporary arrays; in that
!> case the test is meaningless
impure subroutine test_dp(array)
real(dp), intent(in) :: array(:)
integer(ik) :: tstart, tend, clock_rate
real(dp) :: res
integer :: i
#if CHECK_ARRAY_TEMP
TARGET :: array
array_temp(itest) = check_loc(array)
#endif
call system_clock(tstart)
do i = 1, niter
res = sum(array)
if (res /= sz*init_value) then
print '("test ",I0," failed")', itest
end if
end do
call system_clock(tend,clock_rate)
result_times(itest) = real(tend - tstart,dp)/real(clock_rate,dp)
itest = itest + 1
end subroutine
impure subroutine test_dd(array)
type(dd), intent(in) :: array(:)
integer(ik) :: tstart, tend, clock_rate
type(dd) :: res
integer :: i
#if CHECK_ARRAY_TEMP
TARGET :: array
array_temp(itest) = check_loc(array)
#endif
call system_clock(tstart)
do i = 1, niter
res = dsum(array)
if (res /= sz*init_value) then
print '("test ",I0," failed")', itest
end if
end do
call system_clock(tend,clock_rate)
result_times(itest) = real(tend - tstart,dp)/real(clock_rate,dp)
itest = itest + 1
end subroutine
impure subroutine test_ddd(array)
type(ddd), intent(in) :: array(:)
integer(ik) :: tstart, tend, clock_rate
type(ddd) :: res
integer :: i
#if CHECK_ARRAY_TEMP
TARGET :: array
array_temp(itest) = check_loc(array)
#endif
call system_clock(tstart)
do i = 1, niter
res = dsum(array)
if (res /= sz*init_value) then
print '("test ",I0," failed")', itest
end if
end do
call system_clock(tend,clock_rate)
result_times(itest) = real(tend - tstart,dp)/real(clock_rate,dp)
itest = itest + 1
end subroutine
subroutine summarize()
real(dp) :: millions, total_time, gmean_times, gmean_rate, gmean_ratio
real(dp), allocatable :: times(:)
integer :: i, ntest
if (itest > TEST_SZ-1) error stop "[error] exceeded result table size"
write(*,'(/,4A12)') "test", "absolute", "additions", "ratio with"
write(*,'(4A12,/)') "number", "time (sec)", "per second", "test0"
millions = real(sz,dp) * niter / 1.0e6_dp
do i = 0, itest-1
#if CHECK_ARRAY_TEMP
if (array_temp(i)) cycle
#endif
write(*,'(I12, F12.4, ES12.3, F12.3)') &
i, result_times(i), millions/result_times(i), &
result_times(i)/result_times(0)
end do
#if CHECK_ARRAY_TEMP
times = pack(result_times(0:itest-1), &
mask=.not.array_temp(0:itest-1))
#else
times = result_times(0:itest-1)
#endif
ntest = size(times)
total_time = sum(times)
gmean_times = sum(log(times))
gmean_rate = sum(log(millions/times))
gmean_ratio = sum(log(times/times(1)))
write(*, '(A)') repeat('-',4*12)
write(*, '(A12, F12.4, ES12.3, F12.2)') "mean", &
exp(gmean_times/ntest), &
exp(gmean_rate/ntest), &
exp(gmean_ratio/ntest)
write(*,'(/,"Total time: ", F0.2, " sec")') total_time
write(*,'("Abstraction Penalty: ", F0.2, /)') exp(gmean_ratio/ntest)
end subroutine
end program
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment