Last active
March 4, 2025 22:18
-
-
Save ivan-pi/452111a31f056824f0af6d91f3c1bd2c to your computer and use it in GitHub Desktop.
Abstraction Penalty of procedure overloading and type composition in Fortran
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! abstraction_penalty.F90 -- | |
! A Fortran version of the Stepanov Abstraction Penalty Benchmark | |
! | |
! The aim of this benchmark is to verify how efficiently | |
! compilers can deal with Fortran array semantics and data abstraction. | |
! | |
! The structure of the benchmark is quite simple. It adds 2000 | |
! doubles in an array 25000 times. Since arrays are first-class citizens | |
! in Fortran, it is expected that a compiler can simply create the | |
! an array descriptor without the creation of temporary array copies | |
! | |
! 0 - uses a simple loop and assumed-size arrays (F77-style) | |
! 1-9 use assumed-shape arrays of doubles | |
! 10-13 use assumed-shape arrays of type(dd) - double wrapped in derived type | |
! 14-15 use assumed-shape arrays of type(ddd) - type(dd) wrapped in a derived type | |
! 16-21 use arrays of type(ddi) - a child type of type(dd) | |
! 22-23 use pointer arrays of class(dd) | |
! | |
! Some of the variants use pointer arrays, some use component selection, | |
! and some use both. Note that within extended types, the parent is also | |
! a component. | |
! | |
! By default, the overloaded sums are computing using a naive sum algorithm. | |
! This can be modified using preprocessor options: | |
! | |
! -DUSE_INTRINSIC_SUM use the built-in sum() function | |
! -DUSE_INTRINSIC_REDUCE use the builtin reduce() function | |
! -DUSE_STRUCTURE_CONSTRUCTOR use of the structure constructor | |
! for the addition result | |
! | |
! The test result is only meaningful if the compiler didn't create a | |
! contiguous array copy. To check this use the flag | |
! | |
! -DCHECK_ARRAY_TEMP check if temporary arrays are used | |
! | |
! When checking array temporaries it is recommended to cross-check with | |
! the built-in compiler options such as, | |
! | |
! * -fcheck=array-temps (gfortran) | |
! * -check arg_temp_created (ifx/ifort) | |
! | |
module dd_mod | |
use, intrinsic :: iso_c_binding, only: c_double | |
implicit none | |
integer, parameter :: dp = c_double | |
! Double wrapper | |
type :: dd | |
real(dp) :: val | |
end type | |
! Double wrapper child with TBP | |
type, extends(dd) :: ddi | |
contains | |
procedure :: get => get_ddi_val | |
end type | |
! Double wrapper wrapper | |
type :: ddd | |
type(dd) :: val | |
end type | |
! Double wrapper wrapper child with TBP | |
type, extends(ddd) :: dddi | |
contains | |
procedure :: get => get_dddi_val | |
end type | |
interface operator(+) | |
module procedure dd_add | |
module procedure ddd_add | |
end interface | |
#if defined(_OPENMP) && _OPENMP >= 201811 | |
#define USE_OMP_USER_DEFINED_REDUCTIONS 1 | |
#else | |
#define USE_OMP_USER_DEFINED_REDUCTIONS 0 | |
#endif | |
#if USE_OMP_USER_DEFINED_REDUCTION | |
!$omp declare reduction(+:dd,ddd:omp_out=omp_out+omp_in) | |
#endif | |
interface dsum | |
module procedure dp_sum | |
module procedure dd_sum | |
module procedure ddd_sum | |
end interface | |
#if defined(USE_INTRINSIC_SUM) && defined(USE_INTRINSIC_REDUCE) | |
#error "Use either sum or reduce, not both!" | |
#endif | |
interface operator(/=) | |
module procedure dd_neq | |
module procedure ddd_neq | |
end interface | |
contains | |
subroutine sum_info | |
#if USE_INTRINSIC_SUM | |
print '(A)', "[info] using intrinsic sum" | |
#elif USE_INTRINSIC_REDUCE | |
print '(A)', "[info] using intrinsic reduce" | |
#else | |
#if USE_OMP_USER_DEFINED_REDUCTIONS | |
print '(A)', "[info] using naive sum with OpenMP user-defined reduction" | |
#else | |
print '(A)', "[info] using naive sum" | |
#endif | |
#if USE_STRUCTURE_CONSTRUCTOR | |
print '(A)', "[info] using structure constructor within addition functions" | |
#endif | |
#endif | |
end subroutine | |
pure function dp_add(a,b) result(c) | |
real(dp), intent(in) :: a, b | |
real(dp) :: c | |
c = a + b | |
end function | |
pure function dp_sum(a) result(res) | |
real(dp), intent(in) :: a(:) | |
real(dp) :: res | |
#if USE_INTRINSIC_SUM | |
res = sum(a) | |
#elif USE_INTRINSIC_REDUCE | |
res = reduce(a,dp_add) | |
#else | |
integer :: i | |
res = 0.0_dp | |
!$omp simd reduction(+: res) | |
do i = 1, size(a) | |
res = res + a(i) | |
end do | |
#endif | |
end function | |
pure function dd_add(a,b) result(c) | |
type(dd), intent(in) :: a, b | |
type(dd) :: c | |
#if USE_STRUCTURE_CONSTRUCTOR | |
c = dd(a%val+b%val) | |
#else | |
c%val = a%val + b%val | |
#endif | |
end function | |
pure function dd_sum(a) result(res) | |
type(dd), intent(in) :: a(:) | |
type(dd) :: res | |
#if USE_INTRINSIC_SUM | |
#if USE_STRUCTURE_CONSTRUCTOR | |
res = dd(sum(a%val)) | |
#else | |
res%val = sum(a%val) | |
#endif | |
#elif USE_INTRINSIC_REDUCE | |
res = reduce(a,dd_add) | |
#else | |
integer :: i | |
res = dd(0.0_dp) | |
#if USE_OMP_USER_DEFINED_REDUCTIONS | |
!$omp simd reduction(+: res) | |
#endif | |
do i = 1, size(a) | |
res = res + a(i) | |
end do | |
#endif | |
end function | |
elemental function dd_neq(a,b) result(res) | |
type(dd), intent(in) :: a | |
real(dp), intent(in) :: b | |
logical :: res | |
res = a%val /= b | |
end function | |
! -------- | |
pure function ddd_add(a,b) result(c) | |
type(ddd), intent(in) :: a, b | |
type(ddd) :: c | |
#if USE_STRUCTURE_CONSTRUCTOR | |
c = ddd(dd(a%val%val + b%val%val)) | |
#else | |
c%val%val = a%val%val + b%val%val | |
#endif | |
end function | |
pure function ddd_sum(a) result(res) | |
type(ddd), intent(in) :: a(:) | |
type(ddd) :: res | |
#if USE_INTRINSIC_SUM | |
#if USE_STRUCTURE_CONSTRUCTOR | |
res = ddd(dd(sum(a%val%val))) | |
#else | |
res%val%val = sum(a%val%val) | |
#endif | |
#elif USE_INTRINSIC_REDUCE | |
res = reduce(a,ddd_add) | |
#else | |
integer :: i | |
res = ddd(dd(0.0_dp)) | |
#if USE_OMP_USER_DEFINED_REDUCTIONS | |
!$omp simd reduction(+:res) | |
#endif | |
do i = 1, size(a) | |
res = res + a(i) | |
end do | |
#endif | |
end function | |
elemental function ddd_neq(a,b) result(res) | |
type(ddd), intent(in) :: a | |
real(dp), intent(in) :: b | |
logical :: res | |
res = a%val%val /= b | |
end function | |
! We add some trivial getter procedures to | |
! the child type, just to see if it changes anything | |
pure function get_ddi_val(self) result(res) | |
class(ddi), intent(in) :: self | |
real(dp) :: res | |
res = self%val | |
end function | |
pure function get_dddi_val(self) result(res) | |
class(dddi), intent(in) :: self | |
real(dp) :: res | |
res = self%val%val | |
end function | |
end module | |
program abstraction_penalty | |
use, intrinsic :: iso_c_binding, only: c_loc, c_ptr, c_intptr_t | |
use, intrinsic :: iso_fortran_env, only: ik => int64, & | |
compiler_version, compiler_options | |
use dd_mod | |
implicit none | |
#define NITER 25000 | |
integer :: niter = NITER | |
character(32) :: str | |
integer, parameter :: sz = 2000 | |
! With 2000 elements, the size is roughly 16 KB | |
! and fits within the cache | |
real(dp), target :: data(sz) | |
type(dd), target :: dd_data(sz) | |
type(ddd), target :: ddd_data(sz) | |
type(ddi), target :: ddi_data(sz) | |
type(dddi), target :: dddi_data(sz) | |
real(dp), pointer :: ptr(:) | |
type(dd), pointer :: dd_ptr(:) | |
type(ddd), pointer :: ddd_ptr(:) | |
type(ddi), pointer :: ddi_ptr(:) | |
type(dddi), pointer :: dddi_ptr(:) | |
class(dd), pointer :: dd_poly_ptr(:) | |
class(ddd), pointer :: ddd_poly_ptr(:) | |
interface test | |
procedure :: test_dp | |
procedure :: test_dd | |
procedure :: test_ddd | |
end interface | |
integer, parameter :: TEST_SZ = 40 | |
real(dp) :: result_times(0:TEST_SZ-1) | |
logical :: array_temp(0:TEST_SZ-1) | |
integer :: k, i, itest = 0 | |
real(dp), parameter :: init_value = 3.0_dp | |
type(c_ptr) :: data_loc | |
! Assume no array temporaries are used | |
array_temp = .false. | |
if (command_argument_count() > 0) then | |
call get_command_argument(1,str) | |
read(str,*) niter | |
end if | |
print '(A)', "[info] compiler: "//compiler_version() | |
print '(A)', "[info] compiler options: "//compiler_options() | |
#if CHECK_ARRAY_TEMP | |
print '(A)', "[info] temporary array checking is ON" | |
#endif | |
! Information about the summation procedure | |
call sum_info | |
print '(A,I0)', "[info] number of iterations: ", niter | |
data = init_value | |
! Initialize via structure constructor (elemental) | |
dd_data = dd(init_value) | |
ddd_data = ddd(dd(init_value)) | |
ddi_data = ddi(init_value) | |
dddi_data = dddi(dd(init_value)) | |
! call test0(size(data),data) ! Cache warm-up | |
itest = 0 | |
call test0(size(data),data) ! #0 | |
! For temporary array checking we should | |
! pass a pointer to the first element | |
#if CHECK_ARRAY_TEMP | |
#define PTR(a) data_loc = c_loc(a) | |
#else | |
#define PTR(a) | |
#endif | |
#define PTR_AND_TEST(a1,array) \ | |
PTR(a1); \ | |
call test(array) | |
PTR_AND_TEST(data(1),data) ! #1 | |
! --- Component selection | |
PTR_AND_TEST(dd_data(1),dd_data%val) ! #2 | |
PTR_AND_TEST(ddd_data(1),ddd_data%val%val) ! #3 | |
! --- Pointers (4-6) | |
ptr => data | |
PTR_AND_TEST(ptr(1),ptr) | |
ptr => dd_data%val | |
PTR_AND_TEST(ptr(1),ptr) | |
ptr => ddd_data%val%val | |
PTR_AND_TEST(ptr(1),ptr) | |
! --- Pointers and component selection (7-9) | |
dd_ptr => dd_data | |
PTR_AND_TEST(dd_ptr(1),dd_ptr%val) | |
dd_ptr => ddd_data%val | |
PTR_AND_TEST(dd_ptr(1),dd_ptr%val) | |
ddd_ptr => ddd_data | |
PTR_AND_TEST(ddd_ptr(1),ddd_ptr%val%val) | |
! --- Single composition (10-13) | |
PTR_AND_TEST(dd_data(1),dd_data) | |
PTR_AND_TEST(ddd_data(1),ddd_data%val) | |
dd_ptr => dd_data | |
PTR_AND_TEST(dd_ptr(1),dd_ptr) | |
dd_ptr => ddd_data%val | |
PTR_AND_TEST(dd_ptr(1),dd_ptr) | |
! --- Double composition (14-15) | |
PTR_AND_TEST(ddd_data(1),ddd_data) | |
ddd_ptr => ddd_data | |
PTR_AND_TEST(ddd_data(1),ddd_ptr) | |
! --- Child type of dd (16-18,19-21) | |
PTR_AND_TEST(ddi_data(1),ddi_data%val) | |
PTR_AND_TEST(ddi_data(1),ddi_data%dd%val) | |
PTR_AND_TEST(ddi_data(1),ddi_data%dd) | |
ddi_ptr => ddi_data | |
PTR_AND_TEST(ddi_ptr(1),ddi_ptr%val) | |
PTR_AND_TEST(ddi_ptr(1),ddi_ptr%dd%val) | |
PTR_AND_TEST(ddi_ptr(1),ddi_ptr%dd) | |
! --- Polymorphic variables (22-23) | |
do k = 1, 2 | |
if (k == 1) dd_poly_ptr => dd_data | |
if (k == 2) dd_poly_ptr => ddi_data | |
select type(dd_poly_ptr) | |
type is (dd) | |
PTR_AND_TEST(dd_poly_ptr(1),dd_poly_ptr) | |
type is (ddi) | |
PTR_AND_TEST(dd_poly_ptr(1),dd_poly_ptr%dd) | |
class default | |
error stop "[error] Unexpected type." | |
end select | |
end do | |
! --- Child type of ddd (24-27) | |
PTR_AND_TEST(dddi_data(1),dddi_data%val%val) | |
PTR_AND_TEST(dddi_data(1),dddi_data%ddd%val%val) | |
dddi_ptr => dddi_data | |
PTR_AND_TEST(dddi_ptr(1),dddi_ptr%val%val) | |
PTR_AND_TEST(dddi_ptr(1),dddi_ptr%ddd%val%val) | |
! --- Polymorphic variables (28-29) | |
do k = 1, 2 | |
if (k == 1) ddd_poly_ptr => ddd_data | |
if (k == 2) ddd_poly_ptr => dddi_data | |
select type(ddd_poly_ptr) | |
type is (ddd) | |
PTR_AND_TEST(ddd_poly_ptr(1),ddd_poly_ptr) | |
type is (dddi) | |
PTR_AND_TEST(ddd_poly_ptr(1),ddd_poly_ptr%ddd) | |
class default | |
error stop "[error] Unexpected type." | |
end select | |
end do | |
! ********************* | |
#if CHECK_ARRAY_TEMP | |
if (any(array_temp)) then | |
print '(A,I0,"/",I0,A)', & | |
"[warn] potential use of temporary arrays in ", & | |
count(array_temp), itest, " tests" | |
print '(A,*(G0,:,", "))', "[warn] affected tests: ", & | |
pack([integer :: (i,i=0,itest-1)],mask=array_temp(0:itest-1)) | |
end if | |
#endif | |
call summarize | |
contains | |
!> The reference case is the plain old Fortran case | |
impure subroutine test0(n,array) | |
integer, intent(in) :: n | |
real(dp), intent(in) :: array(*) | |
integer(ik) :: tstart, tend, clock_rate | |
real(dp) :: res | |
integer :: i, j | |
call system_clock(tstart) | |
do i = 1, niter | |
#if USE_INTRINSIC_SUM | |
res = sum(array(1:n)) | |
#elif USE_INTRINSIC_REDUCE | |
res = reduce(array(1:n),dp_add) | |
#else | |
res = 0.0_dp | |
do j = 1, n | |
res = res + array(j) | |
end do | |
#endif | |
if (res /= sz*init_value) then | |
print '("test ",I0," failed")', itest | |
end if | |
end do | |
call system_clock(tend,clock_rate) | |
result_times(itest) = real(tend - tstart,dp)/real(clock_rate,dp) | |
itest = itest + 1 | |
end subroutine | |
#if CHECK_ARRAY_TEMP | |
logical function check_loc(smth) | |
type(*), intent(in), target :: smth(:) | |
check_loc = transfer(data_loc,1_c_intptr_t) /= & | |
transfer(c_loc(smth),1_c_intptr_t) | |
end function | |
#endif | |
!> Check summation as an assumed shape array argument | |
!> N.b. some compilers create temporary arrays; in that | |
!> case the test is meaningless | |
impure subroutine test_dp(array) | |
real(dp), intent(in) :: array(:) | |
integer(ik) :: tstart, tend, clock_rate | |
real(dp) :: res | |
integer :: i | |
#if CHECK_ARRAY_TEMP | |
TARGET :: array | |
array_temp(itest) = check_loc(array) | |
#endif | |
call system_clock(tstart) | |
do i = 1, niter | |
res = sum(array) | |
if (res /= sz*init_value) then | |
print '("test ",I0," failed")', itest | |
end if | |
end do | |
call system_clock(tend,clock_rate) | |
result_times(itest) = real(tend - tstart,dp)/real(clock_rate,dp) | |
itest = itest + 1 | |
end subroutine | |
impure subroutine test_dd(array) | |
type(dd), intent(in) :: array(:) | |
integer(ik) :: tstart, tend, clock_rate | |
type(dd) :: res | |
integer :: i | |
#if CHECK_ARRAY_TEMP | |
TARGET :: array | |
array_temp(itest) = check_loc(array) | |
#endif | |
call system_clock(tstart) | |
do i = 1, niter | |
res = dsum(array) | |
if (res /= sz*init_value) then | |
print '("test ",I0," failed")', itest | |
end if | |
end do | |
call system_clock(tend,clock_rate) | |
result_times(itest) = real(tend - tstart,dp)/real(clock_rate,dp) | |
itest = itest + 1 | |
end subroutine | |
impure subroutine test_ddd(array) | |
type(ddd), intent(in) :: array(:) | |
integer(ik) :: tstart, tend, clock_rate | |
type(ddd) :: res | |
integer :: i | |
#if CHECK_ARRAY_TEMP | |
TARGET :: array | |
array_temp(itest) = check_loc(array) | |
#endif | |
call system_clock(tstart) | |
do i = 1, niter | |
res = dsum(array) | |
if (res /= sz*init_value) then | |
print '("test ",I0," failed")', itest | |
end if | |
end do | |
call system_clock(tend,clock_rate) | |
result_times(itest) = real(tend - tstart,dp)/real(clock_rate,dp) | |
itest = itest + 1 | |
end subroutine | |
subroutine summarize() | |
real(dp) :: millions, total_time, gmean_times, gmean_rate, gmean_ratio | |
real(dp), allocatable :: times(:) | |
integer :: i, ntest | |
if (itest > TEST_SZ-1) error stop "[error] exceeded result table size" | |
write(*,'(/,4A12)') "test", "absolute", "additions", "ratio with" | |
write(*,'(4A12,/)') "number", "time (sec)", "per second", "test0" | |
millions = real(sz,dp) * niter / 1.0e6_dp | |
do i = 0, itest-1 | |
#if CHECK_ARRAY_TEMP | |
if (array_temp(i)) cycle | |
#endif | |
write(*,'(I12, F12.4, ES12.3, F12.3)') & | |
i, result_times(i), millions/result_times(i), & | |
result_times(i)/result_times(0) | |
end do | |
#if CHECK_ARRAY_TEMP | |
times = pack(result_times(0:itest-1), & | |
mask=.not.array_temp(0:itest-1)) | |
#else | |
times = result_times(0:itest-1) | |
#endif | |
ntest = size(times) | |
total_time = sum(times) | |
gmean_times = sum(log(times)) | |
gmean_rate = sum(log(millions/times)) | |
gmean_ratio = sum(log(times/times(1))) | |
write(*, '(A)') repeat('-',4*12) | |
write(*, '(A12, F12.4, ES12.3, F12.2)') "mean", & | |
exp(gmean_times/ntest), & | |
exp(gmean_rate/ntest), & | |
exp(gmean_ratio/ntest) | |
write(*,'(/,"Total time: ", F0.2, " sec")') total_time | |
write(*,'("Abstraction Penalty: ", F0.2, /)') exp(gmean_ratio/ntest) | |
end subroutine | |
end program |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment