Created
December 29, 2024 19:30
-
-
Save certik/924cf554b0144f00d15937970310fab7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! Code from: | |
! https://fortran-lang.discourse.group/t/nvfortran-comparison-of-do-concurrent-vs-openmp-code/8552/18 | |
! | |
! Adapted version with OpenMP and DO CONCURRENT in the same program file | |
! and removed disk write in the end. | |
! | |
MODULE all_data | |
USE iso_fortran_env, ONLY: int64, i4 => int32, r8 => real64 | |
IMPLICIT NONE | |
!============= | |
INTEGER(i4), PARAMETER :: nx = 2000 | |
INTEGER(i4), PARAMETER :: ny = 2000 | |
REAL(r8), PARAMETER :: dx = 0.03_r8 | |
REAL(r8), PARAMETER :: dy = 0.03_r8 | |
REAL(r8), PARAMETER :: one = 1.0_r8 | |
REAL(r8), PARAMETER :: two = 2.0_r8 | |
REAL(r8), PARAMETER :: four = 4.0_r8 | |
REAL(r8), PARAMETER :: half = 0.5_r8 | |
!============= | |
INTEGER(i4), PARAMETER :: nsteps = 2000 | |
INTEGER(i4), PARAMETER :: nprint = 100 | |
REAL(r8), PARAMETER :: dtime = 0.0001_r8 | |
!=============== | |
REAL(r8), PARAMETER :: tau = 0.0003_r8 | |
REAL(r8), PARAMETER :: epsilonb = 0.01_r8 | |
REAL(r8), PARAMETER :: kappa = 1.8_r8 | |
REAL(r8), PARAMETER :: delta = 0.02_r8 | |
REAL(r8), PARAMETER :: aniso = 6.0_r8 | |
REAL(r8), PARAMETER :: alpha = 0.9_r8 | |
REAL(r8), PARAMETER :: gama = 10.0_r8 | |
REAL(r8), PARAMETER :: teq = 1.0_r8 | |
REAL(r8), PARAMETER :: theta0 = 0.2_r8 | |
REAL(r8), PARAMETER :: seed = 5.0_r8 | |
REAL(r8), PARAMETER :: pix = four*ATAN(one) | |
!=============== | |
REAL(r8), DIMENSION(nx, ny) :: phi, tempr | |
REAL(r8), DIMENSION(nx, ny) :: lap_phi, lap_tempr | |
REAL(r8), DIMENSION(nx, ny) :: phidx, phidy | |
REAL(r8), DIMENSION(nx, ny) :: epsil, epsilon_deriv | |
CONTAINS | |
END MODULE all_data | |
PROGRAM openmp_test | |
USE iso_fortran_env, ONLY: int64, i4 => int32, r8 => real64 | |
USE all_data | |
#ifdef _OPENMP | |
USE omp_lib | |
#endif | |
IMPLICIT NONE | |
INTEGER(i4) :: i, j, ip, im, jp, jm | |
INTEGER(i4) :: tsteps | |
REAL(r8) :: secs, seca, secb | |
REAL(r8) :: phi_old, term1, term2 | |
REAL(r8) :: theta, m | |
WRITE(*, *) 'start of program', i4, r8 | |
WRITE(*, *) 'Vern : ', compiler_version() | |
#ifdef _OPENMP | |
WRITE(*, *) 'Number of OMP threads = ', OMP_GET_MAX_THREADS() | |
#endif | |
CALL timer('start') | |
phi = 0.0 | |
tempr = 0.0 | |
secs = 0 | |
seca = 0 | |
secb = 0 | |
DO j = 1, ny | |
DO i = 1, nx | |
IF ((i - nx/two)*(i - nx/two) + (j - ny/two)*(j - ny/two) < seed) THEN | |
phi(i, j) = one | |
END IF | |
END DO | |
END DO | |
CALL timer('initialise phi') | |
time_loop: DO tsteps = 1, nsteps | |
secs = secs + delta_sec() | |
#ifdef _USE_DO_CONCURRENT_ | |
DO CONCURRENT (j=1:ny, i=1:nx) local(ip, im, jp, jm, theta) & | |
shared(lap_phi, lap_tempr, phi, tempr, phidx, phidy, & | |
epsil, epsilon_deriv) | |
#else | |
!$omp parallel do collapse(2) private(i, j, ip, im, jp, jm, theta) | |
DO j = 1, ny | |
DO i = 1, nx | |
#endif | |
jp = j + 1 | |
jm = j - 1 | |
ip = i + 1 | |
im = i - 1 | |
IF (im == 0) im = nx | |
IF (ip == (nx + 1)) ip = 1 | |
IF (jm == 0) jm = ny | |
IF (jp == (ny + 1)) jp = 1 | |
lap_phi(i, j) = (phi(ip, j) + phi(im, j) + phi(i, jm) & | |
+ phi(i, jp) - four*phi(i, j)) / (dx*dy) | |
lap_tempr(i, j) = (tempr(ip, j) + tempr(im, j) & | |
+ tempr(i, jm) + tempr(i, jp) - four*tempr(i, j)) / (dx*dy) | |
phidx(i, j) = (phi(ip, j) - phi(im, j)) / dx | |
phidy(i, j) = (phi(i, jp) - phi(i, jm)) / dy | |
theta = zatan2 (phidy(i, j), phidx(i, j)) | |
epsil(i, j) = epsilonb*(one + delta*COS(aniso*(theta - theta0))) | |
epsilon_deriv(i, j) = & | |
-epsilonb*aniso*delta*SIN(aniso*(theta - theta0)) | |
#ifdef _USE_DO_CONCURRENT_ | |
END DO | |
#else | |
END DO | |
END DO | |
!$omp end parallel do | |
#endif | |
seca = seca + delta_sec() | |
#ifdef _USE_DO_CONCURRENT_ | |
DO CONCURRENT (j=1:ny, i=1:nx) local(ip, im, jp, jm, phi_old, term1, term2, m) & | |
shared(phi, tempr, epsil, epsilon_deriv, phidx, phidy, lap_phi, lap_tempr) | |
#else | |
!$omp parallel do collapse(2) private(i, j, ip, im, jp, jm, phi_old, term1, term2, m) | |
DO j = 1, ny | |
DO i = 1, nx | |
#endif | |
jp = j + 1 | |
jm = j - 1 | |
ip = i + 1 | |
im = i - 1 | |
IF (im == 0) im = nx | |
IF (ip == (nx + 1)) ip = 1 | |
IF (jm == 0) jm = ny | |
IF (jp == (ny + 1)) jp = 1 | |
phi_old = phi(i, j) | |
term1 = (epsil(i, jp)*epsilon_deriv(i, jp)*phidx(i, jp) & | |
- epsil(i, jm)*epsilon_deriv(i, jm)*phidx(i, jm)) / dy | |
term2 = -(epsil(ip, j)*epsilon_deriv(ip, j)*phidy(ip, j) & | |
- epsil(im, j)*epsilon_deriv(im, j)*phidy(im, j)) / dx | |
m = alpha/pix * ATAN(gama*(teq - tempr(i, j))) | |
phi(i, j) = phi(i, j) + (dtime/tau) * (term1 + term2 & | |
+ epsil(i, j)**2 * lap_phi(i, j)) & | |
+ phi_old * (one - phi_old)*(phi_old - half + m) | |
tempr(i, j) = tempr(i, j) + dtime*lap_tempr(i, j) & | |
+ kappa * (phi(i, j) - phi_old) | |
#ifdef _USE_DO_CONCURRENT_ | |
END DO | |
#else | |
END DO | |
END DO | |
!$omp end parallel do | |
#endif | |
secb = secb + delta_sec() | |
IF (MOD(tsteps, nprint) == 0) WRITE(*, *) 'Done steps = ', tsteps | |
END DO time_loop | |
CALL timer('time_loop') | |
WRITE(*, *) "nx=", nx , "ny=", ny | |
WRITE(*, '(A, 2X, F10.1, 2X, F10.1, 2X, F10.1)') 'stages:', secs, seca, secb | |
CONTAINS | |
PURE REAL(r8) FUNCTION zatan2(y, x) | |
! Function arguments | |
REAL(r8), INTENT(in) :: y, x | |
! Local variables | |
REAL(r8) :: res | |
IF (ABS(x) > 0.0 .OR. ABS (y) > 0.0) THEN | |
res = ATAN2(y, x) | |
ELSE | |
res = 0 | |
END IF | |
zatan2 = res | |
END FUNCTION zatan2 | |
SUBROUTINE timer(desc) | |
! Subroutine arguments | |
CHARACTER(len=*) desc | |
! Local variables | |
INTEGER(int64) :: clock, rate | |
INTEGER(int64), SAVE :: last_clock = -1 | |
REAL(r8) :: sec | |
IF (last_clock < 0) THEN | |
CALL system_clock(last_clock, rate) | |
sec = 0 | |
WRITE(*, '(F10.3, 2X, A)') sec, desc | |
ELSE | |
CALL system_clock(clock, rate) | |
sec = REAL(clock - last_clock, r8)/REAL(rate, r8) | |
WRITE(*, '(F10.1, 2X, A)') sec, desc | |
last_clock = clock | |
END IF | |
END SUBROUTINE timer | |
REAL(r8) FUNCTION delta_sec() | |
INTEGER(int64) :: clock, rate | |
INTEGER(int64), SAVE :: last_clock = -1 | |
REAL(r8) :: sec | |
IF (last_clock < 0) THEN | |
CALL system_clock(last_clock, rate) | |
sec = 0 | |
ELSE | |
CALL system_clock(clock, rate) | |
sec = REAL(clock - last_clock, r8)/REAL(rate, r8) | |
last_clock = clock | |
END IF | |
delta_sec = sec | |
END FUNCTION delta_sec | |
END PROGRAM openmp_test |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment