Skip to content

Instantly share code, notes, and snippets.

@certik
Created December 29, 2024 19:30
Show Gist options
  • Save certik/924cf554b0144f00d15937970310fab7 to your computer and use it in GitHub Desktop.
Save certik/924cf554b0144f00d15937970310fab7 to your computer and use it in GitHub Desktop.
! Code from:
! https://fortran-lang.discourse.group/t/nvfortran-comparison-of-do-concurrent-vs-openmp-code/8552/18
!
! Adapted version with OpenMP and DO CONCURRENT in the same program file
! and removed disk write in the end.
!
MODULE all_data
USE iso_fortran_env, ONLY: int64, i4 => int32, r8 => real64
IMPLICIT NONE
!=============
INTEGER(i4), PARAMETER :: nx = 2000
INTEGER(i4), PARAMETER :: ny = 2000
REAL(r8), PARAMETER :: dx = 0.03_r8
REAL(r8), PARAMETER :: dy = 0.03_r8
REAL(r8), PARAMETER :: one = 1.0_r8
REAL(r8), PARAMETER :: two = 2.0_r8
REAL(r8), PARAMETER :: four = 4.0_r8
REAL(r8), PARAMETER :: half = 0.5_r8
!=============
INTEGER(i4), PARAMETER :: nsteps = 2000
INTEGER(i4), PARAMETER :: nprint = 100
REAL(r8), PARAMETER :: dtime = 0.0001_r8
!===============
REAL(r8), PARAMETER :: tau = 0.0003_r8
REAL(r8), PARAMETER :: epsilonb = 0.01_r8
REAL(r8), PARAMETER :: kappa = 1.8_r8
REAL(r8), PARAMETER :: delta = 0.02_r8
REAL(r8), PARAMETER :: aniso = 6.0_r8
REAL(r8), PARAMETER :: alpha = 0.9_r8
REAL(r8), PARAMETER :: gama = 10.0_r8
REAL(r8), PARAMETER :: teq = 1.0_r8
REAL(r8), PARAMETER :: theta0 = 0.2_r8
REAL(r8), PARAMETER :: seed = 5.0_r8
REAL(r8), PARAMETER :: pix = four*ATAN(one)
!===============
REAL(r8), DIMENSION(nx, ny) :: phi, tempr
REAL(r8), DIMENSION(nx, ny) :: lap_phi, lap_tempr
REAL(r8), DIMENSION(nx, ny) :: phidx, phidy
REAL(r8), DIMENSION(nx, ny) :: epsil, epsilon_deriv
CONTAINS
END MODULE all_data
PROGRAM openmp_test
USE iso_fortran_env, ONLY: int64, i4 => int32, r8 => real64
USE all_data
#ifdef _OPENMP
USE omp_lib
#endif
IMPLICIT NONE
INTEGER(i4) :: i, j, ip, im, jp, jm
INTEGER(i4) :: tsteps
REAL(r8) :: secs, seca, secb
REAL(r8) :: phi_old, term1, term2
REAL(r8) :: theta, m
WRITE(*, *) 'start of program', i4, r8
WRITE(*, *) 'Vern : ', compiler_version()
#ifdef _OPENMP
WRITE(*, *) 'Number of OMP threads = ', OMP_GET_MAX_THREADS()
#endif
CALL timer('start')
phi = 0.0
tempr = 0.0
secs = 0
seca = 0
secb = 0
DO j = 1, ny
DO i = 1, nx
IF ((i - nx/two)*(i - nx/two) + (j - ny/two)*(j - ny/two) < seed) THEN
phi(i, j) = one
END IF
END DO
END DO
CALL timer('initialise phi')
time_loop: DO tsteps = 1, nsteps
secs = secs + delta_sec()
#ifdef _USE_DO_CONCURRENT_
DO CONCURRENT (j=1:ny, i=1:nx) local(ip, im, jp, jm, theta) &
shared(lap_phi, lap_tempr, phi, tempr, phidx, phidy, &
epsil, epsilon_deriv)
#else
!$omp parallel do collapse(2) private(i, j, ip, im, jp, jm, theta)
DO j = 1, ny
DO i = 1, nx
#endif
jp = j + 1
jm = j - 1
ip = i + 1
im = i - 1
IF (im == 0) im = nx
IF (ip == (nx + 1)) ip = 1
IF (jm == 0) jm = ny
IF (jp == (ny + 1)) jp = 1
lap_phi(i, j) = (phi(ip, j) + phi(im, j) + phi(i, jm) &
+ phi(i, jp) - four*phi(i, j)) / (dx*dy)
lap_tempr(i, j) = (tempr(ip, j) + tempr(im, j) &
+ tempr(i, jm) + tempr(i, jp) - four*tempr(i, j)) / (dx*dy)
phidx(i, j) = (phi(ip, j) - phi(im, j)) / dx
phidy(i, j) = (phi(i, jp) - phi(i, jm)) / dy
theta = zatan2 (phidy(i, j), phidx(i, j))
epsil(i, j) = epsilonb*(one + delta*COS(aniso*(theta - theta0)))
epsilon_deriv(i, j) = &
-epsilonb*aniso*delta*SIN(aniso*(theta - theta0))
#ifdef _USE_DO_CONCURRENT_
END DO
#else
END DO
END DO
!$omp end parallel do
#endif
seca = seca + delta_sec()
#ifdef _USE_DO_CONCURRENT_
DO CONCURRENT (j=1:ny, i=1:nx) local(ip, im, jp, jm, phi_old, term1, term2, m) &
shared(phi, tempr, epsil, epsilon_deriv, phidx, phidy, lap_phi, lap_tempr)
#else
!$omp parallel do collapse(2) private(i, j, ip, im, jp, jm, phi_old, term1, term2, m)
DO j = 1, ny
DO i = 1, nx
#endif
jp = j + 1
jm = j - 1
ip = i + 1
im = i - 1
IF (im == 0) im = nx
IF (ip == (nx + 1)) ip = 1
IF (jm == 0) jm = ny
IF (jp == (ny + 1)) jp = 1
phi_old = phi(i, j)
term1 = (epsil(i, jp)*epsilon_deriv(i, jp)*phidx(i, jp) &
- epsil(i, jm)*epsilon_deriv(i, jm)*phidx(i, jm)) / dy
term2 = -(epsil(ip, j)*epsilon_deriv(ip, j)*phidy(ip, j) &
- epsil(im, j)*epsilon_deriv(im, j)*phidy(im, j)) / dx
m = alpha/pix * ATAN(gama*(teq - tempr(i, j)))
phi(i, j) = phi(i, j) + (dtime/tau) * (term1 + term2 &
+ epsil(i, j)**2 * lap_phi(i, j)) &
+ phi_old * (one - phi_old)*(phi_old - half + m)
tempr(i, j) = tempr(i, j) + dtime*lap_tempr(i, j) &
+ kappa * (phi(i, j) - phi_old)
#ifdef _USE_DO_CONCURRENT_
END DO
#else
END DO
END DO
!$omp end parallel do
#endif
secb = secb + delta_sec()
IF (MOD(tsteps, nprint) == 0) WRITE(*, *) 'Done steps = ', tsteps
END DO time_loop
CALL timer('time_loop')
WRITE(*, *) "nx=", nx , "ny=", ny
WRITE(*, '(A, 2X, F10.1, 2X, F10.1, 2X, F10.1)') 'stages:', secs, seca, secb
CONTAINS
PURE REAL(r8) FUNCTION zatan2(y, x)
! Function arguments
REAL(r8), INTENT(in) :: y, x
! Local variables
REAL(r8) :: res
IF (ABS(x) > 0.0 .OR. ABS (y) > 0.0) THEN
res = ATAN2(y, x)
ELSE
res = 0
END IF
zatan2 = res
END FUNCTION zatan2
SUBROUTINE timer(desc)
! Subroutine arguments
CHARACTER(len=*) desc
! Local variables
INTEGER(int64) :: clock, rate
INTEGER(int64), SAVE :: last_clock = -1
REAL(r8) :: sec
IF (last_clock < 0) THEN
CALL system_clock(last_clock, rate)
sec = 0
WRITE(*, '(F10.3, 2X, A)') sec, desc
ELSE
CALL system_clock(clock, rate)
sec = REAL(clock - last_clock, r8)/REAL(rate, r8)
WRITE(*, '(F10.1, 2X, A)') sec, desc
last_clock = clock
END IF
END SUBROUTINE timer
REAL(r8) FUNCTION delta_sec()
INTEGER(int64) :: clock, rate
INTEGER(int64), SAVE :: last_clock = -1
REAL(r8) :: sec
IF (last_clock < 0) THEN
CALL system_clock(last_clock, rate)
sec = 0
ELSE
CALL system_clock(clock, rate)
sec = REAL(clock - last_clock, r8)/REAL(rate, r8)
last_clock = clock
END IF
delta_sec = sec
END FUNCTION delta_sec
END PROGRAM openmp_test
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment