This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! transform this: | |
Do step = 1, 100 | |
acc kernel | |
acc update() | |
Call diagnostics_on_cpu() | |
end | |
! Needs to change to: | |
Step=1 | |
acc kernel async(step) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for (int l = 0; l < ld; l++) | |
{ | |
for (int k = 0; k < kd; k++) | |
{ | |
for (int j = 0; j < jd; j++) | |
{ | |
if (l > 0 && l < (ld - 1) && k > 0 && k < (kd - 1) && j > 0 && j < (jd - 1)) | |
{ | |
jp = j + 1; | |
jm = j - 1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! Modern compilers with -O3 usually unroll loops when the start and stop bounds of the loop are known | |
! at compile time. Here is an example where I use a new secondary loop with fixed bounds to unroll by | |
! the amount specified in the parameter nunroll. This allows the routine to be general with only a change | |
! to nunroll (and a recompile) to unroll by a different amount. | |
program loop_unrolling | |
implicit none | |
integer :: i, ii, iend, istart | |
integer, parameter :: nunroll=2 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
world_rank=$PMIX_RANK | |
let local_size=$RANKS_PER_SOCKET | |
export CUDA_CACHE_PATH=/dev/shm/$USER/nvcache_$PMIX_RANK | |
executable=$1 | |
shift | |
if [ $world_rank = $PROFILE_RANK ]; then | |
nvprof -f -o $PROFILE_PATH $executable "$@" | |
else | |
$executable "$@" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! Tested Oct 2024 with gfortran. | |
module nvtx_mod | |
use iso_c_binding | |
implicit none | |
integer,private :: col(7) = [ int(Z'0000ff00'), int(Z'000000ff'), int(Z'00ffff00'), int(Z'00ff00ff'),& | |
int(Z'0000ffff'), int(Z'00ff0000'), int(Z'00ffffff') ] | |
!character(len=256), private :: tempName | |
character, private, target :: tempName(256) |