Two-phase data mapping: ALIGN, DISTRIBUTE
Avoid contention: rows of data that are mapped to different processors can be updated in parallel.
Locality of reference: two data items that are always used together should be placed on the same processor.
BLOCK is good for local communication. (相近数据的操作可以减少inter-processor communication)
a(i, j) = a(i+1, j) + a(i-1, j) + a(i, j+1) + a(i, j-1)
CYCLIC has non-obvious locality.
a(i+kP), k = 0, 1, 2, ... // cyclic: with P processors, these elements are all mapped to the same processor
PROGRAM HPF_FINITE_DIFFERENCE
  ! Jacobi-style finite-difference relaxation on a 100 x 100 grid.
  ! Each sweep replaces every interior point with the mean of its four
  ! neighbours; sweeping stops after ITERS sweeps or as soon as the largest
  ! interior change in a sweep drops below SMALL.
  IMPLICIT NONE

  ! NOTE(review): ITERS and SMALL were used but never declared or given a
  ! value; the values below are illustrative placeholders -- confirm.
  INTEGER, PARAMETER :: ITERS = 1000
  DOUBLE PRECISION, PARAMETER :: SMALL = 1.0D-6

  DOUBLE PRECISION X(100, 100), NEW(100, 100)
  DOUBLE PRECISION DIFFMAX
  INTEGER I, J, K

  ! Data mapping: a 2 x 2 processor arrangement; NEW is aligned element for
  ! element with X, and X (the align target) is the array that is distributed.
  ! An alignee may not itself appear in a DISTRIBUTE directive, so the
  ! alignment direction is NEW -> X rather than X -> NEW.
!HPF$ PROCESSORS PR(2, 2)
!HPF$ ALIGN NEW(:, :) WITH X(:, :)
!HPF$ DISTRIBUTE X(BLOCK, BLOCK) ONTO PR

  ! NOTE(review): the initial/boundary values of X were never set; zero is
  ! a placeholder so the program is well defined -- replace with real data.
  X = 0.0D0

  DO K = 1, ITERS
    ! Four-point stencil on the interior; the border rows/columns are fixed.
    FORALL (I = 2:99, J = 2:99)
      NEW(I, J) = (X(I - 1, J) + X(I + 1, J) + X(I, J - 1) + X(I, J + 1)) / 4
    END FORALL

    ! Compare interior points only: the border of NEW is never assigned, so
    ! including it would read undefined values.
    DIFFMAX = MAXVAL(ABS(NEW(2:99, 2:99) - X(2:99, 2:99)))

    FORALL (I = 2:99, J = 2:99)
      X(I, J) = NEW(I, J)
    END FORALL

    ! Converged: leave the sweep loop early.
    IF (DIFFMAX .LT. SMALL) EXIT
  END DO
END PROGRAM HPF_FINITE_DIFFERENCE
PROGRAM HPF_MATRIX_MULTIPLY
  ! Dense matrix product C = A * B for 256 x 256 REAL matrices.
  ! C is accumulated as 256 successive updates: step K adds the product of
  ! column K of A and row K of B to every element of C.
  ! (Previously named HPF_FINITE_DIFFERENCE, a copy-paste duplicate of the
  ! relaxation example's name.)
  IMPLICIT NONE

  REAL A(256, 256), B(256, 256), C(256, 256)
  INTEGER I, J, K

  ! Data mapping: a 4 x 4 processor arrangement; A and B are aligned element
  ! for element with C, and C (the align target) is block-distributed in both
  ! dimensions onto P.
!HPF$ PROCESSORS P(4, 4)
!HPF$ ALIGN (:, :) WITH C(:, :) :: A, B
!HPF$ DISTRIBUTE C(BLOCK, BLOCK) ONTO P

  ! NOTE(review): A and B were never given values; zero is a placeholder so
  ! the program is well defined -- replace with real input data.
  A = 0.0
  B = 0.0

  C = 0
  DO K = 1, 256
    FORALL (I = 1:256, J = 1:256) C(I, J) = C(I, J) + A(I, K) * B(K, J)
  END DO
END PROGRAM HPF_MATRIX_MULTIPLY
PROGRAM XXX
  ! Estimates pi as H * SUM(4 / (1 + X*X)), where X holds the sample points
  ! H * (MPOINT - 0.5).
  ! NOTE(review): the original elided the setup ("..." and "H = ...").
  ! N, H = 1/N and MPOINT(I) = I below are the usual midpoint-rule setup for
  ! integrating 4/(1+x^2) over [0, 1]; they are assumptions -- confirm.
  IMPLICIT NONE

  INTEGER, PARAMETER :: N = 1000        ! number of sample points (placeholder)
  DOUBLE PRECISION H, PI
  DOUBLE PRECISION X(N), HEIGHT(N)
  INTEGER MPOINT(N)
  INTEGER I

  ! Data mapping: X and MPOINT are aligned element for element with HEIGHT,
  ! which is block-distributed over four processors.
!HPF$ PROCESSORS P(4)
!HPF$ ALIGN (:) WITH HEIGHT(:) :: X, MPOINT
!HPF$ DISTRIBUTE HEIGHT(BLOCK) ONTO P

  FORALL (I = 1:N) MPOINT(I) = I
  H = 1.0D0 / N

  ! Whole-array operations: each is elementwise over the aligned arrays.
  X = H * (MPOINT - 0.5D0)
  HEIGHT = 4 / (1 + X * X)

  ! Global reduction over the distributed array.
  PI = H * SUM(HEIGHT)
END PROGRAM XXX