Created December 22, 2022 11:41
-
-
Save kaushikcfd/44bc70d3431fcdc7b391505ee4026cfd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Work-item index helpers: the OpenCL built-ins cast to int, so index
 * arithmetic that mixes them with negative constants stays signed. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))

/* The kernel in this file works on double. In OpenCL C versions before 1.2
 * double precision is an optional extension that must be enabled explicitly. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/*
 * loopy_kernel
 *
 * For every element el in [0, Nel) and every row i in [0, 35) this computes
 *
 *   out[35*el + i] =
 *     sum_{j < 35} sum_{r < 3}
 *         D_ary[1225*r + 35*i + j]
 *       * ( sum_{x < 3} J_ary[3*Nel*x + Nel*r + el] * u_ary[35*Nel*x + 35*el + j] )
 *
 * Parameters:
 *   u_ary  read-only input, indexed as 35*Nel*x + 35*el + j
 *   J_ary  read-only input, indexed as 3*Nel*x + Nel*r + el
 *   D_ary  read-only input, indexed as 1225*r + 35*i + j
 *   Nel    number of elements; work-items whose element index is >= Nel
 *          do no loads/stores but still take part in every barrier
 *   out    output, indexed as 35*el + i
 *
 * Work decomposition (work-group size is fixed at 9 x 10 x 1):
 *   - lid(1) selects one of the 10 elements handled by the work-group,
 *     el = 10*gid(0) + lid(1).
 *   - The 35 rows/columns are split into 4 tiles of 9; lid(0) selects the
 *     position inside a tile. The last tile only holds 8 valid entries, hence
 *     the recurring "9*tile + lid <= 34" guards.
 *
 * All barriers sit outside work-item dependent conditionals so that every
 * work-item of the group reaches each of them.
 */
__kernel void __attribute__ ((reqd_work_group_size(9, 10, 1))) loopy_kernel(__global double const *__restrict__ u_ary, __global double const *__restrict__ J_ary, __global double const *__restrict__ D_ary, int const Nel, __global double *__restrict__ out)
{
  /* 9x9 tile of D for each of the 3 values of r, shared by the work-group. */
  __local double D_fetch[3 * 9 * 9];
  /* Per element (10) x per column in the current j tile (9) x per r (3). */
  __local double subst_0[10 * 9 * 3];
  double J_0[3 * 3];
  /* One accumulator per i tile owned by this work-item. */
  double acc_r_j_tile_j_inner[4];
  double acc_x;

  int const l0 = lid(0);
  int const l1 = lid(1);
  int const el = 10 * gid(0) + l1;
  int const el_in_range = el < Nel;

  if (el_in_range)
  {
    /* Keep this element's 3x3 block of J in private memory. */
    for (int J_prftch_r = 0; J_prftch_r <= 2; ++J_prftch_r)
      for (int J_prftch_x = 0; J_prftch_x <= 2; ++J_prftch_x)
        J_0[3 * J_prftch_x + J_prftch_r] = J_ary[Nel * 3 * J_prftch_x + Nel * J_prftch_r + el];

    /* Zero every accumulator whose row 9*i_tile + l0 exists. The trip count
     * must depend on l0 only: the former bound switched to an lid(1)-based
     * expression when lid(0) < lid(1), which skipped tile 3 for lid(1) in
     * {8, 9} although the update loop below still accumulates into it. */
    for (int i_tile_0 = 0; i_tile_0 <= 3; ++i_tile_0)
      if (9 * i_tile_0 + l0 <= 34)
        acc_r_j_tile_j_inner[i_tile_0] = 0.0;
  }

  for (int j_tile = 0; j_tile <= 3; ++j_tile)
  {
    barrier(CLK_LOCAL_MEM_FENCE) /* for subst_0 (prcmpt_x_redn rev-depends on insn_r_j_tile_j_inner_update) */;

    /* subst_0[el, j, r] = sum_x J[x, r, el] * u[x, el, j] for column
     * j = 9*j_tile + l0 of this work-item's element. */
    if (el_in_range && 9 * j_tile + l0 <= 34)
      for (int rprcmpt_subst = 0; rprcmpt_subst <= 2; ++rprcmpt_subst)
      {
        acc_x = 0.0;
        for (int x = 0; x <= 2; ++x)
          acc_x = acc_x + J_0[3 * x + rprcmpt_subst] * u_ary[35 * Nel * x + 35 * el + 9 * j_tile + l0];
        subst_0[27 * l1 + 3 * l0 + rprcmpt_subst] = acc_x;
      }

    for (int i_tile = 0; i_tile <= 3; ++i_tile)
    {
      barrier(CLK_LOCAL_MEM_FENCE) /* for D_fetch (D rev-depends on insn_r_j_tile_j_inner_update) */;

      /* Stage the (i_tile, j_tile) tile of D in local memory. Here l1 is
       * the row inside the tile, so only l1 <= 8 takes part, and this step
       * does not depend on whether the work-item's element is in range. */
      if (l1 <= 8 && 9 * i_tile + l1 <= 34 && 9 * j_tile + l0 <= 34)
        for (int rprftchD = 0; rprftchD <= 2; ++rprftchD)
          D_fetch[81 * rprftchD + 9 * l1 + l0] = D_ary[1225 * rprftchD + 35 * (9 * i_tile + l1) + 9 * j_tile + l0];

      barrier(CLK_LOCAL_MEM_FENCE) /* for D_fetch (insn_r_j_tile_j_inner_update depends on D) */;

      /* Row i = 9*i_tile + l0: accumulate over the valid columns of this
       * j tile (the guard drops j_inner == 8 in the last tile). */
      if (el_in_range && 9 * i_tile + l0 <= 34)
        for (int j_inner = 0; j_inner <= 8; ++j_inner)
          if (9 * j_tile + j_inner <= 34)
            for (int r = 0; r <= 2; ++r)
              acc_r_j_tile_j_inner[i_tile] = acc_r_j_tile_j_inner[i_tile] + subst_0[27 * l1 + 3 * j_inner + r] * D_fetch[81 * r + 9 * l0 + j_inner];
    }
  }

  /* Write back every row this work-item owns; same l0-only ownership test as
   * the initialisation above so that rows 27..34 are stored for all elements. */
  if (el_in_range)
    for (int i_tile_1 = 0; i_tile_1 <= 3; ++i_tile_1)
      if (9 * i_tile_1 + l0 <= 34)
        out[35 * el + l0 + 9 * i_tile_1] = acc_r_j_tile_j_inner[i_tile_1];
}
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.