Created
December 6, 2017 20:05
-
-
Save kaushikcfd/f2412138d014901575e68471623ee97e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Work-item index within its work-group along dimension N, narrowed to int. */
#define lid(N) ((int) get_local_id(N))
/* Work-group index along dimension N, narrowed to int. */
#define gid(N) ((int) get_group_id(N))

/* Needed for atom_cmpxchg on 64-bit (long) operands, used by the
 * accumulation kernel's compare-and-swap loop. */
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable

/* The fp64 extension is only requested explicitly when compiling as
 * OpenCL C older than 1.2. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif

/* Three-entry weight tables indexed by ibf_gather_0 (0..2) in the
 * accumulation kernel: cnst scales the first summand of each update,
 * cnst_0 scales the second. */
__constant double const cnst[3] = { -1.0, 1.0, 0.0 };
__constant double const cnst_0[3] = { -1.0, 0.0, 1.0 };
/*
 * Zero-fill kernel for A0_global.
 *
 * Each work-item clears the single entry at index 32 * gid(0) + lid(0),
 * provided that index does not exceed 66048. The work-group size is fixed
 * at 32 x 1 x 1 by the reqd_work_group_size attribute.
 *
 * Only A0_global is accessed; every other parameter is unused in this body.
 *
 * NOTE(review): the upper bound 66048 is hard-coded and A0_size is ignored.
 * Presumably the host passes A0_size == 66049 -- confirm before reusing this
 * kernel with a different output length.
 */
__kernel void __attribute__ ((reqd_work_group_size(32, 1, 1))) loopy_kernel_and_loopy_kernel_and_tsfc_kernel_and_loopy_kernel(int const w_0_global_len, __global double const *__restrict__ w_0_global, __global int const *__restrict__ ltg_1, int const nelements, int const coords_global_len, __global double const *__restrict__ coords_global, __global int const *__restrict__ ltg_0, __global volatile double *__restrict__ A0_global, int const A0_size)
{
  /* Flat entry index owned by this work-item. */
  int const entry = 32 * gid(0) + lid(0);

  /* Work-items whose index lies past the last entry do nothing. */
  if (entry <= 66048)
  {
    A0_global[entry] = 0.0;
  }
}
/*
 * Accumulation kernel: each work-item handles the index
 * e = 32 * gid(0) + lid(0) and atomically adds three values into A0_global.
 *
 * Reads performed for index e:
 *   - ltg_1[3e .. 3e+2]  : positions into w_0_global and A0_global
 *   - ltg_0[3e .. 3e+2]  : positions n into coords_global, which is read at
 *                          2n and 2n + 1 (two doubles per position)
 *
 * For each slot k = 0..2 the value
 *     cnst[k] * fx * 0.5 * |det|  +  cnst_0[k] * fy * 0.5 * |det|
 * is added to A0_global[ltg_1[3e + k]] with a compare-and-swap retry loop,
 * so that work-items updating the same entry do not lose each other's
 * contribution.
 *
 * Everything that does not depend on k or on the current content of
 * A0_global is evaluated once up front. Each temporary reproduces one
 * parenthesised sub-expression of the generated code with its operations in
 * the same left-to-right order, so the arithmetic sequence is unchanged.
 *
 * NOTE(review): looks like the element-local action of a 2-D stiffness
 * operator on linear triangles (coords as x/y pairs, det as the Jacobian
 * determinant) -- confirm against the generating form.
 * NOTE(review): there is no guard on e; nelements, w_0_global_len,
 * coords_global_len and A0_size are all unused, so the launch grid must
 * cover exactly the valid range. A zero det yields inf/NaN contributions.
 */
__kernel void __attribute__ ((reqd_work_group_size(32, 1, 1))) loopy_kernel_and_loopy_kernel_and_tsfc_kernel_and_loopy_kernel_0(int const w_0_global_len, __global double const *__restrict__ w_0_global, __global int const *__restrict__ ltg_1, int const nelements, int const coords_global_len, __global double const *__restrict__ coords_global, __global int const *__restrict__ ltg_0, __global volatile double *__restrict__ A0_global, int const A0_size)
{
  /* First of the three consecutive map entries belonging to this work-item. */
  int const base = 3 * (32 * gid(0) + lid(0));

  /* Offsets of the first of the two coords_global values for each position. */
  int const c0 = 2 * ltg_0[base];
  int const c1 = 2 * ltg_0[base + 1];
  int const c2 = 2 * ltg_0[base + 2];

  /* Differences of w_0_global relative to the value at position 0. */
  double const dw1 = (-1.0 * w_0_global[ltg_1[base]] + w_0_global[ltg_1[base + 1]]);
  double const dw2 = (-1.0 * w_0_global[ltg_1[base]] + w_0_global[ltg_1[base + 2]]);

  /* Differences of the coordinate pairs relative to position 0:
   * j0* use the first value of each pair, j1* the second. */
  double const j00 = (-1.0 * coords_global[c0] + coords_global[c1]);
  double const j01 = (-1.0 * coords_global[c0] + coords_global[c2]);
  double const j10 = (-1.0 * coords_global[c0 + 1] + coords_global[c1 + 1]);
  double const j11 = (-1.0 * coords_global[c0 + 1] + coords_global[c2 + 1]);

  /* Determinant of the 2x2 matrix [[j00, j01], [j10, j11]]. */
  double const det = (j00 * j11 + -1.0 * j01 * j10);
  double const abs_det = fabs(det);

  /* Inner sums that the generated expression repeats in both summands. */
  double const gx = (dw1 * j11 * 1.0 / det + dw2 * 1.0 / det * -1.0 * j10);
  double const gy = (dw1 * 1.0 / det * -1.0 * j01 + dw2 * j00 * 1.0 / det);

  /* Factors multiplied by cnst[k] and cnst_0[k] respectively. */
  double const fx = (gx * j11 * 1.0 / det + gy * 1.0 / det * -1.0 * j01);
  double const fy = (gx * 1.0 / det * -1.0 * j10 + gy * j00 * 1.0 / det);

  for (int ibf_gather_0 = 0; ibf_gather_0 <= 2; ++ibf_gather_0)
  {
    /* The two summands for this slot; neither depends on A0_global. */
    double const term_a = cnst[ibf_gather_0] * fx * 0.5 * abs_det;
    double const term_b = cnst_0[ibf_gather_0] * fy * 0.5 * abs_det;

    /* Destination entry, also viewed as a 64-bit integer for the CAS. */
    int const target = ltg_1[base + ibf_gather_0];
    volatile __global long *const target_bits = (volatile __global long *) &(A0_global[target]);

    double loopy_old_val;
    double loopy_new_val;
    do
    {
      /* Re-read the current value on every attempt and rebuild the sum. */
      loopy_old_val = A0_global[target];
      loopy_new_val = loopy_old_val + term_a + term_b;
    }
    /* Bit patterns are compared, not floating-point values; as_long
     * reinterprets the double without pointer-cast type punning. The swap
     * succeeded when the returned old bits equal the bits we expected. */
    while (atom_cmpxchg(target_bits, as_long(loopy_old_val), as_long(loopy_new_val)) != as_long(loopy_old_val));
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment