Skip to content

Instantly share code, notes, and snippets.

@kaushikcfd
Created December 8, 2017 21:28
Show Gist options
  • Save kaushikcfd/53cf6824bf9fdd261558fd5416230ee2 to your computer and use it in GitHub Desktop.
Save kaushikcfd/53cf6824bf9fdd261558fd5416230ee2 to your computer and use it in GitHub Desktop.
This is the serial (single work-item) OpenCL version of the mass-matrix kernel.
/* Shorthands for the OpenCL work-item local id and work-group id along
 * dimension N, narrowed to int. Neither macro is used by the kernels in
 * this listing. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
/* 64-bit atomics: required for the atom_cmpxchg() call on long values used
 * by the accumulation kernel. */
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
/* Double precision is only requested as an extension for OpenCL C versions
 * older than 1.2. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/* 3x3 table stored flat; the accumulation kernel indexes it as
 * cnst[3 * q + k] with q, k in 0..2.
 * NOTE(review): presumably the values of the three basis functions (k) at
 * the three quadrature points (q) -- confirm against the code generator. */
__constant double const cnst[3 * 3] = { 0.6666666666666669, 0.16666666666666663, 0.16666666666666666, 0.16666666666666674, 0.16666666666666663, 0.6666666666666665, 0.16666666666666669, 0.6666666666666666, 0.16666666666666663 };
/* One factor per q, multiplied into every term of the q-sum.
 * NOTE(review): presumably the quadrature weights -- confirm. */
__constant double const cnst_0[3] = { 0.16666666666666666, 0.16666666666666666, 0.16666666666666666 };
/*
 * Initialisation kernel: stores 0.0 into A0_global[0] .. A0_global[1088]
 * (1089 entries) from a single work-item.
 *
 * Only A0_global is touched; every other argument, including A0_size, is
 * accepted but ignored.
 *
 * NOTE(review): the extent is hard-coded rather than derived from A0_size,
 * so the buffer must hold at least 1089 doubles -- confirm against the host
 * code that allocates it.
 */
__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel_and_loopy_kernel_and_tsfc_kernel_and_loopy_kernel(int const w_0_global_len, __global double const *__restrict__ w_0_global, __global int const *__restrict__ ltg_1, int const nelements, int const coords_global_len, __global double const *__restrict__ coords_global, __global int const *__restrict__ ltg_0, __global volatile double *__restrict__ A0_global, int const A0_size)
{
  int idx = 0;

  while (idx < 1089)
  {
    A0_global[idx] = 0.0;
    ++idx;
  }
}
/*
 * Element loop: for each of the 2048 elements (iel = 0..2047), computes three
 * local contributions and adds them into A0_global.
 *
 * For element iel, with node indices d_k = ltg_1[3 * iel + k] and vertex
 * indices v_k = ltg_0[3 * iel + k] (k = 0..2):
 *
 *   |det|    = |(x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0)|
 *              where (x_k, y_k) = coords_global[2 * v_k], [2 * v_k + 1];
 *              this is twice the area of the triangle (v0, v1, v2)
 *   w_q      = sum_k cnst[3 * q + k] * w_0_global[d_k]
 *   local_i  = sum_q cnst[3 * q + i] * w_q * cnst_0[q] * |det|
 *   A0_global[d_i] += local_i            (compare-and-swap loop)
 *
 * Parameters actually read: w_0_global, ltg_1, coords_global, ltg_0.
 * Parameter written: A0_global. The *_len, nelements and A0_size arguments
 * are accepted but ignored.
 *
 * NOTE(review): the element count is hard-coded (2048) instead of taken from
 * nelements -- confirm the host always launches with a matching mesh.
 * NOTE(review): A0_global is only accumulated into here; it is assumed to
 * have been zeroed beforehand -- confirm launch order on the host.
 */
__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel_and_loopy_kernel_and_tsfc_kernel_and_loopy_kernel_0(int const w_0_global_len, __global double const *__restrict__ w_0_global, __global int const *__restrict__ ltg_1, int const nelements, int const coords_global_len, __global double const *__restrict__ coords_global, __global int const *__restrict__ ltg_0, __global volatile double *__restrict__ A0_global, int const A0_size)
{
  for (int iel = 0; iel <= 2047; ++iel)
  {
    /* Index-map lookups are invariant over the inner loops: load them once. */
    int dof[3];
    int vtx[3];
    for (int k = 0; k <= 2; ++k)
    {
      dof[k] = ltg_1[3 * iel + k];
      vtx[k] = ltg_0[3 * iel + k];
    }

    double const x0 = coords_global[2 * vtx[0]];
    double const y0 = coords_global[2 * vtx[0] + 1];
    double const x1 = coords_global[2 * vtx[1]];
    double const y1 = coords_global[2 * vtx[1] + 1];
    double const x2 = coords_global[2 * vtx[2]];
    double const y2 = coords_global[2 * vtx[2] + 1];

    /* Absolute determinant of the edge vectors (v1 - v0, v2 - v0); it depends
     * only on the element, so it is computed once instead of nine times. */
    double const abs_det = fabs((x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0));

    double const w0 = w_0_global[dof[0]];
    double const w1 = w_0_global[dof[1]];
    double const w2 = w_0_global[dof[2]];

    /* Weighted combination of the nodal w values for each q; independent of
     * the output index, so hoisted out of the accumulation loop. The term
     * order (k = 2, 0, 1) is kept from the generated code. */
    double w_at_q[3];
    for (int q = 0; q <= 2; ++q)
    {
      w_at_q[q] = cnst[3 * q + 2] * w2 + cnst[3 * q] * w0 + cnst[3 * q + 1] * w1;
    }

    double sum_tmp_0[3];
    for (int i = 0; i <= 2; ++i)
    {
      double acc = 0.0;
      for (int q = 0; q <= 2; ++q)
      {
        acc = acc + cnst[3 * q + i] * w_at_q[q] * cnst_0[q] * abs_det;
      }
      sum_tmp_0[i] = acc;
    }

    /* Scatter-add via a compare-and-swap loop on the 64-bit pattern of the
     * double. as_long() reinterprets the bits without pointer punning, and
     * the pointer cast keeps the volatile qualifier of A0_global. */
    for (int i = 0; i <= 2; ++i)
    {
      __global volatile long *const slot = (__global volatile long *) &A0_global[dof[i]];
      long old_bits;
      long new_bits;
      do
      {
        double const old_val = A0_global[dof[i]];
        old_bits = as_long(old_val);
        new_bits = as_long(old_val + sum_tmp_0[i]);
      }
      while (atom_cmpxchg(slot, old_bits, new_bits) != old_bits);
    }
  }
}
@kaushikcfd
Copy link
Author

kaushikcfd commented Dec 8, 2017

For this kernel:

  • nelements = 32*32*2 = 2048
  • A0_size = 1088

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment