#include <CL/cl.h>
#include "petsc.h"
#include "petscvec.h"
#include "petscviennacl.h"
#include <iostream>char kernel_source[] = "#define lid(N) ((int) get_local_id(N))\n"
"#define gid(N) ((int) get_group_id(N))\n"
"#if __OPENCL_C_VERSION__ < 120\n"
"#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
"#endif\n"
"\n"
"__kernel void __attribute__ ((reqd_work_group_size(33, 1, 1))) wrap_zero(int const start, int const end, __global double *__restrict__ dat0)\n"
Generated code for the function --
extern "C" void wrap_form0_cell_integral_otherwise(int const start, int const end, Vec dat0, Vec dat1, Vec dat2, cl_mem map0, const int map0_size)
{
if(end == start)
{
// no need to go any further
return;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pyopencl as cl | |
| import pyopencl.clrandom as cl_random | |
| from pyopencl.reduction import ReductionKernel | |
| import numpy as np | |
| from mako.template import Template | |
| ctx = cl.create_some_context() | |
| queue = cl.CommandQueue(ctx) | |
| n = 2**10 | |
| num_groups = 128 |
- Solving Helmholtz for 2**19 elements.
- This is the log for the GPU implementation. The solve takes a total of ~27 seconds.
------------------------------------------------------------------------------------------------------------------------
Event Count Time (sec) Flop --- Global --- --- Stage ---- Total
Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %T %F %M %L %R %T %F %M %L %R Mflop/s
------------------------------------------------------------------------------------------------------------------------
--- Event Stage 0: Main Stage
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pymbolic import parse | |
| from pymbolic.mapper import WalkMapper | |
| class EdMapper(WalkMapper): | |
| def __init__(self): | |
| self.lines = [] | |
| def map_variable(self, expr): | |
| if expr.name == 'a': |
---------------------------------------------------------------------------
KERNEL: wrap_form0_cell_integral_otherwise
---------------------------------------------------------------------------
ARGUMENTS:
dat0: type: np_atomic:dtype('float64'), shape: (None), dim_tags: (N0:stride:1) aspace: global
dat1: type: np:dtype('float64'), shape: (None, 2), dim_tags: (N1:stride:2, N0:stride:1) aspace: global
dat2: type: np:dtype('float64'), shape: (None), dim_tags: (N0:stride:1) aspace: global