Skip to content

Instantly share code, notes, and snippets.

View kaushikcfd's full-sized avatar

Kaushik Kulkarni kaushikcfd

View GitHub Profile
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file has been truncated, but you can view the full file.
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
inline double loopy_pow_float64_int64(double x, long n) {
if (n == 0)
return 1;
if (n < 0) {
// See https://github.com/inducer/pytato/pull/255
// i = 6
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
inline double powf64(double x, double y) {
return pow(x, y);
}
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) _pt_kernel(__global double const *__restrict__ _pt_data, __global double const *__restrict__ _pt_data_0, __global double const *__restrict__ _pt_data_1, __global double const *__restrict__ _pt_data_2, __global double const *__restrict__ _pt_data_3, __global double const *__restrict__ _actx_in_1_0_0, __global double const *__restrict__ _pt_data_4, __global double const *__restrict__ _pt_data_5, __global long const *__restrict__ _pt_data_6, __global long const *__restrict__ _pt_data_7, __global int const *__restrict__ _pt_data_8, __global long const *__restrict__ _pt_data_9, __global int const *__restrict__ _pt_data_10, __global long const *__restrict__ _pt_data_11, __global int const *__restrict__ _pt_data_12, __global long const *__restrict__ _pt_data_13, __global int const *__restrict__ _
import jax.numpy as jnp
from jax import jit
@jit
def _pt_kernel(*, _pt_data_222, _pt_data_247, _pt_data_6, _pt_data_181, _pt_data_45, _pt_data_95, _pt_data_199, _pt_data_12, _pt_data_32, _pt_data_204, _pt_data_265, _pt_data_276, _pt_data_284, _pt_data_98, _pt_data_62, _pt_data_174, _pt_data_260, _pt_data_56, _pt_data_28, _pt_data_61, _pt_data_115, _pt_data_241, _pt_data_92, _pt_data_159, _pt_data_252, _pt_data_280, _pt_data_40, _pt_data_226, _pt_data_126, _pt_data_131, _pt_data_83, _pt_data_256, _pt_data_82, _pt_data_66, _pt_data_208, _pt_data_2, _pt_data_195, _pt_data_4, _pt_data_26, _pt_data_104, _pt_data_106, _pt_data_207, _pt_data_113, _pt_data_274, _pt_data_134, _pt_data_235, _pt_data_158, _pt_data_253, _pt_data_107, _pt_data_184, _pt_data_183, _pt_data_242, _pt_data_42, _pt_data_50, _pt_data_89, _pt_data_57, _pt_data_160, _pt_data_60, _pt_data_210, _pt_data_224, _pt_data_229, _pt_data_8, _actx_in_1_3_0, _pt_data_133, _pt_data_152, _pt_data_190, _actx_in_1_2_0, _pt_data_49, _pt_data_79, _pt_data_117, _
import jax.numpy as jnp
import numpy as np
def _pt_kernel(*, _actx_dw_153, _actx_dw_249, _actx_dw_6, _actx_dw_86, _actx_dw_68, _actx_dw_83, _actx_dw_186, _actx_dw_126, _actx_dw_43, _actx_dw_221, _actx_dw_65, _actx_dw_49, _actx_dw_59, _actx_dw_72, _actx_dw_133, _actx_dw_146, _actx_dw_161, _actx_dw_188, _actx_dw_96, _actx_dw_64, _actx_dw_129, _actx_dw_21, _actx_dw_31, _actx_dw_104, _actx_dw_151, _actx_dw_199, _actx_dw_227, _actx_dw_82, _actx_dw_218, _actx_dw_28, _actx_dw_34, _actx_dw_115, _actx_dw_66, _actx_dw_120, _actx_dw_175, _actx_dw_2, _actx_dw_210, _actx_dw_215, _actx_dw_67, _actx_dw_152, _actx_dw_222, _actx_dw_121, _actx_dw_183, _actx_dw_150, _actx_dw_89, _actx_dw_39, _actx_dw_35, _actx_dw_176, _actx_dw_42, _actx_dw_162, _actx_dw_110, _actx_dw_63, _actx_dw_73, _actx_dw_17, _actx_dw_113, _actx_dw_61, _actx_dw_139, _actx_dw_219, _actx_dw_60, _actx_dw_208, _actx_dw_204, _actx_dw_238, _actx_dw_248, _actx_dw_105, _actx_dw_70, _actx_dw_171, _actx_dw_225, _actx_dw_52, _actx_dw_106, _actx_dw_206, _actx_dw_173,
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) _pt_kernel(__global double const *__restrict__ _pt_data, __global double const *__restrict__ _pt_data_0, __global double const *__restrict__ _pt_data_1, __global double const *__restrict__ _pt_data_2, __global double const *__restrict__ _actx_in_1_momentum_1_0, __global double const *__restrict__ _pt_data_3, __global double const *__restrict__ _pt_data_4, __global long const *__restrict__ _pt_data_5, __global long const *__restrict__ _pt_data_6, __global int const *__restrict__ _pt_data_7, __global long const *__restrict__ _pt_data_8, __global int const *__restrict__ _pt_data_9, __global long const *__restrict__ _pt_data_10, __global int const *__restrict__ _pt_data_11, __global long const *__restrict__ _pt_data_12, __global int const *__restrict__ _pt_data_13, __global long const *__restr
./cuda_vdpau_interop.h: * ::cuGraphicsVDPAURegisterVideoSurface
./cuda_vdpau_interop.h: * ::cuGraphicsVDPAURegisterOutputSurface
./generated_cuda_meta.h:typedef struct cuGraphCreate_params_st {
./generated_cuda_meta.h:} cuGraphCreate_params;
./generated_cuda_meta.h:typedef struct cuGraphAddKernelNode_params_st {
./generated_cuda_meta.h:} cuGraphAddKernelNode_params;
./generated_cuda_meta.h:typedef struct cuGraphKernelNodeGetParams_params_st {
./generated_cuda_meta.h:} cuGraphKernelNodeGetParams_params;
./generated_cuda_meta.h:typedef struct cuGraphKernelNodeSetParams_params_st {
./generated_cuda_meta.h:} cuGraphKernelNodeSetParams_params;
This file has been truncated, but you can view the full file.
---------------------------------------------------------------------------
KERNEL: _pt_kernel
---------------------------------------------------------------------------
ARGUMENTS:
_actx_in_0: type: np:dtype('float64'), shape: (), offset: <class 'loopy.kernel.data.auto'> aspace: global
_actx_in_1_energy_0: type: np:dtype('float64'), shape: (107554, 35), dim_tags: (N1:stride:35, N0:stride:1), offset: <class 'loopy.kernel.data.auto'> aspace: global
_actx_in_1_mass_0: type: np:dtype('float64'), shape: (107554, 35), dim_tags: (N1:stride:35, N0:stride:1), offset: <class 'loopy.kernel.data.auto'> aspace: global
_actx_in_1_momentum_0_0: type: np:dtype('float64'), shape: (107554, 35), dim_tags: (N1:stride:35, N0:stride:1), offset: <class 'loopy.kernel.data.auto'> aspace: global
_actx_in_1_momentum_1_0: type: np:dtype('float64'), shape: (107554, 35), dim_tags: (N1:stride:35, N0:stride:1), offset: <class 'loopy.kernel.data.auto'> aspace: global
_actx_in_1_momentum_2_0: type: np:dtype('float64'), shape: (107554, 35), d
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) _pt_kernel(__global double const *__restrict__ _pt_data, __global double const *__restrict__ _pt_data_0, __global double const *__restrict__ _pt_data_1, __global double const *__restrict__ _pt_data_2, __global double const *__restrict__ _pt_data_3, __global double const *__restrict__ _actx_in_1_0_0, __global double const *__restrict__ _pt_data_4, __global double const *__restrict__ _pt_data_5, __global long const *__restrict__ _pt_data_6, __global long const *__restrict__ _pt_data_7, __global long const *__restrict__ _pt_data_8, __global int const *__restrict__ _pt_data_9, __global long const *__restrict__ _pt_data_10, __global long const *__restrict__ _pt_data_11, __global int const *__restrict__ _pt_data_12, __global long const *__restrict__ _pt_data_13, __global long const *__restrict__