Created
December 1, 2016 08:04
-
-
Save inferrna/ccbcedec405c600970a5a097cf995571 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ OFFSET32_BIT=1 CLANG_HOME=/usr/lib/llvm-3.8 py.test -svx test/tf/test_tf.py | |
======================================================================== test session starts ========================================================================= | |
platform linux -- Python 3.5.2, pytest-2.9.1, py-1.4.31, pluggy-0.3.1 -- /usr/bin/python3 | |
cachedir: .cache | |
rootdir: /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl, inifile: pytest.ini | |
collected 2 items | |
test/tf/test_tf.py::test_cwise_sqrt context <pyopencl.Context at 0x222ece0 on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x22b9f30>> | |
options [] | |
opt_options [] | |
iropencl_options [] | |
running [/usr/lib/llvm-3.8/bin/opt -S test/tf/samples/cwise_op_gpu_sqrt-device-noopt.ll -o /tmp/test-opt.ll] | |
running [build/ir-to-opencl --inputfile /tmp/test-opt.ll --outputfile /tmp/test-device.cl --kernelname _ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_] | |
F name _ZN5Eigen8internal15EigenMetaKe | |
running generation on _ZN5Eigen8internal15EigenMetaKe | |
running generation on _ZN5Eigen15TensorEvaluatorIK0 | |
running generation on _ZN5Eigen15TensorEvaluatorIK1 | |
running generation on _ZN5Eigen15TensorEvaluatorIK1 | |
running generation on _ZN5Eigen15TensorEvaluatorIN2 | |
running generation on _ZN5Eigen15TensorEvaluatorIN9 | |
running generation on _ZNK5Eigen15TensorEvaluatorI3 | |
running generation on _ZNK5Eigen15TensorEvaluatorI8 | |
running generation on _ZN5Eigen15TensorEvaluatorIK1 | |
running generation on _ZN5Eigen8internal6pstoreIf64_gp | |
running generation on _ZNK5Eigen15TensorEvaluatorI11 | |
running generation on _ZNK5Eigen15TensorEvaluatorI6 | |
running generation on _ZNK5Eigen8internal14scalar_10 | |
running generation on _ZNK5Eigen8internal14scalar_5 | |
running generation on _ZN5Eigen8internal5psqrtI6fl7 | |
creating program... | |
building kernel... | |
/usr/local/lib/python3.5/dist-packages/pyopencl/__init__.py:206: CompilerWarning: Non-empty compiler output encountered. Set the environment variable PYOPENCL_COMPILER_OUTPUT=1 to see more. | |
"to see more.", CompilerWarning) | |
running kernel... | |
FAILED | |
====================================================================== short test summary info ======================================================================= | |
FAIL test/tf/test_tf.py::test_cwise_sqrt | |
============================================================================== FAILURES ============================================================================== | |
__________________________________________________________________________ test_cwise_sqrt ___________________________________________________________________________ | |
context = <pyopencl.Context at 0x222ece0 on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x22b9f30>> | |
q = <pyopencl.cffi_cl.CommandQueue object at 0x7f22d07aa668> | |
float_data = array([ 0.28847906, -0.46295407, -1.33800447, ..., 0.85320115, | |
-0.70543814, 0.55171764], dtype=float32) | |
float_data_gpu = <pyopencl.cffi_cl.Buffer object at 0x7f22d07aa630> | |
@pytest.mark.skipif(os.environ.get('TRAVIS', None) == 'true', reason='fails on travis mac cpu, not looked into why yet') | |
def test_cwise_sqrt(context, q, float_data, float_data_gpu): | |
options = test_common.cocl_options() | |
i = 0 | |
opt_options = [] | |
iropencl_options = [] | |
while i < len(options): | |
if options[i] == '--devicell-opt': | |
opt_options.append('-' + options[i + 1]) | |
i += 2 | |
continue | |
if options[i] in ['--run_branching_transforms', '--branches_as_switch']: | |
iropencl_options.append(options[i]) | |
i += 1 | |
continue | |
raise Exception('unknown option ', options[i]) | |
i += 1 | |
print('opt_options', opt_options) | |
print('iropencl_options', iropencl_options) | |
myrun([ | |
join(CLANG_HOME, 'bin/opt') | |
] + opt_options + [ | |
'-S', | |
'test/tf/samples/cwise_op_gpu_sqrt-device-noopt.ll', | |
'-o', '/tmp/test-opt.ll' | |
]) | |
myrun([ | |
'build/ir-to-opencl' | |
] + iropencl_options + [ | |
'--inputfile', '/tmp/test-opt.ll', | |
'--outputfile', '/tmp/test-device.cl', | |
'--kernelname', '_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_' | |
]) | |
with open('/tmp/test-device.cl', 'r') as f: | |
cl_sourcecode = f.read() | |
print('creating program...') | |
prog_unbuilt = cl.Program(context, cl_sourcecode) | |
print('building kernel...') | |
prog = prog_unbuilt.build() | |
N = 10 | |
# global struct Eigen__TensorEvaluator_nopointers* eval_nopointers, global float* eval_ptr0, long eval_ptr0_offset, global float* eval_ptr1, long eval_ptr1_offset, int size, local int *scratch | |
# what we need: | |
# struct Eigen__TensorEvaluator_nopointers Note that none of the values we copy across are actually use, so we can just create a sufficiently large buffer... | |
# global float *eval_ptr0 => this will receive the result. just create a sufficiently large buffer | |
# ptr0_offset => 0 | |
# eval_ptr1 => will contian the data we want to reduce | |
# eval_ptr1_offset=> 0 | |
# size => eg 10, to reduce 10 values | |
# scratch => set to workgroupsize * sizeof(float) | |
eval_nopointers_gpu = cl.Buffer(context, cl.mem_flags.READ_WRITE, size=4096) | |
eval_ptr0 = np.zeros(1024, dtype=np.float32) | |
eval_ptr0_gpu = cl.Buffer(context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=eval_ptr0) | |
eval_ptr0_offset = 0 | |
eval_ptr1 = np.random.uniform(0, 1, size=(1024,)).astype(np.float32) + 1.0 | |
eval_ptr1_gpu = cl.Buffer(context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=eval_ptr1) | |
eval_ptr1_offset = 0 | |
size = N | |
global_size = 256 | |
workgroup_size = 256 | |
scratch = workgroup_size * 4 | |
print('running kernel...') | |
prog.__getattr__('_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_'[:31])( | |
q, (global_size,), (workgroup_size,), | |
eval_nopointers_gpu, | |
eval_ptr0_gpu, offset_type(eval_ptr0_offset), | |
eval_ptr1_gpu, offset_type(eval_ptr1_offset), | |
np.int32(size), | |
> cl.LocalMemory(scratch) | |
) | |
test/tf/test_tf.py:108: | |
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | |
/usr/local/lib/python3.5/dist-packages/pyopencl/__init__.py:995: in kernel_call | |
return self._enqueue(self, queue, global_size, local_size, *args, **kwargs) | |
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | |
self = <pyopencl.cffi_cl.Kernel object at 0x7f22d079aef0>, queue = <pyopencl.cffi_cl.CommandQueue object at 0x7f22d07aa668>, global_size = (256,), local_size = (256,) | |
arg0 = <pyopencl.cffi_cl.Buffer object at 0x7f22de3f1470>, arg1 = <pyopencl.cffi_cl.Buffer object at 0x7f22de3f1198>, arg2 = 0 | |
arg3 = <pyopencl.cffi_cl.Buffer object at 0x7f22d079ae80>, arg4 = 0, arg5 = 10, arg6 = <pyopencl.cffi_cl.LocalMemory object at 0x7f22d07d2f88>, global_offset = None | |
g_times_l = None, wait_for = None | |
> ??? | |
E pyopencl.cffi_cl.LogicError: when processing argument #3 (1-based): clsetkernelarg failed: INVALID_ARG_SIZE | |
<generated function enqueue_knl__ZN5Eigen8internal15EigenMetaKe>:138: LogicError | |
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Interrupted: stopping after 1 failures !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
====================================================================== 1 failed in 0.81 seconds ====================================================================== |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment