Last active
November 28, 2016 16:35
-
-
Save inferrna/d1e608b4bdec84f7ace00993be608252 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ OFFSET32_BIT=1 CLANG_HOME=/usr/lib/llvm-3.8 py.test -svx test/test_compile.py -k pointerpointer.cu-myte6kernel | |
======================================================================== test session starts ========================================================================= | |
platform linux -- Python 3.5.2, pytest-2.9.1, py-1.4.31, pluggy-0.3.1 -- /usr/bin/python3 | |
cachedir: .cache | |
rootdir: /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl, inifile: pytest.ini | |
collecting 0 itemsmarking xfail | |
marking xfail | |
marking xfail | |
marking xfail | |
collected 9 items | |
test/test_compile.py::test_compile[test/pointerpointer.cu-myte6kernel] context <pyopencl.Context at 0x1c2fc60 on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x1cbcbd0>> | |
options [] | |
bin/cocl -c /tmp/testprog.cu | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
2 warnings generated. | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
build/ir-to-opencl --inputfile /tmp/testprog-device.ll --outputfile /tmp/testprog-device.cl --kernelname --add_ir_to_cl | |
terminate called after throwing an instance of 'std::runtime_error' | |
what(): Couldnt find kernel | |
mangledname _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii | |
options [] | |
bin/cocl -c /tmp/testprog.cu | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
2 warnings generated. | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
build/ir-to-opencl --inputfile /tmp/testprog-device.ll --outputfile /tmp/testprog-device.cl --kernelname _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii --add_ir_to_cl | |
FAILED | |
====================================================================== short test summary info ======================================================================= | |
FAIL test/test_compile.py::test_compile[test/pointerpointer.cu-myte6kernel] | |
============================================================================== FAILURES ============================================================================== | |
__________________________________________________________ test_compile[test/pointerpointer.cu-myte6kernel] __________________________________________________________ | |
context = <pyopencl.Context at 0x1c2fc60 on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x1cbcbd0>> | |
cu_filepath = 'test/pointerpointer.cu', kernelname = 'myte6kernel' | |
@pytest.mark.parametrize("cu_filepath,kernelname", get_test_definitions()) | |
def test_compile(context, cu_filepath, kernelname): | |
with open(cu_filepath, 'r') as f: | |
cu_code = f.read() | |
try: | |
cl_code = test_common.cu_to_cl(cu_code, '') | |
except: | |
pass | |
with open('/tmp/testprog-device.ll') as f: | |
ll_code = f.read() | |
for line in ll_code.split('\n'): | |
if line.startswith('define') and kernelname in line: | |
mangledname = line.split('@')[1].split('(')[0] | |
break | |
print('mangledname', mangledname) | |
cl_code = test_common.cu_to_cl(cu_code, mangledname) | |
> test_common.build_kernel(context, cl_code, mangledname) | |
test/test_compile.py:47: | |
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | |
test/test_common.py:232: in build_kernel | |
prog = cl.Program(context, cl_sourcecode).build() | |
/usr/local/lib/python3.5/dist-packages/pyopencl/__init__.py:382: in build | |
options=options, source=self._source) | |
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | |
self = <pyopencl.Program object at 0x7f30a1f62c88>, build_func = <function Program.build.<locals>.<lambda> at 0x7f30b019fe18> | |
options = ['-I', '/usr/local/lib/python3.5/dist-packages/pyopencl/cl'] | |
source = 'struct class_HalfImpl {\n short f0;\n};\nstruct class_GpuDevice {\n int f0;\n global struct class_StreamInte...itofp v8 */;\n v10 = (float)(v6 + 123);\n /* void v11 = store v10 data */;\n data[0] = v10;\n return;\n}\n' | |
def _build_and_catch_errors(self, build_func, options, source=None): | |
try: | |
return build_func() | |
except _cl.RuntimeError as e: | |
what = e.what | |
if options: | |
what = what + "\n(options: %s)" % " ".join(options) | |
if source is not None: | |
from tempfile import NamedTemporaryFile | |
srcfile = NamedTemporaryFile(mode="wt", delete=False, suffix=".cl") | |
try: | |
srcfile.write(source) | |
finally: | |
srcfile.close() | |
what = what + "\n(source saved as %s)" % srcfile.name | |
code = e.code | |
routine = e.routine | |
err = _cl.RuntimeError( | |
_ErrorRecord( | |
what=lambda: what, | |
code=lambda: code, | |
routine=lambda: routine)) | |
# Python 3.2 outputs the whole list of currently active exceptions | |
# This serves to remove one (redundant) level from that nesting. | |
> raise err | |
E pyopencl.cffi_cl.RuntimeError: clbuildprogram failed: BUILD_PROGRAM_FAILURE - | |
E | |
E Build on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x1cbcbd0>: | |
E | |
E "/tmp/OCL32651T1.cl", line 34: error: kernel arguments can't be declared with | |
E types | |
E bool/half/size_t/ptrdiff_t/intptr_t/uintptr_t/pointer-to-pointer | |
E kernel void _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii(global struct class_TensorEvaluator6* structs, uint structs_offset, global float* data, uint data_offset, global struct class_GpuDevice* gpudevices, uint gpudevices_offset, int a, int b, int c, local int *scratch); | |
E ^ | |
E | |
E "/tmp/OCL32651T1.cl", line 36: error: kernel arguments can't be declared with | |
E types | |
E bool/half/size_t/ptrdiff_t/intptr_t/uintptr_t/pointer-to-pointer | |
E kernel void _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii(global struct class_TensorEvaluator6* structs, uint structs_offset, global float* data, uint data_offset, global struct class_GpuDevice* gpudevices, uint gpudevices_offset, int a, int b, int c, local int *scratch) { | |
E ^ | |
E | |
E "/tmp/OCL32651T1.cl", line 46: warning: label "v1" was declared but never | |
E referenced | |
E v1:; | |
E ^ | |
E | |
E 2 errors detected in the compilation of "/tmp/OCL32651T1.cl". | |
E Frontend phase failed compilation. | |
E | |
E (options: -I /usr/local/lib/python3.5/dist-packages/pyopencl/cl) | |
E (source saved as /tmp/tmpf8iufbw0.cl) | |
/usr/local/lib/python3.5/dist-packages/pyopencl/__init__.py:417: RuntimeError | |
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Interrupted: stopping after 1 failures !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
====================================================== 8 tests deselected by '-kpointerpointer.cu-myte6kernel' ======================================================= | |
=============================================================== 1 failed, 8 deselected in 6.30 seconds =============================================================== |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct class_HalfImpl { | |
short f0; | |
}; | |
struct class_GpuDevice { | |
int f0; | |
global struct class_StreamInterface* f1; | |
}; | |
struct class_StreamInterface { | |
char f0; | |
}; | |
struct class_HalfBase { | |
struct class_HalfImpl f0; | |
}; | |
struct class_TensorEvaluator0 { | |
global struct class_Half* f0; | |
struct class_GpuDevice f1; | |
}; | |
struct class_Half { | |
struct class_HalfBase f0; | |
}; | |
struct class_TensorEvaluator2 { | |
global struct class_Half* f0; | |
struct class_GpuDevice f1; | |
}; | |
struct class_TensorEvaluator7 { | |
global struct class_Half* f0; | |
struct class_TensorEvaluator2 f1; | |
}; | |
struct class_TensorEvaluator6 { | |
struct class_TensorEvaluator0 f0; | |
struct class_TensorEvaluator7 f1; | |
}; | |
kernel void _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii(global struct class_TensorEvaluator6* structs, uint structs_offset, global float* data, uint data_offset, global struct class_GpuDevice* gpudevices, uint gpudevices_offset, int a, int b, int c, local int *scratch); | |
kernel void _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii(global struct class_TensorEvaluator6* structs, uint structs_offset, global float* data, uint data_offset, global struct class_GpuDevice* gpudevices, uint gpudevices_offset, int a, int b, int c, local int *scratch) { | |
gpudevices += gpudevices_offset; | |
data += data_offset; | |
structs += structs_offset; | |
float v10; | |
global struct class_Half* v4; | |
long v2; | |
short v6; | |
v1:; | |
/* long v2 = sext a */; | |
v2 = a; | |
/* struct class_Half** v3 = getelementptr structs v2 <unk> <unk> */; | |
/* struct class_Half* v4 = load v3 */; | |
v4 = (&(structs[v2].f0.f0))[0]; | |
/* short* v5 = getelementptr v4 v2 <unk> <unk> <unk> */; | |
/* short v6 = load v5 */; | |
v6 = (&(v4[v2].f0.f0.f0))[0]; | |
/* int v7 = sext v6 */; | |
/* int v8 = add v7 <unk> */; | |
/* float v10 = sitofp v8 */; | |
v10 = (float)(v6 + 123); | |
/* void v11 = store v10 data */; | |
data[0] = v10; | |
return; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = '/tmp/testprog-device-noopt.ll' | |
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" | |
target triple = "nvptx64-nvidia-cuda" | |
%struct.MyStruct = type { float, i32 } | |
%class.Half = type { %class.HalfBase } | |
%class.HalfBase = type { %class.HalfImpl } | |
%class.HalfImpl = type { i16 } | |
%class.TensorEvaluator6 = type { %class.TensorEvaluator0, %class.TensorEvaluator7 } | |
%class.TensorEvaluator0 = type { %class.Half*, %class.GpuDevice } | |
%class.GpuDevice = type { i32, %class.StreamInterface* } | |
%class.StreamInterface = type { i8 } | |
%class.TensorEvaluator7 = type { %class.Half*, %class.TensorEvaluator2 } | |
%class.TensorEvaluator2 = type { %class.Half*, %class.GpuDevice } | |
@.str = private unnamed_addr constant [5 x i8] c"NONE\00", align 1 | |
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @_ZL21__nvvm_reflect_anchorv to i8*)], section "llvm.metadata" | |
; Function Attrs: nounwind readnone | |
define internal i32 @_ZL21__nvvm_reflect_anchorv() #0 { | |
%1 = tail call i32 @__nvvm_reflect(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0)) #3 | |
ret i32 %1 | |
} | |
; Function Attrs: nounwind readnone | |
declare i32 @__nvvm_reflect(i8*) #0 | |
; Function Attrs: norecurse nounwind readonly | |
define float @_Z9sumStructPP8MyStructi(%struct.MyStruct** nocapture readonly %p_structs, i32 %N) #1 { | |
%1 = icmp sgt i32 %N, 0 | |
br i1 %1, label %.lr.ph.preheader, label %._crit_edge | |
.lr.ph.preheader: ; preds = %0 | |
%xtraiter = and i32 %N, 1 | |
%lcmp.mod = icmp eq i32 %xtraiter, 0 | |
br i1 %lcmp.mod, label %.lr.ph.preheader.split, label %.lr.ph.prol | |
.lr.ph.prol: ; preds = %.lr.ph.preheader | |
%2 = load %struct.MyStruct*, %struct.MyStruct** %p_structs, align 8, !tbaa !9 | |
%3 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %2, i64 0, i32 0 | |
%4 = load float, float* %3, align 4, !tbaa !13 | |
%5 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %2, i64 0, i32 1 | |
%6 = load i32, i32* %5, align 4, !tbaa !17 | |
%7 = sitofp i32 %6 to float | |
%8 = fmul float %7, 3.500000e+00 | |
%9 = fadd float %4, %8 | |
%10 = fadd float %9, 0.000000e+00 | |
br label %.lr.ph.preheader.split | |
.lr.ph.preheader.split: ; preds = %.lr.ph.prol, %.lr.ph.preheader | |
%.lcssa.unr = phi float [ undef, %.lr.ph.preheader ], [ %10, %.lr.ph.prol ] | |
%sum.02.unr = phi float [ 0.000000e+00, %.lr.ph.preheader ], [ %10, %.lr.ph.prol ] | |
%i.01.unr = phi i32 [ 0, %.lr.ph.preheader ], [ 1, %.lr.ph.prol ] | |
%11 = icmp eq i32 %N, 1 | |
br i1 %11, label %._crit_edge.loopexit, label %.lr.ph.preheader.split.split | |
.lr.ph.preheader.split.split: ; preds = %.lr.ph.preheader.split | |
br label %.lr.ph | |
._crit_edge.loopexit.unr-lcssa: ; preds = %.lr.ph | |
%.lcssa3 = phi float [ %34, %.lr.ph ] | |
br label %._crit_edge.loopexit | |
._crit_edge.loopexit: ; preds = %._crit_edge.loopexit.unr-lcssa, %.lr.ph.preheader.split | |
%.lcssa = phi float [ %.lcssa.unr, %.lr.ph.preheader.split ], [ %.lcssa3, %._crit_edge.loopexit.unr-lcssa ] | |
br label %._crit_edge | |
._crit_edge: ; preds = %._crit_edge.loopexit, %0 | |
%sum.0.lcssa = phi float [ 0.000000e+00, %0 ], [ %.lcssa, %._crit_edge.loopexit ] | |
ret float %sum.0.lcssa | |
.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader.split.split | |
%sum.02 = phi float [ %sum.02.unr, %.lr.ph.preheader.split.split ], [ %34, %.lr.ph ] | |
%i.01 = phi i32 [ %i.01.unr, %.lr.ph.preheader.split.split ], [ %35, %.lr.ph ] | |
%12 = sext i32 %i.01 to i64 | |
%13 = getelementptr inbounds %struct.MyStruct*, %struct.MyStruct** %p_structs, i64 %12 | |
%14 = load %struct.MyStruct*, %struct.MyStruct** %13, align 8, !tbaa !9 | |
%15 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %14, i64 0, i32 0 | |
%16 = load float, float* %15, align 4, !tbaa !13 | |
%17 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %14, i64 0, i32 1 | |
%18 = load i32, i32* %17, align 4, !tbaa !17 | |
%19 = sitofp i32 %18 to float | |
%20 = fmul float %19, 3.500000e+00 | |
%21 = fadd float %16, %20 | |
%22 = fadd float %sum.02, %21 | |
%23 = add nuw nsw i32 %i.01, 1 | |
%24 = sext i32 %23 to i64 | |
%25 = getelementptr inbounds %struct.MyStruct*, %struct.MyStruct** %p_structs, i64 %24 | |
%26 = load %struct.MyStruct*, %struct.MyStruct** %25, align 8, !tbaa !9 | |
%27 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %26, i64 0, i32 0 | |
%28 = load float, float* %27, align 4, !tbaa !13 | |
%29 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %26, i64 0, i32 1 | |
%30 = load i32, i32* %29, align 4, !tbaa !17 | |
%31 = sitofp i32 %30 to float | |
%32 = fmul float %31, 3.500000e+00 | |
%33 = fadd float %28, %32 | |
%34 = fadd float %22, %33 | |
%35 = add nsw i32 %i.01, 2 | |
%exitcond.1 = icmp eq i32 %35, %N | |
br i1 %exitcond.1, label %._crit_edge.loopexit.unr-lcssa, label %.lr.ph | |
} | |
; Function Attrs: norecurse nounwind | |
define void @_Z8mykernelPfP8MyStructi(float* nocapture %data, %struct.MyStruct* %structs, i32 %N) #2 { | |
%1 = icmp sgt i32 %N, 0 | |
%2 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %structs, i64 0, i32 0 | |
br i1 %1, label %.lr.ph.i.preheader, label %._Z9sumStructPP8MyStructi.exit_crit_edge | |
._Z9sumStructPP8MyStructi.exit_crit_edge: ; preds = %0 | |
%.pre17 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %structs, i64 0, i32 1 | |
br label %_Z9sumStructPP8MyStructi.exit | |
.lr.ph.i.preheader: ; preds = %0 | |
%3 = load float, float* %2, align 4 | |
%4 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %structs, i64 0, i32 1 | |
%5 = load i32, i32* %4, align 4 | |
%6 = sitofp i32 %5 to float | |
%7 = fmul float %6, 3.500000e+00 | |
%8 = fadd float %3, %7 | |
%9 = add i32 %N, -1 | |
%xtraiter = and i32 %N, 7 | |
%lcmp.mod = icmp eq i32 %xtraiter, 0 | |
br i1 %lcmp.mod, label %.lr.ph.i.preheader.split, label %.lr.ph.i.prol.preheader | |
.lr.ph.i.prol.preheader: ; preds = %.lr.ph.i.preheader | |
br label %.lr.ph.i.prol | |
.lr.ph.i.prol: ; preds = %.lr.ph.i.prol, %.lr.ph.i.prol.preheader | |
%sum.02.i.prol = phi float [ %10, %.lr.ph.i.prol ], [ 0.000000e+00, %.lr.ph.i.prol.preheader ] | |
%i.01.i.prol = phi i32 [ %11, %.lr.ph.i.prol ], [ 0, %.lr.ph.i.prol.preheader ] | |
%prol.iter = phi i32 [ %prol.iter.sub, %.lr.ph.i.prol ], [ %xtraiter, %.lr.ph.i.prol.preheader ] | |
%10 = fadd float %sum.02.i.prol, %8 | |
%11 = add nuw nsw i32 %i.01.i.prol, 1 | |
%prol.iter.sub = add i32 %prol.iter, -1 | |
%prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0 | |
br i1 %prol.iter.cmp, label %.lr.ph.i.preheader.split.loopexit, label %.lr.ph.i.prol, !llvm.loop !18 | |
.lr.ph.i.preheader.split.loopexit: ; preds = %.lr.ph.i.prol | |
%.lcssa28 = phi i32 [ %11, %.lr.ph.i.prol ] | |
%.lcssa27 = phi float [ %10, %.lr.ph.i.prol ] | |
br label %.lr.ph.i.preheader.split | |
.lr.ph.i.preheader.split: ; preds = %.lr.ph.i.preheader.split.loopexit, %.lr.ph.i.preheader | |
%.lcssa24.unr = phi float [ undef, %.lr.ph.i.preheader ], [ %.lcssa27, %.lr.ph.i.preheader.split.loopexit ] | |
%sum.02.i.unr = phi float [ 0.000000e+00, %.lr.ph.i.preheader ], [ %.lcssa27, %.lr.ph.i.preheader.split.loopexit ] | |
%i.01.i.unr = phi i32 [ 0, %.lr.ph.i.preheader ], [ %.lcssa28, %.lr.ph.i.preheader.split.loopexit ] | |
%12 = icmp ult i32 %9, 7 | |
br i1 %12, label %_Z9sumStructPP8MyStructi.exit.loopexit, label %.lr.ph.i.preheader.split.split | |
.lr.ph.i.preheader.split.split: ; preds = %.lr.ph.i.preheader.split | |
br label %.lr.ph.i | |
.lr.ph.i: ; preds = %.lr.ph.i, %.lr.ph.i.preheader.split.split | |
%sum.02.i = phi float [ %sum.02.i.unr, %.lr.ph.i.preheader.split.split ], [ %20, %.lr.ph.i ] | |
%i.01.i = phi i32 [ %i.01.i.unr, %.lr.ph.i.preheader.split.split ], [ %21, %.lr.ph.i ] | |
%13 = fadd float %sum.02.i, %8 | |
%14 = fadd float %13, %8 | |
%15 = fadd float %14, %8 | |
%16 = fadd float %15, %8 | |
%17 = fadd float %16, %8 | |
%18 = fadd float %17, %8 | |
%19 = fadd float %18, %8 | |
%20 = fadd float %19, %8 | |
%21 = add nsw i32 %i.01.i, 8 | |
%exitcond.i.7 = icmp eq i32 %21, %N | |
br i1 %exitcond.i.7, label %_Z9sumStructPP8MyStructi.exit.loopexit.unr-lcssa, label %.lr.ph.i | |
_Z9sumStructPP8MyStructi.exit.loopexit.unr-lcssa: ; preds = %.lr.ph.i | |
%.lcssa26 = phi float [ %20, %.lr.ph.i ] | |
br label %_Z9sumStructPP8MyStructi.exit.loopexit | |
_Z9sumStructPP8MyStructi.exit.loopexit: ; preds = %_Z9sumStructPP8MyStructi.exit.loopexit.unr-lcssa, %.lr.ph.i.preheader.split | |
%.lcssa24 = phi float [ %.lcssa24.unr, %.lr.ph.i.preheader.split ], [ %.lcssa26, %_Z9sumStructPP8MyStructi.exit.loopexit.unr-lcssa ] | |
br label %_Z9sumStructPP8MyStructi.exit | |
_Z9sumStructPP8MyStructi.exit: ; preds = %_Z9sumStructPP8MyStructi.exit.loopexit, %._Z9sumStructPP8MyStructi.exit_crit_edge | |
%.pre-phi18 = phi i32* [ %.pre17, %._Z9sumStructPP8MyStructi.exit_crit_edge ], [ %4, %_Z9sumStructPP8MyStructi.exit.loopexit ] | |
%sum.0.lcssa.i = phi float [ 0.000000e+00, %._Z9sumStructPP8MyStructi.exit_crit_edge ], [ %.lcssa24, %_Z9sumStructPP8MyStructi.exit.loopexit ] | |
store float %sum.0.lcssa.i, float* %data, align 4, !tbaa !20 | |
%22 = load float, float* %2, align 4 | |
%23 = load i32, i32* %.pre-phi18, align 4 | |
%24 = sitofp i32 %23 to float | |
%25 = fmul float %24, 3.500000e+00 | |
%26 = fadd float %22, %25 | |
br label %.lr.ph.i11 | |
.lr.ph.i11: ; preds = %.lr.ph.i11, %_Z9sumStructPP8MyStructi.exit | |
%sum.02.i8 = phi float [ 0.000000e+00, %_Z9sumStructPP8MyStructi.exit ], [ %29, %.lr.ph.i11 ] | |
%i.01.i9 = phi i32 [ 0, %_Z9sumStructPP8MyStructi.exit ], [ %30, %.lr.ph.i11 ] | |
%27 = fadd float %sum.02.i8, %26 | |
%28 = fadd float %27, %26 | |
%29 = fadd float %28, %26 | |
%30 = add nsw i32 %i.01.i9, 3 | |
%exitcond.i10.2 = icmp eq i32 %30, 123 | |
br i1 %exitcond.i10.2, label %_Z9sumStructPP8MyStructi.exit12, label %.lr.ph.i11 | |
_Z9sumStructPP8MyStructi.exit12: ; preds = %.lr.ph.i11 | |
%.lcssa25 = phi float [ %29, %.lr.ph.i11 ] | |
%31 = getelementptr inbounds float, float* %data, i64 3 | |
store float %.lcssa25, float* %31, align 4, !tbaa !20 | |
%32 = load float, float* %2, align 4 | |
%33 = load i32, i32* %.pre-phi18, align 4 | |
%34 = sitofp i32 %33 to float | |
%35 = fmul float %34, 3.500000e+00 | |
%36 = fadd float %32, %35 | |
br label %.lr.ph.i5 | |
.lr.ph.i5: ; preds = %.lr.ph.i5, %_Z9sumStructPP8MyStructi.exit12 | |
%sum.02.i2 = phi float [ 0.000000e+00, %_Z9sumStructPP8MyStructi.exit12 ], [ %51, %.lr.ph.i5 ] | |
%i.01.i3 = phi i32 [ 0, %_Z9sumStructPP8MyStructi.exit12 ], [ %52, %.lr.ph.i5 ] | |
%37 = fadd float %sum.02.i2, %36 | |
%38 = fadd float %37, %36 | |
%39 = fadd float %38, %36 | |
%40 = fadd float %39, %36 | |
%41 = fadd float %40, %36 | |
%42 = fadd float %41, %36 | |
%43 = fadd float %42, %36 | |
%44 = fadd float %43, %36 | |
%45 = fadd float %44, %36 | |
%46 = fadd float %45, %36 | |
%47 = fadd float %46, %36 | |
%48 = fadd float %47, %36 | |
%49 = fadd float %48, %36 | |
%50 = fadd float %49, %36 | |
%51 = fadd float %50, %36 | |
%52 = add nsw i32 %i.01.i3, 15 | |
%exitcond.i4.14 = icmp eq i32 %52, 12300 | |
br i1 %exitcond.i4.14, label %_Z9sumStructPP8MyStructi.exit6, label %.lr.ph.i5 | |
_Z9sumStructPP8MyStructi.exit6: ; preds = %.lr.ph.i5 | |
%.lcssa = phi float [ %51, %.lr.ph.i5 ] | |
%53 = getelementptr inbounds float, float* %data, i64 4 | |
store float %.lcssa, float* %53, align 4, !tbaa !20 | |
ret void | |
} | |
; Function Attrs: norecurse nounwind readonly | |
define float @_Z12getHalfValueP4Halfi(%class.Half* nocapture readonly %half_, i32 %a) #1 { | |
%1 = sext i32 %a to i64 | |
%2 = getelementptr inbounds %class.Half, %class.Half* %half_, i64 %1, i32 0, i32 0, i32 0 | |
%3 = load i16, i16* %2, align 2, !tbaa !21 | |
%4 = sext i16 %3 to i32 | |
%5 = add nsw i32 %4, 123 | |
%6 = sitofp i32 %5 to float | |
ret float %6 | |
} | |
; Function Attrs: norecurse nounwind | |
define void @_Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii(%class.TensorEvaluator6* nocapture readonly %structs, float* nocapture %data, %class.GpuDevice* nocapture readnone %gpudevices, i32 %a, i32 %b, i32 %c) #2 { | |
%1 = sext i32 %a to i64 | |
%2 = getelementptr inbounds %class.TensorEvaluator6, %class.TensorEvaluator6* %structs, i64 %1, i32 0, i32 0 | |
%3 = load %class.Half*, %class.Half** %2, align 8, !tbaa !24 | |
%4 = getelementptr inbounds %class.Half, %class.Half* %3, i64 %1, i32 0, i32 0, i32 0 | |
%5 = load i16, i16* %4, align 2, !tbaa !21 | |
%6 = sext i16 %5 to i32 | |
%7 = add nsw i32 %6, 123 | |
%8 = sitofp i32 %7 to float | |
store float %8, float* %data, align 4, !tbaa !20 | |
ret void | |
} | |
attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "target-features"="+ptx42" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #1 = { norecurse nounwind readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "target-features"="+ptx42" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #2 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "target-features"="+ptx42" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #3 = { nounwind readnone } | |
!nvvm.annotations = !{!0, !1, !2, !3, !2, !4, !4, !4, !4, !5, !5, !4} | |
!llvm.module.flags = !{!6} | |
!llvm.ident = !{!7} | |
!nvvm.internalize.after.link = !{} | |
!nvvmir.version = !{!8} | |
!0 = !{void (float*, %struct.MyStruct*, i32)* @_Z8mykernelPfP8MyStructi, !"kernel", i32 1} | |
!1 = !{void (%class.TensorEvaluator6*, float*, %class.GpuDevice*, i32, i32, i32)* @_Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii, !"kernel", i32 1} | |
!2 = !{null, !"align", i32 8} | |
!3 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} | |
!4 = !{null, !"align", i32 16} | |
!5 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} | |
!6 = !{i32 1, !"PIC Level", i32 2} | |
!7 = !{!"clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)"} | |
!8 = !{i32 1, i32 2} | |
!9 = !{!10, !10, i64 0} | |
!10 = !{!"any pointer", !11, i64 0} | |
!11 = !{!"omnipotent char", !12, i64 0} | |
!12 = !{!"Simple C/C++ TBAA"} | |
!13 = !{!14, !15, i64 0} | |
!14 = !{!"_ZTS8MyStruct", !15, i64 0, !16, i64 4} | |
!15 = !{!"float", !11, i64 0} | |
!16 = !{!"int", !11, i64 0} | |
!17 = !{!14, !16, i64 4} | |
!18 = distinct !{!18, !19} | |
!19 = !{!"llvm.loop.unroll.disable"} | |
!20 = !{!15, !15, i64 0} | |
!21 = !{!22, !23, i64 0} | |
!22 = !{!"_ZTS8HalfImpl", !23, i64 0} | |
!23 = !{!"short", !11, i64 0} | |
!24 = !{!25, !10, i64 0} | |
!25 = !{!"_ZTS16TensorEvaluator6", !26, i64 0, !28, i64 24} | |
!26 = !{!"_ZTS16TensorEvaluator0", !10, i64 0, !27, i64 8} | |
!27 = !{!"_ZTS9GpuDevice", !16, i64 0, !10, i64 8} | |
!28 = !{!"_ZTS16TensorEvaluator7", !10, i64 0, !29, i64 8} | |
!29 = !{!"_ZTS16TensorEvaluator2", !10, i64 0, !27, i64 8} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment