Last active
September 18, 2019 07:16
-
-
Save isuruf/c05160e75fb8fa7eef9119ad7902fbfd to your computer and use it in GitHub Desktop.
LLVM bug
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the same -O3 pipeline with the loop vectorizer enabled over both IR
# inputs and write the optimized IR as text (presumably so that out1.ll and
# out2.ll can be compared -- the gist is titled "LLVM bug").
clang-7 in1.ll -O3 -fvectorize -emit-llvm -S -o out1.ll
clang-7 in2.ll -O3 -fvectorize -emit-llvm -S -o out2.ll
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- in1.ll 2019-09-18 02:05:54.012101785 -0500 | |
+++ in2.ll 2019-09-18 02:05:44.811944483 -0500 | |
@@ -52,9 +52,9 @@ | |
%mul.i = mul nsw i32 %conv.i, -128 | |
%31 = load i64, i64* %_local_id_x, align 8, !tbaa !18, !noalias !12, !llvm.mem.parallel_loop_access !16 | |
%conv2.i = trunc i64 %30 to i32 | |
- %add.i = add i32 %0, -1 | |
- %add4.i = add i32 %add.i, %mul.i | |
- %add5.i = sub i32 %add4.i, %conv2.i | |
+ %add.i = xor i32 %conv2.i, -1 | |
+ %add4.i = add i32 %add.i, %0 | |
+ %add5.i = add i32 %add4.i, %mul.i | |
%cmp.i = icmp sgt i32 %add5.i, -1 | |
br i1 %cmp.i, label %32, label %.r_exit.i | |
@@ -144,9 +144,9 @@ | |
%mul.i.i = mul nsw i32 %conv.i.i, -128 | |
%44 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !30, !llvm.mem.parallel_loop_access !16 | |
%conv2.i.i = trunc i64 %43 to i32 | |
- %add.i.i = add i32 %9, -1 | |
- %add4.i.i = add i32 %add.i.i, %mul.i.i | |
- %add5.i.i = sub i32 %add4.i.i, %conv2.i.i | |
+ %add.i.i = xor i32 %conv2.i.i, -1 | |
+ %add4.i.i = add i32 %add.i.i, %9 | |
+ %add5.i.i = add i32 %add4.i.i, %mul.i.i | |
%cmp.i.i = icmp sgt i32 %add5.i.i, -1 | |
br i1 %cmp.i.i, label %45, label %.r_exit.i.i | |
@@ -242,9 +242,9 @@ | |
%mul.i.i = mul nsw i32 %conv.i.i, -128 | |
%42 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !40, !llvm.mem.parallel_loop_access !16 | |
%conv2.i.i = trunc i64 %41 to i32 | |
- %add.i.i = add i32 %9, -1 | |
- %add4.i.i = add i32 %add.i.i, %mul.i.i | |
- %add5.i.i = sub i32 %add4.i.i, %conv2.i.i | |
+ %add.i.i = xor i32 %conv2.i.i, -1 | |
+ %add4.i.i = add i32 %add.i.i, %9 | |
+ %add5.i.i = add i32 %add4.i.i, %mul.i.i | |
%cmp.i.i = icmp sgt i32 %add5.i.i, -1 | |
br i1 %cmp.i.i, label %43, label %.r_exit.i.i |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'parallel_bc'
source_filename = "parallel_bc"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; External i64 globals for the group and local ids. None of the functions in
; this module reference them: the functions below use function-local allocas
; with the same base names (%_local_id_x etc.) instead.
@_group_id_x = external local_unnamed_addr global i64, align 8
@_group_id_y = external local_unnamed_addr global i64, align 8
@_group_id_z = external local_unnamed_addr global i64, align 8
@_local_id_x = external local_unnamed_addr global i64, align 8
@_local_id_y = external local_unnamed_addr global i64, align 8
@_local_id_z = external local_unnamed_addr global i64, align 8
; pocl.barrier: empty stub whose body only returns. It is not called by any
; function in this module; it carries attribute group #0 (noduplicate).
; Function Attrs: noduplicate
define linkonce void @pocl.barrier() #0 {
  ret void
}
; _pocl_kernel_grudge_assign_0
;
; Loops over a function-local work-item counter %_local_id_x from 0 up to
; (but not including) 128 and, for each value that passes a bounds guard,
; copies one 64-bit element from the buffer %3 to the buffer %1.
;
; Arguments (names taken from !kernel_arg_name !9, types from !7):
;   %0  i32      grdg_n
;   %1  double*  expr_8                          (written; noalias)
;   %2  i32      expr_8_offset
;   %3  double*  grdg_sub_discr_dx0_dr0          (read; noalias readonly)
;   %4  i32      grdg_sub_discr_dx0_dr0_offset
;   %5  pointer to a context struct; fields 3, 4 and 5 are loaded below but
;       the loaded values are never used in this function
;   %6  i64      truncated to i32 and scaled by 128 -- presumably the group
;       id in x; TODO confirm against the pocl launcher
;   %7, %8  i64  unused in this function
;
; This is the in1.ll form of the guard:
;   (%0 - 1) + (-128 * trunc(%6)) - trunc(local_id_x)
; NOTE(review): this file is a compiler-bug reproducer; do not "simplify" the
; redundant loads/stores or reorder the guard arithmetic.
; Function Attrs: alwaysinline norecurse nounwind
define void @_pocl_kernel_grudge_assign_0(i32, double* noalias nocapture, i32, double* noalias nocapture readonly, i32, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 !pocl_generated !11 {
  ; Entry block (implicit label %9): private slots for the local ids.
  %_local_id_x = alloca i64
  %_local_id_y = alloca i64
  %_local_id_z = alloca i64

  ; Fields 3, 4 and 5 of the context struct are each read twice; none of the
  ; loaded values (%12, %15, %18, %21, %24, %27) is used afterwards.
  %10 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 3
  %11 = getelementptr i8*, i8** %10, i64 0
  %12 = load i8*, i8** %11
  %13 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 4
  %14 = getelementptr i32*, i32** %13, i64 0
  %15 = load i32*, i32** %14
  %16 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 5
  %17 = getelementptr i32, i32* %16, i64 0
  %18 = load i32, i32* %17
  %19 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 3
  %20 = getelementptr i8*, i8** %19, i64 0
  %21 = load i8*, i8** %20
  %22 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 4
  %23 = getelementptr i32*, i32** %22, i64 0
  %24 = load i32*, i32** %23
  %25 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 5
  %26 = getelementptr i32, i32* %25, i64 0
  %27 = load i32, i32* %26

  ; Zero all three local ids (x is stored twice in the generated code).
  store i64 0, i64* %_local_id_x, !noalias !12
  store i64 0, i64* %_local_id_y, !noalias !12
  store i64 0, i64* %_local_id_z, !noalias !12
  store i64 0, i64* %_local_id_x, !noalias !12
  br label %pregion_for_entry..i

; Loop header: one iteration per value of local_id_x. Memory accesses here
; carry !llvm.mem.parallel_loop_access !16, tying them to loop !17.
pregion_for_entry..i: ; preds = %.r_exit.i, %9
  ; %28, %29 and %31 are loaded but never used; only %30 (x) feeds the body.
  %28 = load i64, i64* %_local_id_z, !noalias !12, !llvm.mem.parallel_loop_access !16
  %29 = load i64, i64* %_local_id_y, !noalias !12, !llvm.mem.parallel_loop_access !16
  %30 = load i64, i64* %_local_id_x, !noalias !12, !llvm.mem.parallel_loop_access !16
  %conv.i = trunc i64 %6 to i32
  %mul.i = mul nsw i32 %conv.i, -128
  %31 = load i64, i64* %_local_id_x, align 8, !tbaa !18, !noalias !12, !llvm.mem.parallel_loop_access !16
  %conv2.i = trunc i64 %30 to i32
  ; Guard value: (%0 - 1) + (-128 * conv) - local_id_x. The copy runs only
  ; when this is signed-greater-than -1, i.e. non-negative.
  %add.i = add i32 %0, -1
  %add4.i = add i32 %add.i, %mul.i
  %add5.i = sub i32 %add4.i, %conv2.i
  %cmp.i = icmp sgt i32 %add5.i, -1
  br i1 %cmp.i, label %32, label %.r_exit.i

; Copy block: base index = (conv << 7) + local_id_x.
; <label>:32: ; preds = %pregion_for_entry..i
  %mul9.i = shl nsw i32 %conv.i, 7
  %add10.i = add i32 %mul9.i, %conv2.i
  ; Source element: %3[base + %4], read as raw i64 bits through a bitcast.
  %add13.i = add i32 %add10.i, %4
  %idxprom.i = sext i32 %add13.i to i64
  %arrayidx.i = getelementptr inbounds double, double* %3, i64 %idxprom.i
  %33 = bitcast double* %arrayidx.i to i64*
  %34 = load i64, i64* %33, align 8, !tbaa !22, !alias.scope !24, !noalias !25, !llvm.mem.parallel_loop_access !16
  ; Destination element: %1[base + %2].
  %add20.i = add i32 %add10.i, %2
  %idxprom21.i = sext i32 %add20.i to i64
  %arrayidx22.i = getelementptr inbounds double, double* %1, i64 %idxprom21.i
  %35 = bitcast double* %arrayidx22.i to i64*
  store i64 %34, i64* %35, align 8, !tbaa !22, !alias.scope !25, !noalias !24, !llvm.mem.parallel_loop_access !16
  br label %.r_exit.i

; Loop latch: local_id_x += 1; continue while the new value is < 128
; (unsigned compare).
.r_exit.i: ; preds = %32, %pregion_for_entry..i
  %36 = load i64, i64* %_local_id_x, !noalias !12
  %37 = add i64 %36, 1
  store i64 %37, i64* %_local_id_x, !noalias !12
  %38 = load i64, i64* %_local_id_x, !noalias !12
  %39 = icmp ult i64 %38, 128
  br i1 %39, label %pregion_for_entry..i, label %grudge_assign_0.exit, !llvm.loop !17

grudge_assign_0.exit: ; preds = %.r_exit.i
  ret void

; uselistorder directives
  uselistorder { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, { 3, 4, 5, 0, 1, 2 }
}
; _pocl_kernel_grudge_assign_0_workgroup
;
; Wrapper that unpacks five kernel arguments from the i8* array %0 and then
; runs the same guarded 128-iteration copy loop as
; @_pocl_kernel_grudge_assign_0 (value names carry the ".i.i" suffix).
;
; Arguments:
;   %0  i8**  argument array; each slot points at the argument's storage:
;             slot 0 -> i32, slot 1 -> double*, slot 2 -> i32,
;             slot 3 -> double*, slot 4 -> i32
;   %1  pointer to a context struct; fields 3, 4 and 5 are loaded but the
;       loaded values are never used here
;   %2  i64   truncated to i32 and scaled by 128 -- presumably the group id
;       in x; TODO confirm against the pocl launcher
;   %3, %4  i64  unused in this function
;
; This is the in1.ll form of the guard:
;   (%9 - 1) + (-128 * trunc(%2)) - trunc(local_id_x)
define void @_pocl_kernel_grudge_assign_0_workgroup(i8**, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) {
  ; Entry block (implicit label %5): private slots for the local ids.
  %_local_id_x.i = alloca i64
  %_local_id_y.i = alloca i64
  %_local_id_z.i = alloca i64

  ; Slot 0: i32 value (used in the guard).
  %6 = getelementptr i8*, i8** %0, i32 0
  %7 = load i8*, i8** %6
  %8 = bitcast i8* %7 to i32*
  %9 = load i32, i32* %8
  ; Slot 1: destination buffer; the slot holds a pointer to the double*.
  %10 = getelementptr i8*, i8** %0, i32 1
  %11 = load i8*, i8** %10
  %12 = bitcast i8* %11 to double**
  %13 = load double*, double** %12
  ; Slot 2: i32 offset added to the destination index.
  %14 = getelementptr i8*, i8** %0, i32 2
  %15 = load i8*, i8** %14
  %16 = bitcast i8* %15 to i32*
  %17 = load i32, i32* %16
  ; Slot 3: source buffer; the slot holds a pointer to the double*.
  %18 = getelementptr i8*, i8** %0, i32 3
  %19 = load i8*, i8** %18
  %20 = bitcast i8* %19 to double**
  %21 = load double*, double** %20
  ; Slot 4: i32 offset added to the source index.
  %22 = getelementptr i8*, i8** %0, i32 4
  %23 = load i8*, i8** %22
  %24 = bitcast i8* %23 to i32*
  %25 = load i32, i32* %24

  ; Lifetime of the three 8-byte local-id slots starts here.
  %26 = bitcast i64* %_local_id_x.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %26)
  %27 = bitcast i64* %_local_id_y.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %27)
  %28 = bitcast i64* %_local_id_z.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %28)

  ; Fields 3, 4 and 5 of the context struct are each read twice; none of the
  ; loaded values (%30, %32, %34, %36, %38, %40) is used afterwards.
  %29 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
  %30 = load i8*, i8** %29, !noalias !26
  %31 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
  %32 = load i32*, i32** %31, !noalias !26
  %33 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
  %34 = load i32, i32* %33, !noalias !26
  %35 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
  %36 = load i8*, i8** %35, !noalias !26
  %37 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
  %38 = load i32*, i32** %37, !noalias !26
  %39 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
  %40 = load i32, i32* %39, !noalias !26

  ; Zero all three local ids (x is stored twice in the generated code).
  store i64 0, i64* %_local_id_x.i, !noalias !30
  store i64 0, i64* %_local_id_y.i, !noalias !30
  store i64 0, i64* %_local_id_z.i, !noalias !30
  store i64 0, i64* %_local_id_x.i, !noalias !30
  br label %pregion_for_entry..i.i

; Loop header: one iteration per value of local_id_x. Memory accesses here
; carry !llvm.mem.parallel_loop_access !16, tying them to loop !17.
pregion_for_entry..i.i: ; preds = %.r_exit.i.i, %5
  ; %41, %42 and %44 are loaded but never used; only %43 (x) feeds the body.
  %41 = load i64, i64* %_local_id_z.i, !noalias !30, !llvm.mem.parallel_loop_access !16
  %42 = load i64, i64* %_local_id_y.i, !noalias !30, !llvm.mem.parallel_loop_access !16
  %43 = load i64, i64* %_local_id_x.i, !noalias !30, !llvm.mem.parallel_loop_access !16
  %conv.i.i = trunc i64 %2 to i32
  %mul.i.i = mul nsw i32 %conv.i.i, -128
  %44 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !30, !llvm.mem.parallel_loop_access !16
  %conv2.i.i = trunc i64 %43 to i32
  ; Guard value: (%9 - 1) + (-128 * conv) - local_id_x. The copy runs only
  ; when this is signed-greater-than -1, i.e. non-negative.
  %add.i.i = add i32 %9, -1
  %add4.i.i = add i32 %add.i.i, %mul.i.i
  %add5.i.i = sub i32 %add4.i.i, %conv2.i.i
  %cmp.i.i = icmp sgt i32 %add5.i.i, -1
  br i1 %cmp.i.i, label %45, label %.r_exit.i.i

; Copy block: base index = (conv << 7) + local_id_x.
; <label>:45: ; preds = %pregion_for_entry..i.i
  %mul9.i.i = shl nsw i32 %conv.i.i, 7
  %add10.i.i = add i32 %mul9.i.i, %conv2.i.i
  ; Source element: %21[base + %25], read as raw i64 bits through a bitcast.
  %add13.i.i = add i32 %add10.i.i, %25
  %idxprom.i.i = sext i32 %add13.i.i to i64
  %arrayidx.i.i = getelementptr inbounds double, double* %21, i64 %idxprom.i.i
  %46 = bitcast double* %arrayidx.i.i to i64*
  %47 = load i64, i64* %46, align 8, !tbaa !22, !alias.scope !34, !noalias !35, !llvm.mem.parallel_loop_access !16
  ; Destination element: %13[base + %17].
  %add20.i.i = add i32 %add10.i.i, %17
  %idxprom21.i.i = sext i32 %add20.i.i to i64
  %arrayidx22.i.i = getelementptr inbounds double, double* %13, i64 %idxprom21.i.i
  %48 = bitcast double* %arrayidx22.i.i to i64*
  store i64 %47, i64* %48, align 8, !tbaa !22, !alias.scope !35, !noalias !34, !llvm.mem.parallel_loop_access !16
  br label %.r_exit.i.i

; Loop latch: local_id_x += 1; continue while the new value is < 128
; (unsigned compare).
.r_exit.i.i: ; preds = %45, %pregion_for_entry..i.i
  %49 = load i64, i64* %_local_id_x.i, !noalias !30
  %50 = add i64 %49, 1
  store i64 %50, i64* %_local_id_x.i, !noalias !30
  %51 = load i64, i64* %_local_id_x.i, !noalias !30
  %52 = icmp ult i64 %51, 128
  br i1 %52, label %pregion_for_entry..i.i, label %_pocl_kernel_grudge_assign_0.exit, !llvm.loop !17

; Exit: end the lifetime of the local-id slots and return.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i
  %53 = bitcast i64* %_local_id_x.i to i8*
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %53)
  %54 = bitcast i64* %_local_id_y.i to i8*
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %54)
  %55 = bitcast i64* %_local_id_z.i to i8*
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %55)
  ret void

; uselistorder directives
  uselistorder i64* %_local_id_z.i, { 0, 2, 3, 1 }
  uselistorder i64* %_local_id_y.i, { 0, 2, 3, 1 }
  uselistorder i64* %_local_id_x.i, { 0, 2, 3, 4, 5, 6, 7, 8, 1 }
}
; _pocl_kernel_grudge_assign_0_workgroup_fast
;
; Same structure as @_pocl_kernel_grudge_assign_0_workgroup: unpack five
; arguments from the i8* array %0, then run the guarded 128-iteration copy
; loop. The visible difference is in slots 1 and 3: the i8* read from the
; slot is bitcast directly to double* instead of being loaded through a
; double**.
;
; Arguments:
;   %0  i8**  argument array: slot 0 -> i32, slot 1 = double* (as i8*),
;             slot 2 -> i32, slot 3 = double* (as i8*), slot 4 -> i32
;   %1  pointer to a context struct; fields 3, 4 and 5 are loaded but the
;       loaded values are never used here
;   %2  i64   truncated to i32 and scaled by 128 -- presumably the group id
;       in x; TODO confirm against the pocl launcher
;   %3, %4  i64  unused in this function
;
; This is the in1.ll form of the guard:
;   (%9 - 1) + (-128 * trunc(%2)) - trunc(local_id_x)
define void @_pocl_kernel_grudge_assign_0_workgroup_fast(i8**, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) {
  ; Entry block (implicit label %5): private slots for the local ids.
  %_local_id_x.i = alloca i64
  %_local_id_y.i = alloca i64
  %_local_id_z.i = alloca i64

  ; Slot 0: i32 value (used in the guard).
  %6 = getelementptr i8*, i8** %0, i32 0
  %7 = load i8*, i8** %6
  %8 = bitcast i8* %7 to i32*
  %9 = load i32, i32* %8
  ; Slot 1: destination buffer, taken directly from the slot.
  %10 = getelementptr i8*, i8** %0, i32 1
  %11 = load i8*, i8** %10
  %12 = bitcast i8* %11 to double*
  ; Slot 2: i32 offset added to the destination index.
  %13 = getelementptr i8*, i8** %0, i32 2
  %14 = load i8*, i8** %13
  %15 = bitcast i8* %14 to i32*
  %16 = load i32, i32* %15
  ; Slot 3: source buffer, taken directly from the slot.
  %17 = getelementptr i8*, i8** %0, i32 3
  %18 = load i8*, i8** %17
  %19 = bitcast i8* %18 to double*
  ; Slot 4: i32 offset added to the source index.
  %20 = getelementptr i8*, i8** %0, i32 4
  %21 = load i8*, i8** %20
  %22 = bitcast i8* %21 to i32*
  %23 = load i32, i32* %22

  ; Lifetime of the three 8-byte local-id slots starts here.
  %24 = bitcast i64* %_local_id_x.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %24)
  %25 = bitcast i64* %_local_id_y.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %25)
  %26 = bitcast i64* %_local_id_z.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %26)

  ; Fields 3, 4 and 5 of the context struct are each read twice; none of the
  ; loaded values (%28, %30, %32, %34, %36, %38) is used afterwards.
  %27 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
  %28 = load i8*, i8** %27, !noalias !36
  %29 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
  %30 = load i32*, i32** %29, !noalias !36
  %31 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
  %32 = load i32, i32* %31, !noalias !36
  %33 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
  %34 = load i8*, i8** %33, !noalias !36
  %35 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
  %36 = load i32*, i32** %35, !noalias !36
  %37 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
  %38 = load i32, i32* %37, !noalias !36

  ; Zero all three local ids (x is stored twice in the generated code).
  store i64 0, i64* %_local_id_x.i, !noalias !40
  store i64 0, i64* %_local_id_y.i, !noalias !40
  store i64 0, i64* %_local_id_z.i, !noalias !40
  store i64 0, i64* %_local_id_x.i, !noalias !40
  br label %pregion_for_entry..i.i

; Loop header: one iteration per value of local_id_x. Memory accesses here
; carry !llvm.mem.parallel_loop_access !16, tying them to loop !17.
pregion_for_entry..i.i: ; preds = %.r_exit.i.i, %5
  ; %39, %40 and %42 are loaded but never used; only %41 (x) feeds the body.
  %39 = load i64, i64* %_local_id_z.i, !noalias !40, !llvm.mem.parallel_loop_access !16
  %40 = load i64, i64* %_local_id_y.i, !noalias !40, !llvm.mem.parallel_loop_access !16
  %41 = load i64, i64* %_local_id_x.i, !noalias !40, !llvm.mem.parallel_loop_access !16
  %conv.i.i = trunc i64 %2 to i32
  %mul.i.i = mul nsw i32 %conv.i.i, -128
  %42 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !40, !llvm.mem.parallel_loop_access !16
  %conv2.i.i = trunc i64 %41 to i32
  ; Guard value: (%9 - 1) + (-128 * conv) - local_id_x. The copy runs only
  ; when this is signed-greater-than -1, i.e. non-negative.
  %add.i.i = add i32 %9, -1
  %add4.i.i = add i32 %add.i.i, %mul.i.i
  %add5.i.i = sub i32 %add4.i.i, %conv2.i.i
  %cmp.i.i = icmp sgt i32 %add5.i.i, -1
  br i1 %cmp.i.i, label %43, label %.r_exit.i.i

; Copy block: base index = (conv << 7) + local_id_x.
; <label>:43: ; preds = %pregion_for_entry..i.i
  %mul9.i.i = shl nsw i32 %conv.i.i, 7
  %add10.i.i = add i32 %mul9.i.i, %conv2.i.i
  ; Source element: %19[base + %23], read as raw i64 bits through a bitcast.
  %add13.i.i = add i32 %add10.i.i, %23
  %idxprom.i.i = sext i32 %add13.i.i to i64
  %arrayidx.i.i = getelementptr inbounds double, double* %19, i64 %idxprom.i.i
  %44 = bitcast double* %arrayidx.i.i to i64*
  %45 = load i64, i64* %44, align 8, !tbaa !22, !alias.scope !44, !noalias !45, !llvm.mem.parallel_loop_access !16
  ; Destination element: %12[base + %16].
  %add20.i.i = add i32 %add10.i.i, %16
  %idxprom21.i.i = sext i32 %add20.i.i to i64
  %arrayidx22.i.i = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i
  %46 = bitcast double* %arrayidx22.i.i to i64*
  store i64 %45, i64* %46, align 8, !tbaa !22, !alias.scope !45, !noalias !44, !llvm.mem.parallel_loop_access !16
  br label %.r_exit.i.i

; Loop latch: local_id_x += 1; continue while the new value is < 128
; (unsigned compare).
.r_exit.i.i: ; preds = %43, %pregion_for_entry..i.i
  %47 = load i64, i64* %_local_id_x.i, !noalias !40
  %48 = add i64 %47, 1
  store i64 %48, i64* %_local_id_x.i, !noalias !40
  %49 = load i64, i64* %_local_id_x.i, !noalias !40
  %50 = icmp ult i64 %49, 128
  br i1 %50, label %pregion_for_entry..i.i, label %_pocl_kernel_grudge_assign_0.exit, !llvm.loop !17

; Exit: end the lifetime of the local-id slots and return.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i
  %51 = bitcast i64* %_local_id_x.i to i8*
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %51)
  %52 = bitcast i64* %_local_id_y.i to i8*
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %52)
  %53 = bitcast i64* %_local_id_z.i to i8*
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %53)
  ret void

; uselistorder directives
  uselistorder i64* %_local_id_z.i, { 0, 2, 3, 1 }
  uselistorder i64* %_local_id_y.i, { 0, 2, 3, 1 }
  uselistorder i64* %_local_id_x.i, { 0, 2, 3, 4, 5, 6, 7, 8, 1 }
  uselistorder i64 0, { 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 9, 10 }
  uselistorder i32 5, { 0, 1, 2, 3, 5, 4 }
  uselistorder i64 8, { 0, 2, 4, 1, 3, 5, 6, 8, 10, 7, 9, 11 }
  uselistorder i32 4, { 0, 1, 4, 2, 3, 5, 7, 6 }
  uselistorder i32 3, { 0, 1, 4, 2, 3, 5, 7, 6 }
  uselistorder i32 0, { 0, 1, 2, 3, 4, 5, 12, 6, 7, 8, 9, 10, 11, 13, 17, 18, 19, 14, 15, 16 }
  uselistorder i32 1, { 6, 0, 1, 2, 7, 3, 4, 5, 8, 9, 10 }
}
; Lifetime marker intrinsics, called by the two workgroup wrappers around the
; 8-byte local-id allocas.
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
; Attribute groups: #0 = @pocl.barrier, #1 = @_pocl_kernel_grudge_assign_0,
; #2 = the llvm.lifetime.* declarations.
attributes #0 = { noduplicate }
attributes #1 = { alwaysinline norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { argmemonly nounwind }

; Named module-level metadata.
!llvm.module.flags = !{!0, !1, !2}
!opencl.ocl.version = !{!3}
!llvm.ident = !{!4}
!opencl.spir.version = !{!3}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 1, i32 2}
!4 = !{!"clang version 6.0.1 (tags/RELEASE_601/final)"}

; Kernel argument description attached to @_pocl_kernel_grudge_assign_0:
; !5 address spaces, !6 access qualifiers, !7 types, !8 type qualifiers,
; !9 names, !10 required work-group size, !11 pocl_generated marker.
!5 = !{i32 0, i32 1, i32 0, i32 1, i32 0}
!6 = !{!"none", !"none", !"none", !"none", !"none"}
!7 = !{!"int", !"double*", !"int", !"double*", !"int"}
!8 = !{!"", !"restrict", !"", !"restrict const", !""}
!9 = !{!"grdg_n", !"expr_8", !"expr_8_offset", !"grdg_sub_discr_dx0_dr0", !"grdg_sub_discr_dx0_dr0_offset"}
!10 = !{i32 128, i32 1, i32 1}
!11 = !{i32 1}

; Alias scopes used by @_pocl_kernel_grudge_assign_0.
!12 = !{!13, !15}
!13 = distinct !{!13, !14, !"grudge_assign_0: %expr_8"}
!14 = distinct !{!14, !"grudge_assign_0"}
!15 = distinct !{!15, !14, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}

; Loop id (!17) and the parallel-access list (!16) shared by all three loops.
!16 = !{!17}
!17 = distinct !{!17}

; TBAA type descriptors.
!18 = !{!19, !19, i64 0}
!19 = !{!"long", !20, i64 0}
!20 = !{!"omnipotent char", !21, i64 0}
!21 = !{!"Simple C/C++ TBAA"}
!22 = !{!23, !23, i64 0}
!23 = !{!"double", !20, i64 0}
!24 = !{!15}
!25 = !{!13}

; Alias scopes used by @_pocl_kernel_grudge_assign_0_workgroup.
!26 = !{!27, !29}
!27 = distinct !{!27, !28, !"_pocl_kernel_grudge_assign_0: argument 0"}
!28 = distinct !{!28, !"_pocl_kernel_grudge_assign_0"}
!29 = distinct !{!29, !28, !"_pocl_kernel_grudge_assign_0: argument 1"}
!30 = !{!31, !33, !27, !29}
!31 = distinct !{!31, !32, !"grudge_assign_0: %expr_8"}
!32 = distinct !{!32, !"grudge_assign_0"}
!33 = distinct !{!33, !32, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!34 = !{!33, !29}
!35 = !{!31, !27}

; Alias scopes used by @_pocl_kernel_grudge_assign_0_workgroup_fast.
!36 = !{!37, !39}
!37 = distinct !{!37, !38, !"_pocl_kernel_grudge_assign_0: argument 0"}
!38 = distinct !{!38, !"_pocl_kernel_grudge_assign_0"}
!39 = distinct !{!39, !38, !"_pocl_kernel_grudge_assign_0: argument 1"}
!40 = !{!41, !43, !37, !39}
!41 = distinct !{!41, !42, !"grudge_assign_0: %expr_8"}
!42 = distinct !{!42, !"grudge_assign_0"}
!43 = distinct !{!43, !42, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!44 = !{!43, !39}
!45 = !{!41, !37}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'parallel_bc'
source_filename = "parallel_bc"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; External i64 globals for the group and local ids. The functions visible in
; this module do not reference them: they use function-local allocas with the
; same base names (%_local_id_x etc.) instead.
@_group_id_x = external local_unnamed_addr global i64, align 8
@_group_id_y = external local_unnamed_addr global i64, align 8
@_group_id_z = external local_unnamed_addr global i64, align 8
@_local_id_x = external local_unnamed_addr global i64, align 8
@_local_id_y = external local_unnamed_addr global i64, align 8
@_local_id_z = external local_unnamed_addr global i64, align 8
; pocl.barrier: empty stub whose body only returns. It is not called by any
; function visible in this module; it references attribute group #0.
; Function Attrs: noduplicate
define linkonce void @pocl.barrier() #0 {
  ret void
}
; _pocl_kernel_grudge_assign_0
;
; Loops over a function-local work-item counter %_local_id_x from 0 up to
; (but not including) 128 and, for each value that passes a bounds guard,
; copies one 64-bit element from the buffer %3 to the buffer %1.
;
; Arguments (by position):
;   %0  i32      bound used in the guard
;   %1  double*  destination buffer (written; noalias)
;   %2  i32      offset added to the destination index
;   %3  double*  source buffer (read; noalias readonly)
;   %4  i32      offset added to the source index
;   %5  pointer to a context struct; fields 3, 4 and 5 are loaded below but
;       the loaded values are never used in this function
;   %6  i64      truncated to i32 and scaled by 128 -- presumably the group
;       id in x; TODO confirm against the pocl launcher
;   %7, %8  i64  unused in this function
;
; This is the in2.ll form of the guard:
;   (trunc(local_id_x) xor -1) + %0 + (-128 * trunc(%6))
; Since (x xor -1) equals -x - 1 in two's complement, this is the same i32
; value as the add/add/sub form (%0 - 1) + (-128 * trunc(%6)) - local_id_x.
; NOTE(review): this file is a compiler-bug reproducer; do not "simplify" the
; redundant loads/stores or reorder the guard arithmetic.
; Function Attrs: alwaysinline norecurse nounwind
define void @_pocl_kernel_grudge_assign_0(i32, double* noalias nocapture, i32, double* noalias nocapture readonly, i32, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 !pocl_generated !11 {
  ; Entry block (implicit label %9): private slots for the local ids.
  %_local_id_x = alloca i64
  %_local_id_y = alloca i64
  %_local_id_z = alloca i64

  ; Fields 3, 4 and 5 of the context struct are each read twice; none of the
  ; loaded values (%12, %15, %18, %21, %24, %27) is used afterwards.
  %10 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 3
  %11 = getelementptr i8*, i8** %10, i64 0
  %12 = load i8*, i8** %11
  %13 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 4
  %14 = getelementptr i32*, i32** %13, i64 0
  %15 = load i32*, i32** %14
  %16 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 5
  %17 = getelementptr i32, i32* %16, i64 0
  %18 = load i32, i32* %17
  %19 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 3
  %20 = getelementptr i8*, i8** %19, i64 0
  %21 = load i8*, i8** %20
  %22 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 4
  %23 = getelementptr i32*, i32** %22, i64 0
  %24 = load i32*, i32** %23
  %25 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 5
  %26 = getelementptr i32, i32* %25, i64 0
  %27 = load i32, i32* %26

  ; Zero all three local ids (x is stored twice in the generated code).
  store i64 0, i64* %_local_id_x, !noalias !12
  store i64 0, i64* %_local_id_y, !noalias !12
  store i64 0, i64* %_local_id_z, !noalias !12
  store i64 0, i64* %_local_id_x, !noalias !12
  br label %pregion_for_entry..i

; Loop header: one iteration per value of local_id_x. Memory accesses here
; carry !llvm.mem.parallel_loop_access !16; the latch branch carries
; !llvm.loop !17.
pregion_for_entry..i: ; preds = %.r_exit.i, %9
  ; %28, %29 and %31 are loaded but never used; only %30 (x) feeds the body.
  %28 = load i64, i64* %_local_id_z, !noalias !12, !llvm.mem.parallel_loop_access !16
  %29 = load i64, i64* %_local_id_y, !noalias !12, !llvm.mem.parallel_loop_access !16
  %30 = load i64, i64* %_local_id_x, !noalias !12, !llvm.mem.parallel_loop_access !16
  %conv.i = trunc i64 %6 to i32
  %mul.i = mul nsw i32 %conv.i, -128
  %31 = load i64, i64* %_local_id_x, align 8, !tbaa !18, !noalias !12, !llvm.mem.parallel_loop_access !16
  %conv2.i = trunc i64 %30 to i32
  ; Guard value: (~local_id_x) + %0 + (-128 * conv). The copy runs only when
  ; this is signed-greater-than -1, i.e. non-negative.
  %add.i = xor i32 %conv2.i, -1
  %add4.i = add i32 %add.i, %0
  %add5.i = add i32 %add4.i, %mul.i
  %cmp.i = icmp sgt i32 %add5.i, -1
  br i1 %cmp.i, label %32, label %.r_exit.i

; Copy block: base index = (conv << 7) + local_id_x.
; <label>:32: ; preds = %pregion_for_entry..i
  %mul9.i = shl nsw i32 %conv.i, 7
  %add10.i = add i32 %mul9.i, %conv2.i
  ; Source element: %3[base + %4], read as raw i64 bits through a bitcast.
  %add13.i = add i32 %add10.i, %4
  %idxprom.i = sext i32 %add13.i to i64
  %arrayidx.i = getelementptr inbounds double, double* %3, i64 %idxprom.i
  %33 = bitcast double* %arrayidx.i to i64*
  %34 = load i64, i64* %33, align 8, !tbaa !22, !alias.scope !24, !noalias !25, !llvm.mem.parallel_loop_access !16
  ; Destination element: %1[base + %2].
  %add20.i = add i32 %add10.i, %2
  %idxprom21.i = sext i32 %add20.i to i64
  %arrayidx22.i = getelementptr inbounds double, double* %1, i64 %idxprom21.i
  %35 = bitcast double* %arrayidx22.i to i64*
  store i64 %34, i64* %35, align 8, !tbaa !22, !alias.scope !25, !noalias !24, !llvm.mem.parallel_loop_access !16
  br label %.r_exit.i

; Loop latch: local_id_x += 1; continue while the new value is < 128
; (unsigned compare).
.r_exit.i: ; preds = %32, %pregion_for_entry..i
  %36 = load i64, i64* %_local_id_x, !noalias !12
  %37 = add i64 %36, 1
  store i64 %37, i64* %_local_id_x, !noalias !12
  %38 = load i64, i64* %_local_id_x, !noalias !12
  %39 = icmp ult i64 %38, 128
  br i1 %39, label %pregion_for_entry..i, label %grudge_assign_0.exit, !llvm.loop !17

grudge_assign_0.exit: ; preds = %.r_exit.i
  ret void

; uselistorder directives
  uselistorder { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, { 3, 4, 5, 0, 1, 2 }
}
; _pocl_kernel_grudge_assign_0_workgroup
;
; Wrapper that unpacks five kernel arguments from the i8* array %0 and then
; runs the same guarded 128-iteration copy loop as
; @_pocl_kernel_grudge_assign_0 (value names carry the ".i.i" suffix).
;
; Arguments:
;   %0  i8**  argument array; each slot points at the argument's storage:
;             slot 0 -> i32, slot 1 -> double*, slot 2 -> i32,
;             slot 3 -> double*, slot 4 -> i32
;   %1  pointer to a context struct; fields 3, 4 and 5 are loaded but the
;       loaded values are never used here
;   %2  i64   truncated to i32 and scaled by 128 -- presumably the group id
;       in x; TODO confirm against the pocl launcher
;   %3, %4  i64  unused in this function
;
; This is the in2.ll form of the guard:
;   (trunc(local_id_x) xor -1) + %9 + (-128 * trunc(%2))
; Since (x xor -1) equals -x - 1 in two's complement, this is the same i32
; value as the add/add/sub form (%9 - 1) + (-128 * trunc(%2)) - local_id_x.
define void @_pocl_kernel_grudge_assign_0_workgroup(i8**, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) {
  ; Entry block (implicit label %5): private slots for the local ids.
  %_local_id_x.i = alloca i64
  %_local_id_y.i = alloca i64
  %_local_id_z.i = alloca i64

  ; Slot 0: i32 value (used in the guard).
  %6 = getelementptr i8*, i8** %0, i32 0
  %7 = load i8*, i8** %6
  %8 = bitcast i8* %7 to i32*
  %9 = load i32, i32* %8
  ; Slot 1: destination buffer; the slot holds a pointer to the double*.
  %10 = getelementptr i8*, i8** %0, i32 1
  %11 = load i8*, i8** %10
  %12 = bitcast i8* %11 to double**
  %13 = load double*, double** %12
  ; Slot 2: i32 offset added to the destination index.
  %14 = getelementptr i8*, i8** %0, i32 2
  %15 = load i8*, i8** %14
  %16 = bitcast i8* %15 to i32*
  %17 = load i32, i32* %16
  ; Slot 3: source buffer; the slot holds a pointer to the double*.
  %18 = getelementptr i8*, i8** %0, i32 3
  %19 = load i8*, i8** %18
  %20 = bitcast i8* %19 to double**
  %21 = load double*, double** %20
  ; Slot 4: i32 offset added to the source index.
  %22 = getelementptr i8*, i8** %0, i32 4
  %23 = load i8*, i8** %22
  %24 = bitcast i8* %23 to i32*
  %25 = load i32, i32* %24

  ; Lifetime of the three 8-byte local-id slots starts here.
  %26 = bitcast i64* %_local_id_x.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %26)
  %27 = bitcast i64* %_local_id_y.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %27)
  %28 = bitcast i64* %_local_id_z.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %28)

  ; Fields 3, 4 and 5 of the context struct are each read twice; none of the
  ; loaded values (%30, %32, %34, %36, %38, %40) is used afterwards.
  %29 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
  %30 = load i8*, i8** %29, !noalias !26
  %31 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
  %32 = load i32*, i32** %31, !noalias !26
  %33 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
  %34 = load i32, i32* %33, !noalias !26
  %35 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
  %36 = load i8*, i8** %35, !noalias !26
  %37 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
  %38 = load i32*, i32** %37, !noalias !26
  %39 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
  %40 = load i32, i32* %39, !noalias !26

  ; Zero all three local ids (x is stored twice in the generated code).
  store i64 0, i64* %_local_id_x.i, !noalias !30
  store i64 0, i64* %_local_id_y.i, !noalias !30
  store i64 0, i64* %_local_id_z.i, !noalias !30
  store i64 0, i64* %_local_id_x.i, !noalias !30
  br label %pregion_for_entry..i.i

; Loop header: one iteration per value of local_id_x. Memory accesses here
; carry !llvm.mem.parallel_loop_access !16; the latch branch carries
; !llvm.loop !17.
pregion_for_entry..i.i: ; preds = %.r_exit.i.i, %5
  ; %41, %42 and %44 are loaded but never used; only %43 (x) feeds the body.
  %41 = load i64, i64* %_local_id_z.i, !noalias !30, !llvm.mem.parallel_loop_access !16
  %42 = load i64, i64* %_local_id_y.i, !noalias !30, !llvm.mem.parallel_loop_access !16
  %43 = load i64, i64* %_local_id_x.i, !noalias !30, !llvm.mem.parallel_loop_access !16
  %conv.i.i = trunc i64 %2 to i32
  %mul.i.i = mul nsw i32 %conv.i.i, -128
  %44 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !30, !llvm.mem.parallel_loop_access !16
  %conv2.i.i = trunc i64 %43 to i32
  ; Guard value: (~local_id_x) + %9 + (-128 * conv). The copy runs only when
  ; this is signed-greater-than -1, i.e. non-negative.
  %add.i.i = xor i32 %conv2.i.i, -1
  %add4.i.i = add i32 %add.i.i, %9
  %add5.i.i = add i32 %add4.i.i, %mul.i.i
  %cmp.i.i = icmp sgt i32 %add5.i.i, -1
  br i1 %cmp.i.i, label %45, label %.r_exit.i.i

; Copy block: base index = (conv << 7) + local_id_x.
; <label>:45: ; preds = %pregion_for_entry..i.i
  %mul9.i.i = shl nsw i32 %conv.i.i, 7
  %add10.i.i = add i32 %mul9.i.i, %conv2.i.i
  ; Source element: %21[base + %25], read as raw i64 bits through a bitcast.
  %add13.i.i = add i32 %add10.i.i, %25
  %idxprom.i.i = sext i32 %add13.i.i to i64
  %arrayidx.i.i = getelementptr inbounds double, double* %21, i64 %idxprom.i.i
  %46 = bitcast double* %arrayidx.i.i to i64*
  %47 = load i64, i64* %46, align 8, !tbaa !22, !alias.scope !34, !noalias !35, !llvm.mem.parallel_loop_access !16
  ; Destination element: %13[base + %17].
  %add20.i.i = add i32 %add10.i.i, %17
  %idxprom21.i.i = sext i32 %add20.i.i to i64
  %arrayidx22.i.i = getelementptr inbounds double, double* %13, i64 %idxprom21.i.i
  %48 = bitcast double* %arrayidx22.i.i to i64*
  store i64 %47, i64* %48, align 8, !tbaa !22, !alias.scope !35, !noalias !34, !llvm.mem.parallel_loop_access !16
  br label %.r_exit.i.i

; Loop latch: local_id_x += 1; continue while the new value is < 128
; (unsigned compare).
.r_exit.i.i: ; preds = %45, %pregion_for_entry..i.i
  %49 = load i64, i64* %_local_id_x.i, !noalias !30
  %50 = add i64 %49, 1
  store i64 %50, i64* %_local_id_x.i, !noalias !30
  %51 = load i64, i64* %_local_id_x.i, !noalias !30
  %52 = icmp ult i64 %51, 128
  br i1 %52, label %pregion_for_entry..i.i, label %_pocl_kernel_grudge_assign_0.exit, !llvm.loop !17

; Exit: end the lifetime of the local-id slots and return.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i
  %53 = bitcast i64* %_local_id_x.i to i8*
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %53)
  %54 = bitcast i64* %_local_id_y.i to i8*
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %54)
  %55 = bitcast i64* %_local_id_z.i to i8*
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %55)
  ret void

; uselistorder directives
  uselistorder i64* %_local_id_z.i, { 0, 2, 3, 1 }
  uselistorder i64* %_local_id_y.i, { 0, 2, 3, 1 }
  uselistorder i64* %_local_id_x.i, { 0, 2, 3, 4, 5, 6, 7, 8, 1 }
}
; _pocl_kernel_grudge_assign_0_workgroup_fast
;
; Walks a counter %_local_id_x.i from 0 through 127 and, for every counter
; value that passes a signed bounds test, copies one 8-byte element from the
; buffer %19 to the buffer %12.
;
; Parameters are unnamed, so they are numbered %0..%4 and the entry block is %5:
;   %0      i8**    - array of pointers; slots 0..4 are unpacked below
;   %1      struct* - fields 3, 4 and 5 are loaded, but none of the loaded
;                     values is used later in this function
;   %2      i64     - truncated to i32 and scaled by 128 inside the loop
;                     (NOTE(review): presumably the group id along x - confirm
;                     against the caller)
;   %3, %4  i64     - not referenced in the body
; Returns void.
define void @_pocl_kernel_grudge_assign_0_workgroup_fast(i8**, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) { | |
; Stack slots for the three counters; only the x slot is ever incremented.
%_local_id_x.i = alloca i64 | |
%_local_id_y.i = alloca i64 | |
%_local_id_z.i = alloca i64 | |
; Slot 0 of %0 points to an i32; its value becomes %9 (upper bound in the test below).
%6 = getelementptr i8*, i8** %0, i32 0 | |
%7 = load i8*, i8** %6 | |
%8 = bitcast i8* %7 to i32* | |
%9 = load i32, i32* %8 | |
; Slot 1 of %0 is used directly as a double* (%12): base of the stored-to buffer.
%10 = getelementptr i8*, i8** %0, i32 1 | |
%11 = load i8*, i8** %10 | |
%12 = bitcast i8* %11 to double* | |
; Slot 2 of %0 points to an i32; its value becomes %16 (index offset for the store).
%13 = getelementptr i8*, i8** %0, i32 2 | |
%14 = load i8*, i8** %13 | |
%15 = bitcast i8* %14 to i32* | |
%16 = load i32, i32* %15 | |
; Slot 3 of %0 is used directly as a double* (%19): base of the loaded-from buffer.
%17 = getelementptr i8*, i8** %0, i32 3 | |
%18 = load i8*, i8** %17 | |
%19 = bitcast i8* %18 to double* | |
; Slot 4 of %0 points to an i32; its value becomes %23 (index offset for the load).
%20 = getelementptr i8*, i8** %0, i32 4 | |
%21 = load i8*, i8** %20 | |
%22 = bitcast i8* %21 to i32* | |
%23 = load i32, i32* %22 | |
; Open the lifetime of each 8-byte counter slot.
%24 = bitcast i64* %_local_id_x.i to i8* | |
call void @llvm.lifetime.start.p0i8(i64 8, i8* %24) | |
%25 = bitcast i64* %_local_id_y.i to i8* | |
call void @llvm.lifetime.start.p0i8(i64 8, i8* %25) | |
%26 = bitcast i64* %_local_id_z.i to i8* | |
call void @llvm.lifetime.start.p0i8(i64 8, i8* %26) | |
; Fields 3, 4 and 5 of %1 are loaded twice in a row (%28/%30/%32, then
; %34/%36/%38); none of these six values has a user in this function.
%27 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3 | |
%28 = load i8*, i8** %27, !noalias !36 | |
%29 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4 | |
%30 = load i32*, i32** %29, !noalias !36 | |
%31 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5 | |
%32 = load i32, i32* %31, !noalias !36 | |
%33 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3 | |
%34 = load i8*, i8** %33, !noalias !36 | |
%35 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4 | |
%36 = load i32*, i32** %35, !noalias !36 | |
%37 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5 | |
%38 = load i32, i32* %37, !noalias !36 | |
; Zero all three counters; x is stored a second time with the same value.
store i64 0, i64* %_local_id_x.i, !noalias !40 | |
store i64 0, i64* %_local_id_y.i, !noalias !40 | |
store i64 0, i64* %_local_id_z.i, !noalias !40 | |
store i64 0, i64* %_local_id_x.i, !noalias !40 | |
br label %pregion_for_entry..i.i | |
; Loop header. The memory accesses in this block and in block %43 carry
; !llvm.mem.parallel_loop_access !16, and the back edge carries !llvm.loop !17.
pregion_for_entry..i.i: ; preds = %.r_exit.i.i, %5 | |
; %39 (z) and %40 (y) have no users; %41 (x) feeds the bounds test and the indices.
%39 = load i64, i64* %_local_id_z.i, !noalias !40, !llvm.mem.parallel_loop_access !16 | |
%40 = load i64, i64* %_local_id_y.i, !noalias !40, !llvm.mem.parallel_loop_access !16 | |
%41 = load i64, i64* %_local_id_x.i, !noalias !40, !llvm.mem.parallel_loop_access !16 | |
%conv.i.i = trunc i64 %2 to i32 | |
%mul.i.i = mul nsw i32 %conv.i.i, -128 | |
; %42 re-reads x but has no users; %conv2.i.i is derived from %41 instead.
%42 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !40, !llvm.mem.parallel_loop_access !16 | |
%conv2.i.i = trunc i64 %41 to i32 | |
; xor with -1 is bitwise NOT, i.e. (-x - 1) in two's complement, so
; %add5.i.i = %9 - 1 - x - 128 * %conv.i.i (wrapping i32 arithmetic).
%add.i.i = xor i32 %conv2.i.i, -1 | |
%add4.i.i = add i32 %add.i.i, %9 | |
%add5.i.i = add i32 %add4.i.i, %mul.i.i | |
; Signed "> -1" means %add5.i.i >= 0; only then is the element copied.
%cmp.i.i = icmp sgt i32 %add5.i.i, -1 | |
br i1 %cmp.i.i, label %43, label %.r_exit.i.i | |
; <label>:43: ; preds = %pregion_for_entry..i.i | |
; %add10.i.i = 128 * %conv.i.i + x is the index shared by the load and the store.
%mul9.i.i = shl nsw i32 %conv.i.i, 7 | |
%add10.i.i = add i32 %mul9.i.i, %conv2.i.i | |
; Load side: element (%add10.i.i + %23) of %19, read as a raw i64 through a bitcast.
%add13.i.i = add i32 %add10.i.i, %23 | |
%idxprom.i.i = sext i32 %add13.i.i to i64 | |
%arrayidx.i.i = getelementptr inbounds double, double* %19, i64 %idxprom.i.i | |
%44 = bitcast double* %arrayidx.i.i to i64* | |
%45 = load i64, i64* %44, align 8, !tbaa !22, !alias.scope !44, !noalias !45, !llvm.mem.parallel_loop_access !16 | |
; Store side: the same 64 bits are written to element (%add10.i.i + %16) of %12.
%add20.i.i = add i32 %add10.i.i, %16 | |
%idxprom21.i.i = sext i32 %add20.i.i to i64 | |
%arrayidx22.i.i = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i | |
%46 = bitcast double* %arrayidx22.i.i to i64* | |
store i64 %45, i64* %46, align 8, !tbaa !22, !alias.scope !45, !noalias !44, !llvm.mem.parallel_loop_access !16 | |
br label %.r_exit.i.i | |
; Loop latch: x += 1 through memory, then loop again while x < 128 (unsigned).
.r_exit.i.i: ; preds = %43, %pregion_for_entry..i.i | |
%47 = load i64, i64* %_local_id_x.i, !noalias !40 | |
%48 = add i64 %47, 1 | |
store i64 %48, i64* %_local_id_x.i, !noalias !40 | |
%49 = load i64, i64* %_local_id_x.i, !noalias !40 | |
%50 = icmp ult i64 %49, 128 | |
br i1 %50, label %pregion_for_entry..i.i, label %_pocl_kernel_grudge_assign_0.exit, !llvm.loop !17 | |
; Loop exit: close the lifetime of each counter slot and return.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i | |
%51 = bitcast i64* %_local_id_x.i to i8* | |
call void @llvm.lifetime.end.p0i8(i64 8, i8* %51) | |
%52 = bitcast i64* %_local_id_y.i to i8* | |
call void @llvm.lifetime.end.p0i8(i64 8, i8* %52) | |
%53 = bitcast i64* %_local_id_z.i to i8* | |
call void @llvm.lifetime.end.p0i8(i64 8, i8* %53) | |
ret void | |
; uselistorder directives | |
; These record the order of each value's use-list; they depend on the exact
; sequence of instructions above, so the body must not be reordered or pruned
; without regenerating them.
uselistorder i64* %_local_id_z.i, { 0, 2, 3, 1 } | |
uselistorder i64* %_local_id_y.i, { 0, 2, 3, 1 } | |
uselistorder i64* %_local_id_x.i, { 0, 2, 3, 4, 5, 6, 7, 8, 1 } | |
uselistorder i64 0, { 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 9, 10 } | |
uselistorder i32 5, { 0, 1, 2, 3, 5, 4 } | |
uselistorder i64 8, { 0, 2, 4, 1, 3, 5, 6, 8, 10, 7, 9, 11 } | |
uselistorder i32 4, { 0, 1, 4, 2, 3, 5, 7, 6 } | |
uselistorder i32 3, { 0, 1, 4, 2, 3, 5, 7, 6 } | |
uselistorder i32 0, { 0, 1, 2, 3, 4, 5, 12, 6, 7, 8, 9, 10, 11, 13, 17, 18, 19, 14, 15, 16 } | |
uselistorder i32 1, { 6, 0, 1, 2, 7, 3, 4, 5, 8, 9, 10 } | |
} | |
; Lifetime-marker intrinsics. The function bodies in this module call them with
; a size of 8 and an i8* obtained by bitcasting a _local_id_* alloca.
; Function Attrs: argmemonly nounwind | |
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2 | |
; Function Attrs: argmemonly nounwind | |
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2 | |
; Attribute groups. #2 is the group referenced by the two declarations above;
; #1 pins the target CPU to haswell together with an explicit feature list.
attributes #0 = { noduplicate } | |
attributes #1 = { alwaysinline norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #2 = { argmemonly nounwind } | |
; Named module-level metadata: module flags, OpenCL/SPIR version tuples and the
; producer identification string.
!llvm.module.flags = !{!0, !1, !2} | |
!opencl.ocl.version = !{!3} | |
!llvm.ident = !{!4} | |
!opencl.spir.version = !{!3} | |
; !0..!2 are the three module flags listed in !llvm.module.flags.
!0 = !{i32 1, !"wchar_size", i32 4} | |
!1 = !{i32 7, !"PIC Level", i32 2} | |
!2 = !{i32 7, !"PIE Level", i32 2} | |
; !3 is shared by !opencl.ocl.version and !opencl.spir.version.
!3 = !{i32 1, i32 2} | |
!4 = !{!"clang version 6.0.1 (tags/RELEASE_601/final)"} | |
; !5..!11: five-entry tuples plus two small integer tuples.
; NOTE(review): these look like per-argument kernel metadata (address space,
; access qualifier, type, type qualifier, name) and a 128x1x1 work-group size
; attached to the kernel definition, which is not visible in this part of the
; file - confirm against the `define` line that references them.
!5 = !{i32 0, i32 1, i32 0, i32 1, i32 0} | |
!6 = !{!"none", !"none", !"none", !"none", !"none"} | |
!7 = !{!"int", !"double*", !"int", !"double*", !"int"} | |
!8 = !{!"", !"restrict", !"", !"restrict const", !""} | |
!9 = !{!"grdg_n", !"expr_8", !"expr_8_offset", !"grdg_sub_discr_dx0_dr0", !"grdg_sub_discr_dx0_dr0_offset"} | |
!10 = !{i32 128, i32 1, i32 1} | |
!11 = !{i32 1} | |
; !12..!15: first alias-scope set (domain !14 with scopes !13 and !15).
!12 = !{!13, !15} | |
!13 = distinct !{!13, !14, !"grudge_assign_0: %expr_8"} | |
!14 = distinct !{!14, !"grudge_assign_0"} | |
!15 = distinct !{!15, !14, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"} | |
; !17 is the loop identifier used as !llvm.loop on the loop back edges; !16
; wraps it for the !llvm.mem.parallel_loop_access attachments.
!16 = !{!17} | |
!17 = distinct !{!17} | |
; TBAA nodes: !18 ("long") and !22 ("double") are the access tags used by the
; !tbaa attachments; both descend from "omnipotent char" under the root !21.
!18 = !{!19, !19, i64 0} | |
!19 = !{!"long", !20, i64 0} | |
!20 = !{!"omnipotent char", !21, i64 0} | |
!21 = !{!"Simple C/C++ TBAA"} | |
!22 = !{!23, !23, i64 0} | |
!23 = !{!"double", !20, i64 0} | |
!24 = !{!15} | |
!25 = !{!13} | |
; !26..!35: a second, distinct copy of the scope structure (domains !28 and
; !32); !30, !34 and !35 are the lists referenced by !noalias/!alias.scope in
; the function that precedes @_pocl_kernel_grudge_assign_0_workgroup_fast.
!26 = !{!27, !29} | |
!27 = distinct !{!27, !28, !"_pocl_kernel_grudge_assign_0: argument 0"} | |
!28 = distinct !{!28, !"_pocl_kernel_grudge_assign_0"} | |
!29 = distinct !{!29, !28, !"_pocl_kernel_grudge_assign_0: argument 1"} | |
!30 = !{!31, !33, !27, !29} | |
!31 = distinct !{!31, !32, !"grudge_assign_0: %expr_8"} | |
!32 = distinct !{!32, !"grudge_assign_0"} | |
!33 = distinct !{!33, !32, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"} | |
!34 = !{!33, !29} | |
!35 = !{!31, !27} | |
; !36..!45: a third distinct copy (domains !38 and !42); !36, !40, !44 and !45
; are the lists referenced inside @_pocl_kernel_grudge_assign_0_workgroup_fast.
!36 = !{!37, !39} | |
!37 = distinct !{!37, !38, !"_pocl_kernel_grudge_assign_0: argument 0"} | |
!38 = distinct !{!38, !"_pocl_kernel_grudge_assign_0"} | |
!39 = distinct !{!39, !38, !"_pocl_kernel_grudge_assign_0: argument 1"} | |
!40 = !{!41, !43, !37, !39} | |
!41 = distinct !{!41, !42, !"grudge_assign_0: %expr_8"} | |
!42 = distinct !{!42, !"grudge_assign_0"} | |
!43 = distinct !{!43, !42, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"} | |
!44 = !{!43, !39} | |
!45 = !{!41, !37} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- out1.ll 2019-09-18 02:06:23.236601478 -0500 | |
+++ out2.ll 2019-09-18 02:06:27.156668512 -0500 | |
@@ -1,4 +1,4 @@ | |
-; ModuleID = 'in1.ll' | |
+; ModuleID = 'in2.ll' | |
source_filename = "parallel_bc" | |
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | |
target triple = "x86_64-unknown-linux-gnu" | |
@@ -8,8 +8,7 @@ | |
vector.scevcheck: | |
%conv.i = trunc i64 %6 to i32 | |
%mul.i = mul nsw i32 %conv.i, -128 | |
- %add.i = add i32 %0, -1 | |
- %add4.i = add i32 %add.i, %mul.i | |
+ %add4.i = add i32 %mul.i, %0 | |
%mul9.i = shl nsw i32 %conv.i, 7 | |
%9 = shl i32 %conv.i, 7 | |
%10 = add i32 %9, %4 | |
@@ -28,10 +27,10 @@ | |
%broadcast.splat19 = shufflevector <4 x i32> %broadcast.splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer | |
%broadcast.splatinsert20 = insertelement <4 x i32> undef, i32 %add4.i, i32 0 | |
%broadcast.splat21 = shufflevector <4 x i32> %broadcast.splatinsert20, <4 x i32> undef, <4 x i32> zeroinitializer | |
- %15 = add <4 x i32> %broadcast.splat, <i32 0, i32 -1, i32 -2, i32 -3> | |
- %16 = add <4 x i32> %broadcast.splat17, <i32 -4, i32 -5, i32 -6, i32 -7> | |
- %17 = add <4 x i32> %broadcast.splat19, <i32 -8, i32 -9, i32 -10, i32 -11> | |
- %18 = add <4 x i32> %broadcast.splat21, <i32 -12, i32 -13, i32 -14, i32 -15> | |
+ %15 = add <4 x i32> %broadcast.splat, <i32 -1, i32 -2, i32 -3, i32 -4> | |
+ %16 = add <4 x i32> %broadcast.splat17, <i32 -5, i32 -6, i32 -7, i32 -8> | |
+ %17 = add <4 x i32> %broadcast.splat19, <i32 -9, i32 -10, i32 -11, i32 -12> | |
+ %18 = add <4 x i32> %broadcast.splat21, <i32 -13, i32 -14, i32 -15, i32 -16> | |
%19 = icmp sgt <4 x i32> %15, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%20 = icmp sgt <4 x i32> %16, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%21 = icmp sgt <4 x i32> %17, <i32 -1, i32 -1, i32 -1, i32 -1> | |
@@ -64,10 +63,10 @@ | |
%41 = getelementptr inbounds double, double* %35, i64 12 | |
%42 = bitcast double* %41 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24, <4 x i64>* %42, i32 8, <4 x i1> %22), !tbaa !12, !alias.scope !19, !noalias !16 | |
- %43 = add <4 x i32> %broadcast.splat, <i32 -16, i32 -17, i32 -18, i32 -19> | |
- %44 = add <4 x i32> %broadcast.splat17, <i32 -20, i32 -21, i32 -22, i32 -23> | |
- %45 = add <4 x i32> %broadcast.splat19, <i32 -24, i32 -25, i32 -26, i32 -27> | |
- %46 = add <4 x i32> %broadcast.splat21, <i32 -28, i32 -29, i32 -30, i32 -31> | |
+ %43 = add <4 x i32> %broadcast.splat, <i32 -17, i32 -18, i32 -19, i32 -20> | |
+ %44 = add <4 x i32> %broadcast.splat17, <i32 -21, i32 -22, i32 -23, i32 -24> | |
+ %45 = add <4 x i32> %broadcast.splat19, <i32 -25, i32 -26, i32 -27, i32 -28> | |
+ %46 = add <4 x i32> %broadcast.splat21, <i32 -29, i32 -30, i32 -31, i32 -32> | |
%47 = icmp sgt <4 x i32> %43, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%48 = icmp sgt <4 x i32> %44, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%49 = icmp sgt <4 x i32> %45, <i32 -1, i32 -1, i32 -1, i32 -1> | |
@@ -101,10 +100,10 @@ | |
%70 = getelementptr inbounds double, double* %64, i64 12 | |
%71 = bitcast double* %70 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.1, <4 x i64>* %71, i32 8, <4 x i1> %50), !tbaa !12, !alias.scope !19, !noalias !16 | |
- %72 = add <4 x i32> %broadcast.splat, <i32 -32, i32 -33, i32 -34, i32 -35> | |
- %73 = add <4 x i32> %broadcast.splat17, <i32 -36, i32 -37, i32 -38, i32 -39> | |
- %74 = add <4 x i32> %broadcast.splat19, <i32 -40, i32 -41, i32 -42, i32 -43> | |
- %75 = add <4 x i32> %broadcast.splat21, <i32 -44, i32 -45, i32 -46, i32 -47> | |
+ %72 = add <4 x i32> %broadcast.splat, <i32 -33, i32 -34, i32 -35, i32 -36> | |
+ %73 = add <4 x i32> %broadcast.splat17, <i32 -37, i32 -38, i32 -39, i32 -40> | |
+ %74 = add <4 x i32> %broadcast.splat19, <i32 -41, i32 -42, i32 -43, i32 -44> | |
+ %75 = add <4 x i32> %broadcast.splat21, <i32 -45, i32 -46, i32 -47, i32 -48> | |
%76 = icmp sgt <4 x i32> %72, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%77 = icmp sgt <4 x i32> %73, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%78 = icmp sgt <4 x i32> %74, <i32 -1, i32 -1, i32 -1, i32 -1> | |
@@ -138,10 +137,10 @@ | |
%99 = getelementptr inbounds double, double* %93, i64 12 | |
%100 = bitcast double* %99 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.2, <4 x i64>* %100, i32 8, <4 x i1> %79), !tbaa !12, !alias.scope !19, !noalias !16 | |
- %101 = add <4 x i32> %broadcast.splat, <i32 -48, i32 -49, i32 -50, i32 -51> | |
- %102 = add <4 x i32> %broadcast.splat17, <i32 -52, i32 -53, i32 -54, i32 -55> | |
- %103 = add <4 x i32> %broadcast.splat19, <i32 -56, i32 -57, i32 -58, i32 -59> | |
- %104 = add <4 x i32> %broadcast.splat21, <i32 -60, i32 -61, i32 -62, i32 -63> | |
+ %101 = add <4 x i32> %broadcast.splat, <i32 -49, i32 -50, i32 -51, i32 -52> | |
+ %102 = add <4 x i32> %broadcast.splat17, <i32 -53, i32 -54, i32 -55, i32 -56> | |
+ %103 = add <4 x i32> %broadcast.splat19, <i32 -57, i32 -58, i32 -59, i32 -60> | |
+ %104 = add <4 x i32> %broadcast.splat21, <i32 -61, i32 -62, i32 -63, i32 -64> | |
%105 = icmp sgt <4 x i32> %101, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%106 = icmp sgt <4 x i32> %102, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%107 = icmp sgt <4 x i32> %103, <i32 -1, i32 -1, i32 -1, i32 -1> | |
@@ -175,10 +174,10 @@ | |
%128 = getelementptr inbounds double, double* %122, i64 12 | |
%129 = bitcast double* %128 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.3, <4 x i64>* %129, i32 8, <4 x i1> %108), !tbaa !12, !alias.scope !19, !noalias !16 | |
- %130 = add <4 x i32> %broadcast.splat, <i32 -64, i32 -65, i32 -66, i32 -67> | |
- %131 = add <4 x i32> %broadcast.splat17, <i32 -68, i32 -69, i32 -70, i32 -71> | |
- %132 = add <4 x i32> %broadcast.splat19, <i32 -72, i32 -73, i32 -74, i32 -75> | |
- %133 = add <4 x i32> %broadcast.splat21, <i32 -76, i32 -77, i32 -78, i32 -79> | |
+ %130 = add <4 x i32> %broadcast.splat, <i32 -65, i32 -66, i32 -67, i32 -68> | |
+ %131 = add <4 x i32> %broadcast.splat17, <i32 -69, i32 -70, i32 -71, i32 -72> | |
+ %132 = add <4 x i32> %broadcast.splat19, <i32 -73, i32 -74, i32 -75, i32 -76> | |
+ %133 = add <4 x i32> %broadcast.splat21, <i32 -77, i32 -78, i32 -79, i32 -80> | |
%134 = icmp sgt <4 x i32> %130, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%135 = icmp sgt <4 x i32> %131, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%136 = icmp sgt <4 x i32> %132, <i32 -1, i32 -1, i32 -1, i32 -1> | |
@@ -212,10 +211,10 @@ | |
%157 = getelementptr inbounds double, double* %151, i64 12 | |
%158 = bitcast double* %157 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.4, <4 x i64>* %158, i32 8, <4 x i1> %137), !tbaa !12, !alias.scope !19, !noalias !16 | |
- %159 = add <4 x i32> %broadcast.splat, <i32 -80, i32 -81, i32 -82, i32 -83> | |
- %160 = add <4 x i32> %broadcast.splat17, <i32 -84, i32 -85, i32 -86, i32 -87> | |
- %161 = add <4 x i32> %broadcast.splat19, <i32 -88, i32 -89, i32 -90, i32 -91> | |
- %162 = add <4 x i32> %broadcast.splat21, <i32 -92, i32 -93, i32 -94, i32 -95> | |
+ %159 = add <4 x i32> %broadcast.splat, <i32 -81, i32 -82, i32 -83, i32 -84> | |
+ %160 = add <4 x i32> %broadcast.splat17, <i32 -85, i32 -86, i32 -87, i32 -88> | |
+ %161 = add <4 x i32> %broadcast.splat19, <i32 -89, i32 -90, i32 -91, i32 -92> | |
+ %162 = add <4 x i32> %broadcast.splat21, <i32 -93, i32 -94, i32 -95, i32 -96> | |
%163 = icmp sgt <4 x i32> %159, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%164 = icmp sgt <4 x i32> %160, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%165 = icmp sgt <4 x i32> %161, <i32 -1, i32 -1, i32 -1, i32 -1> | |
@@ -249,10 +248,10 @@ | |
%186 = getelementptr inbounds double, double* %180, i64 12 | |
%187 = bitcast double* %186 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.5, <4 x i64>* %187, i32 8, <4 x i1> %166), !tbaa !12, !alias.scope !19, !noalias !16 | |
- %188 = add <4 x i32> %broadcast.splat, <i32 -96, i32 -97, i32 -98, i32 -99> | |
- %189 = add <4 x i32> %broadcast.splat17, <i32 -100, i32 -101, i32 -102, i32 -103> | |
- %190 = add <4 x i32> %broadcast.splat19, <i32 -104, i32 -105, i32 -106, i32 -107> | |
- %191 = add <4 x i32> %broadcast.splat21, <i32 -108, i32 -109, i32 -110, i32 -111> | |
+ %188 = add <4 x i32> %broadcast.splat, <i32 -97, i32 -98, i32 -99, i32 -100> | |
+ %189 = add <4 x i32> %broadcast.splat17, <i32 -101, i32 -102, i32 -103, i32 -104> | |
+ %190 = add <4 x i32> %broadcast.splat19, <i32 -105, i32 -106, i32 -107, i32 -108> | |
+ %191 = add <4 x i32> %broadcast.splat21, <i32 -109, i32 -110, i32 -111, i32 -112> | |
%192 = icmp sgt <4 x i32> %188, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%193 = icmp sgt <4 x i32> %189, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%194 = icmp sgt <4 x i32> %190, <i32 -1, i32 -1, i32 -1, i32 -1> | |
@@ -286,10 +285,10 @@ | |
%215 = getelementptr inbounds double, double* %209, i64 12 | |
%216 = bitcast double* %215 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.6, <4 x i64>* %216, i32 8, <4 x i1> %195), !tbaa !12, !alias.scope !19, !noalias !16 | |
- %217 = add <4 x i32> %broadcast.splat, <i32 -112, i32 -113, i32 -114, i32 -115> | |
- %218 = add <4 x i32> %broadcast.splat17, <i32 -116, i32 -117, i32 -118, i32 -119> | |
- %219 = add <4 x i32> %broadcast.splat19, <i32 -120, i32 -121, i32 -122, i32 -123> | |
- %220 = add <4 x i32> %broadcast.splat21, <i32 -124, i32 -125, i32 -126, i32 -127> | |
+ %217 = add <4 x i32> %broadcast.splat, <i32 -113, i32 -114, i32 -115, i32 -116> | |
+ %218 = add <4 x i32> %broadcast.splat17, <i32 -117, i32 -118, i32 -119, i32 -120> | |
+ %219 = add <4 x i32> %broadcast.splat19, <i32 -121, i32 -122, i32 -123, i32 -124> | |
+ %220 = add <4 x i32> %broadcast.splat21, <i32 -125, i32 -126, i32 -127, i32 -128> | |
%221 = icmp sgt <4 x i32> %217, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%222 = icmp sgt <4 x i32> %218, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%223 = icmp sgt <4 x i32> %219, <i32 -1, i32 -1, i32 -1, i32 -1> | |
@@ -325,10 +324,11 @@ | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.7, <4 x i64>* %245, i32 8, <4 x i1> %224), !tbaa !12, !alias.scope !19, !noalias !16 | |
br label %grudge_assign_0.exit | |
-pregion_for_entry..i: ; preds = %vector.scevcheck, %.r_exit.i.3 | |
- %_local_id_x.0 = phi i64 [ %265, %.r_exit.i.3 ], [ 0, %vector.scevcheck ] | |
+pregion_for_entry..i: ; preds = %vector.scevcheck, %.r_exit.i.1 | |
+ %_local_id_x.0 = phi i64 [ %255, %.r_exit.i.1 ], [ 0, %vector.scevcheck ] | |
%conv2.i = trunc i64 %_local_id_x.0 to i32 | |
- %add5.i = sub i32 %add4.i, %conv2.i | |
+ %add.i = xor i32 %conv2.i, -1 | |
+ %add5.i = add i32 %add4.i, %add.i | |
%cmp.i = icmp sgt i32 %add5.i, -1 | |
br i1 %cmp.i, label %246, label %.r_exit.i | |
@@ -348,15 +348,16 @@ | |
.r_exit.i: ; preds = %246, %pregion_for_entry..i | |
%250 = trunc i64 %_local_id_x.0 to i32 | |
- %conv2.i.1 = or i32 %250, 1 | |
- %add5.i.1 = sub i32 %add4.i, %conv2.i.1 | |
+ %add.i.1 = xor i32 %250, -2 | |
+ %add5.i.1 = add i32 %add4.i, %add.i.1 | |
%cmp.i.1 = icmp sgt i32 %add5.i.1, -1 | |
br i1 %cmp.i.1, label %251, label %.r_exit.i.1 | |
-grudge_assign_0.exit: ; preds = %.r_exit.i.3, %vector.ph | |
+grudge_assign_0.exit: ; preds = %.r_exit.i.1, %vector.ph | |
ret void | |
; <label>:251: ; preds = %.r_exit.i | |
+ %conv2.i.1 = or i32 %250, 1 | |
%add10.i.1 = add nuw nsw i32 %mul9.i, %conv2.i.1 | |
%add13.i.1 = add i32 %add10.i.1, %4 | |
%idxprom.i.1 = sext i32 %add13.i.1 to i64 | |
@@ -371,51 +372,9 @@ | |
br label %.r_exit.i.1 | |
.r_exit.i.1: ; preds = %251, %.r_exit.i | |
- %255 = trunc i64 %_local_id_x.0 to i32 | |
- %conv2.i.2 = or i32 %255, 2 | |
- %add5.i.2 = sub i32 %add4.i, %conv2.i.2 | |
- %cmp.i.2 = icmp sgt i32 %add5.i.2, -1 | |
- br i1 %cmp.i.2, label %256, label %.r_exit.i.2 | |
- | |
-; <label>:256: ; preds = %.r_exit.i.1 | |
- %add10.i.2 = add nuw nsw i32 %mul9.i, %conv2.i.2 | |
- %add13.i.2 = add i32 %add10.i.2, %4 | |
- %idxprom.i.2 = sext i32 %add13.i.2 to i64 | |
- %arrayidx.i.2 = getelementptr inbounds double, double* %3, i64 %idxprom.i.2 | |
- %257 = bitcast double* %arrayidx.i.2 to i64* | |
- %258 = load i64, i64* %257, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21 | |
- %add20.i.2 = add i32 %add10.i.2, %2 | |
- %idxprom21.i.2 = sext i32 %add20.i.2 to i64 | |
- %arrayidx22.i.2 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.2 | |
- %259 = bitcast double* %arrayidx22.i.2 to i64* | |
- store i64 %258, i64* %259, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21 | |
- br label %.r_exit.i.2 | |
- | |
-.r_exit.i.2: ; preds = %256, %.r_exit.i.1 | |
- %260 = trunc i64 %_local_id_x.0 to i32 | |
- %conv2.i.3 = or i32 %260, 3 | |
- %add5.i.3 = sub i32 %add4.i, %conv2.i.3 | |
- %cmp.i.3 = icmp sgt i32 %add5.i.3, -1 | |
- br i1 %cmp.i.3, label %261, label %.r_exit.i.3 | |
- | |
-; <label>:261: ; preds = %.r_exit.i.2 | |
- %add10.i.3 = add nuw nsw i32 %mul9.i, %conv2.i.3 | |
- %add13.i.3 = add i32 %add10.i.3, %4 | |
- %idxprom.i.3 = sext i32 %add13.i.3 to i64 | |
- %arrayidx.i.3 = getelementptr inbounds double, double* %3, i64 %idxprom.i.3 | |
- %262 = bitcast double* %arrayidx.i.3 to i64* | |
- %263 = load i64, i64* %262, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21 | |
- %add20.i.3 = add i32 %add10.i.3, %2 | |
- %idxprom21.i.3 = sext i32 %add20.i.3 to i64 | |
- %arrayidx22.i.3 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.3 | |
- %264 = bitcast double* %arrayidx22.i.3 to i64* | |
- store i64 %263, i64* %264, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21 | |
- br label %.r_exit.i.3 | |
- | |
-.r_exit.i.3: ; preds = %261, %.r_exit.i.2 | |
- %265 = add nuw nsw i64 %_local_id_x.0, 4 | |
- %exitcond.3 = icmp eq i64 %265, 128 | |
- br i1 %exitcond.3, label %grudge_assign_0.exit, label %pregion_for_entry..i, !llvm.loop !23 | |
+ %255 = add nuw nsw i64 %_local_id_x.0, 2 | |
+ %exitcond.1 = icmp eq i64 %255, 128 | |
+ br i1 %exitcond.1, label %grudge_assign_0.exit, label %pregion_for_entry..i, !llvm.loop !23 | |
} | |
; Function Attrs: norecurse nounwind | |
@@ -441,15 +400,15 @@ | |
%24 = load i32, i32* %23, align 4 | |
%conv.i.i = trunc i64 %2 to i32 | |
%mul.i.i = mul nsw i32 %conv.i.i, -128 | |
- %add.i.i = add i32 %mul.i.i, -1 | |
- %add4.i.i = add i32 %add.i.i, %8 | |
+ %add4.i.i = add i32 %8, %mul.i.i | |
%mul9.i.i = shl nsw i32 %conv.i.i, 7 | |
br label %pregion_for_entry..i.i | |
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5 | |
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %34, %.r_exit.i.i.1 ] | |
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32 | |
- %add5.i.i = sub i32 %add4.i.i, %conv2.i.i | |
+ %add.i.i = xor i32 %conv2.i.i, -1 | |
+ %add5.i.i = add i32 %add4.i.i, %add.i.i | |
%cmp.i.i = icmp sgt i32 %add5.i.i, -1 | |
br i1 %cmp.i.i, label %25, label %.r_exit.i.i | |
@@ -469,8 +428,8 @@ | |
.r_exit.i.i: ; preds = %25, %pregion_for_entry..i.i | |
%29 = trunc i64 %_local_id_x.i.0 to i32 | |
- %conv2.i.i.1 = or i32 %29, 1 | |
- %add5.i.i.1 = sub i32 %add4.i.i, %conv2.i.i.1 | |
+ %add.i.i.1 = xor i32 %29, -2 | |
+ %add5.i.i.1 = add i32 %add4.i.i, %add.i.i.1 | |
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1 | |
br i1 %cmp.i.i.1, label %30, label %.r_exit.i.i.1 | |
@@ -478,6 +437,7 @@ | |
ret void | |
; <label>:30: ; preds = %.r_exit.i.i | |
+ %conv2.i.i.1 = or i32 %29, 1 | |
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1 | |
%add13.i.i.1 = add i32 %add10.i.i.1, %24 | |
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64 | |
@@ -518,15 +478,15 @@ | |
%22 = load i32, i32* %21, align 4 | |
%conv.i.i = trunc i64 %2 to i32 | |
%mul.i.i = mul nsw i32 %conv.i.i, -128 | |
- %add.i.i = add i32 %mul.i.i, -1 | |
- %add4.i.i = add i32 %add.i.i, %8 | |
+ %add4.i.i = add i32 %8, %mul.i.i | |
%mul9.i.i = shl nsw i32 %conv.i.i, 7 | |
br label %pregion_for_entry..i.i | |
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5 | |
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %32, %.r_exit.i.i.1 ] | |
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32 | |
- %add5.i.i = sub i32 %add4.i.i, %conv2.i.i | |
+ %add.i.i = xor i32 %conv2.i.i, -1 | |
+ %add5.i.i = add i32 %add4.i.i, %add.i.i | |
%cmp.i.i = icmp sgt i32 %add5.i.i, -1 | |
br i1 %cmp.i.i, label %23, label %.r_exit.i.i | |
@@ -546,8 +506,8 @@ | |
.r_exit.i.i: ; preds = %23, %pregion_for_entry..i.i | |
%27 = trunc i64 %_local_id_x.i.0 to i32 | |
- %conv2.i.i.1 = or i32 %27, 1 | |
- %add5.i.i.1 = sub i32 %add4.i.i, %conv2.i.i.1 | |
+ %add.i.i.1 = xor i32 %27, -2 | |
+ %add5.i.i.1 = add i32 %add4.i.i, %add.i.i.1 | |
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1 | |
br i1 %cmp.i.i.1, label %28, label %.r_exit.i.i.1 | |
@@ -555,6 +515,7 @@ | |
ret void | |
; <label>:28: ; preds = %.r_exit.i.i | |
+ %conv2.i.i.1 = or i32 %27, 1 | |
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1 | |
%add13.i.i.1 = add i32 %add10.i.i.1, %22 | |
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'in1.ll' | |
source_filename = "parallel_bc" | |
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | |
target triple = "x86_64-unknown-linux-gnu" | |
; Function Attrs: alwaysinline norecurse nounwind | |
; _pocl_kernel_grudge_assign_0: guarded copy of up to 128 8-byte elements from
; the buffer %3 (readonly) into the buffer %1.
;
; For each lane index lid in [0, 128) an element is copied only when
;   (%0 - 1) + (-128 * trunc(%6)) - lid  >  -1        (signed i32 compare)
; Source element index:       (trunc(%6) << 7) + lid + %4
; Destination element index:  (trunc(%6) << 7) + lid + %2
; Elements are moved as i64 bit patterns through bitcast double pointers.
;
; Two code paths implement the copy:
;   vector.ph             straight-line code, 8 groups of 4 x <4 x i64>, masked stores
;   pregion_for_entry..i  scalar loop unrolled by 4, taken when vector.scevcheck
;                         decides the vector path must not be used
; Parameters %5, %7 and %8 are not referenced anywhere in the body.
define void @_pocl_kernel_grudge_assign_0(i32, double* noalias nocapture, i32, double* noalias nocapture readonly, i32, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 !pocl_generated !11 { | |
vector.scevcheck: | |
; Loop-invariant part of the guard: %add4.i = (%0 - 1) + (-128 * trunc(%6)).
%conv.i = trunc i64 %6 to i32 | |
%mul.i = mul nsw i32 %conv.i, -128 | |
%add.i = add i32 %0, -1 | |
%add4.i = add i32 %add.i, %mul.i | |
; Base element index trunc(%6) * 128; %9 recomputes the same shift without nsw.
%mul9.i = shl nsw i32 %conv.i, 7 | |
%9 = shl i32 %conv.i, 7 | |
; Runtime check on both base indices. 2147483520 == 2^31 - 128, so the test is
; true when base + 127 would exceed the signed i32 maximum.
; NOTE(review): presumably emitted by the vectorizer to rule out i32 wrap of
; base + lid - confirm against the LoopVectorize SCEV checks.
%10 = add i32 %9, %4 | |
%11 = icmp sgt i32 %10, 2147483520 | |
%12 = add i32 %9, %2 | |
%13 = icmp sgt i32 %12, 2147483520 | |
%14 = or i1 %11, %13 | |
; Either check true -> scalar fallback loop; otherwise the vector path.
br i1 %14, label %pregion_for_entry..i, label %vector.ph | |
vector.ph: ; preds = %vector.scevcheck | |
; Four identical splats of %add4.i, one per <4 x i32> guard vector in a group.
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %add4.i, i32 0 | |
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer | |
%broadcast.splatinsert16 = insertelement <4 x i32> undef, i32 %add4.i, i32 0 | |
%broadcast.splat17 = shufflevector <4 x i32> %broadcast.splatinsert16, <4 x i32> undef, <4 x i32> zeroinitializer | |
%broadcast.splatinsert18 = insertelement <4 x i32> undef, i32 %add4.i, i32 0 | |
%broadcast.splat19 = shufflevector <4 x i32> %broadcast.splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer | |
%broadcast.splatinsert20 = insertelement <4 x i32> undef, i32 %add4.i, i32 0 | |
%broadcast.splat21 = shufflevector <4 x i32> %broadcast.splatinsert20, <4 x i32> undef, <4 x i32> zeroinitializer | |
; ---- Lanes 0..15 ----
; Per-lane guard value %add4.i - lid, then mask = (value > -1).
%15 = add <4 x i32> %broadcast.splat, <i32 0, i32 -1, i32 -2, i32 -3> | |
%16 = add <4 x i32> %broadcast.splat17, <i32 -4, i32 -5, i32 -6, i32 -7> | |
%17 = add <4 x i32> %broadcast.splat19, <i32 -8, i32 -9, i32 -10, i32 -11> | |
%18 = add <4 x i32> %broadcast.splat21, <i32 -12, i32 -13, i32 -14, i32 -15> | |
%19 = icmp sgt <4 x i32> %15, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%20 = icmp sgt <4 x i32> %16, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%21 = icmp sgt <4 x i32> %17, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%22 = icmp sgt <4 x i32> %18, <i32 -1, i32 -1, i32 -1, i32 -1> | |
; Source address for lane 0 of this group: %3 + sext(%mul9.i + %4).
; NOTE(review): the four wide loads below (and in every later group) are plain,
; unconditional loads; only the stores are masked. The scalar loop loads only
; inside its guarded block. Whether all 128 source elements are always readable
; cannot be determined from this function - confirm.
%23 = add i32 %mul9.i, %4 | |
%24 = sext i32 %23 to i64 | |
%25 = getelementptr inbounds double, double* %3, i64 %24 | |
%26 = bitcast double* %25 to <4 x i64>* | |
%wide.load = load <4 x i64>, <4 x i64>* %26, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%27 = getelementptr inbounds double, double* %25, i64 4 | |
%28 = bitcast double* %27 to <4 x i64>* | |
%wide.load22 = load <4 x i64>, <4 x i64>* %28, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%29 = getelementptr inbounds double, double* %25, i64 8 | |
%30 = bitcast double* %29 to <4 x i64>* | |
%wide.load23 = load <4 x i64>, <4 x i64>* %30, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%31 = getelementptr inbounds double, double* %25, i64 12 | |
%32 = bitcast double* %31 to <4 x i64>* | |
%wide.load24 = load <4 x i64>, <4 x i64>* %32, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
; Destination address for lane 0 of this group: %1 + sext(%mul9.i + %2).
; Stores are masked with the guards computed above (alignment operand 8).
%33 = add i32 %mul9.i, %2 | |
%34 = sext i32 %33 to i64 | |
%35 = getelementptr inbounds double, double* %1, i64 %34 | |
%36 = bitcast double* %35 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load, <4 x i64>* %36, i32 8, <4 x i1> %19), !tbaa !12, !alias.scope !19, !noalias !16 | |
%37 = getelementptr inbounds double, double* %35, i64 4 | |
%38 = bitcast double* %37 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22, <4 x i64>* %38, i32 8, <4 x i1> %20), !tbaa !12, !alias.scope !19, !noalias !16 | |
%39 = getelementptr inbounds double, double* %35, i64 8 | |
%40 = bitcast double* %39 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23, <4 x i64>* %40, i32 8, <4 x i1> %21), !tbaa !12, !alias.scope !19, !noalias !16 | |
%41 = getelementptr inbounds double, double* %35, i64 12 | |
%42 = bitcast double* %41 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24, <4 x i64>* %42, i32 8, <4 x i1> %22), !tbaa !12, !alias.scope !19, !noalias !16 | |
; ---- Lanes 16..31 ----
; Same pattern. The group offset is merged into the base with `or`; %mul9.i is
; a left shift by 7, so its low 7 bits are zero and the `or` acts as an add.
%43 = add <4 x i32> %broadcast.splat, <i32 -16, i32 -17, i32 -18, i32 -19> | |
%44 = add <4 x i32> %broadcast.splat17, <i32 -20, i32 -21, i32 -22, i32 -23> | |
%45 = add <4 x i32> %broadcast.splat19, <i32 -24, i32 -25, i32 -26, i32 -27> | |
%46 = add <4 x i32> %broadcast.splat21, <i32 -28, i32 -29, i32 -30, i32 -31> | |
%47 = icmp sgt <4 x i32> %43, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%48 = icmp sgt <4 x i32> %44, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%49 = icmp sgt <4 x i32> %45, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%50 = icmp sgt <4 x i32> %46, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%51 = or i32 %mul9.i, 16 | |
%52 = add i32 %51, %4 | |
%53 = sext i32 %52 to i64 | |
%54 = getelementptr inbounds double, double* %3, i64 %53 | |
%55 = bitcast double* %54 to <4 x i64>* | |
%wide.load.1 = load <4 x i64>, <4 x i64>* %55, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%56 = getelementptr inbounds double, double* %54, i64 4 | |
%57 = bitcast double* %56 to <4 x i64>* | |
%wide.load22.1 = load <4 x i64>, <4 x i64>* %57, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%58 = getelementptr inbounds double, double* %54, i64 8 | |
%59 = bitcast double* %58 to <4 x i64>* | |
%wide.load23.1 = load <4 x i64>, <4 x i64>* %59, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%60 = getelementptr inbounds double, double* %54, i64 12 | |
%61 = bitcast double* %60 to <4 x i64>* | |
%wide.load24.1 = load <4 x i64>, <4 x i64>* %61, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%62 = add i32 %51, %2 | |
%63 = sext i32 %62 to i64 | |
%64 = getelementptr inbounds double, double* %1, i64 %63 | |
%65 = bitcast double* %64 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.1, <4 x i64>* %65, i32 8, <4 x i1> %47), !tbaa !12, !alias.scope !19, !noalias !16 | |
%66 = getelementptr inbounds double, double* %64, i64 4 | |
%67 = bitcast double* %66 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.1, <4 x i64>* %67, i32 8, <4 x i1> %48), !tbaa !12, !alias.scope !19, !noalias !16 | |
%68 = getelementptr inbounds double, double* %64, i64 8 | |
%69 = bitcast double* %68 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.1, <4 x i64>* %69, i32 8, <4 x i1> %49), !tbaa !12, !alias.scope !19, !noalias !16 | |
%70 = getelementptr inbounds double, double* %64, i64 12 | |
%71 = bitcast double* %70 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.1, <4 x i64>* %71, i32 8, <4 x i1> %50), !tbaa !12, !alias.scope !19, !noalias !16 | |
; ---- Lanes 32..47 ----
%72 = add <4 x i32> %broadcast.splat, <i32 -32, i32 -33, i32 -34, i32 -35> | |
%73 = add <4 x i32> %broadcast.splat17, <i32 -36, i32 -37, i32 -38, i32 -39> | |
%74 = add <4 x i32> %broadcast.splat19, <i32 -40, i32 -41, i32 -42, i32 -43> | |
%75 = add <4 x i32> %broadcast.splat21, <i32 -44, i32 -45, i32 -46, i32 -47> | |
%76 = icmp sgt <4 x i32> %72, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%77 = icmp sgt <4 x i32> %73, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%78 = icmp sgt <4 x i32> %74, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%79 = icmp sgt <4 x i32> %75, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%80 = or i32 %mul9.i, 32 | |
%81 = add i32 %80, %4 | |
%82 = sext i32 %81 to i64 | |
%83 = getelementptr inbounds double, double* %3, i64 %82 | |
%84 = bitcast double* %83 to <4 x i64>* | |
%wide.load.2 = load <4 x i64>, <4 x i64>* %84, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%85 = getelementptr inbounds double, double* %83, i64 4 | |
%86 = bitcast double* %85 to <4 x i64>* | |
%wide.load22.2 = load <4 x i64>, <4 x i64>* %86, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%87 = getelementptr inbounds double, double* %83, i64 8 | |
%88 = bitcast double* %87 to <4 x i64>* | |
%wide.load23.2 = load <4 x i64>, <4 x i64>* %88, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%89 = getelementptr inbounds double, double* %83, i64 12 | |
%90 = bitcast double* %89 to <4 x i64>* | |
%wide.load24.2 = load <4 x i64>, <4 x i64>* %90, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%91 = add i32 %80, %2 | |
%92 = sext i32 %91 to i64 | |
%93 = getelementptr inbounds double, double* %1, i64 %92 | |
%94 = bitcast double* %93 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.2, <4 x i64>* %94, i32 8, <4 x i1> %76), !tbaa !12, !alias.scope !19, !noalias !16 | |
%95 = getelementptr inbounds double, double* %93, i64 4 | |
%96 = bitcast double* %95 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.2, <4 x i64>* %96, i32 8, <4 x i1> %77), !tbaa !12, !alias.scope !19, !noalias !16 | |
%97 = getelementptr inbounds double, double* %93, i64 8 | |
%98 = bitcast double* %97 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.2, <4 x i64>* %98, i32 8, <4 x i1> %78), !tbaa !12, !alias.scope !19, !noalias !16 | |
%99 = getelementptr inbounds double, double* %93, i64 12 | |
%100 = bitcast double* %99 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.2, <4 x i64>* %100, i32 8, <4 x i1> %79), !tbaa !12, !alias.scope !19, !noalias !16 | |
; ---- Lanes 48..63 ----
%101 = add <4 x i32> %broadcast.splat, <i32 -48, i32 -49, i32 -50, i32 -51> | |
%102 = add <4 x i32> %broadcast.splat17, <i32 -52, i32 -53, i32 -54, i32 -55> | |
%103 = add <4 x i32> %broadcast.splat19, <i32 -56, i32 -57, i32 -58, i32 -59> | |
%104 = add <4 x i32> %broadcast.splat21, <i32 -60, i32 -61, i32 -62, i32 -63> | |
%105 = icmp sgt <4 x i32> %101, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%106 = icmp sgt <4 x i32> %102, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%107 = icmp sgt <4 x i32> %103, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%108 = icmp sgt <4 x i32> %104, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%109 = or i32 %mul9.i, 48 | |
%110 = add i32 %109, %4 | |
%111 = sext i32 %110 to i64 | |
%112 = getelementptr inbounds double, double* %3, i64 %111 | |
%113 = bitcast double* %112 to <4 x i64>* | |
%wide.load.3 = load <4 x i64>, <4 x i64>* %113, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%114 = getelementptr inbounds double, double* %112, i64 4 | |
%115 = bitcast double* %114 to <4 x i64>* | |
%wide.load22.3 = load <4 x i64>, <4 x i64>* %115, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%116 = getelementptr inbounds double, double* %112, i64 8 | |
%117 = bitcast double* %116 to <4 x i64>* | |
%wide.load23.3 = load <4 x i64>, <4 x i64>* %117, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%118 = getelementptr inbounds double, double* %112, i64 12 | |
%119 = bitcast double* %118 to <4 x i64>* | |
%wide.load24.3 = load <4 x i64>, <4 x i64>* %119, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%120 = add i32 %109, %2 | |
%121 = sext i32 %120 to i64 | |
%122 = getelementptr inbounds double, double* %1, i64 %121 | |
%123 = bitcast double* %122 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.3, <4 x i64>* %123, i32 8, <4 x i1> %105), !tbaa !12, !alias.scope !19, !noalias !16 | |
%124 = getelementptr inbounds double, double* %122, i64 4 | |
%125 = bitcast double* %124 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.3, <4 x i64>* %125, i32 8, <4 x i1> %106), !tbaa !12, !alias.scope !19, !noalias !16 | |
%126 = getelementptr inbounds double, double* %122, i64 8 | |
%127 = bitcast double* %126 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.3, <4 x i64>* %127, i32 8, <4 x i1> %107), !tbaa !12, !alias.scope !19, !noalias !16 | |
%128 = getelementptr inbounds double, double* %122, i64 12 | |
%129 = bitcast double* %128 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.3, <4 x i64>* %129, i32 8, <4 x i1> %108), !tbaa !12, !alias.scope !19, !noalias !16 | |
; ---- Lanes 64..79 ----
%130 = add <4 x i32> %broadcast.splat, <i32 -64, i32 -65, i32 -66, i32 -67> | |
%131 = add <4 x i32> %broadcast.splat17, <i32 -68, i32 -69, i32 -70, i32 -71> | |
%132 = add <4 x i32> %broadcast.splat19, <i32 -72, i32 -73, i32 -74, i32 -75> | |
%133 = add <4 x i32> %broadcast.splat21, <i32 -76, i32 -77, i32 -78, i32 -79> | |
%134 = icmp sgt <4 x i32> %130, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%135 = icmp sgt <4 x i32> %131, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%136 = icmp sgt <4 x i32> %132, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%137 = icmp sgt <4 x i32> %133, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%138 = or i32 %mul9.i, 64 | |
%139 = add i32 %138, %4 | |
%140 = sext i32 %139 to i64 | |
%141 = getelementptr inbounds double, double* %3, i64 %140 | |
%142 = bitcast double* %141 to <4 x i64>* | |
%wide.load.4 = load <4 x i64>, <4 x i64>* %142, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%143 = getelementptr inbounds double, double* %141, i64 4 | |
%144 = bitcast double* %143 to <4 x i64>* | |
%wide.load22.4 = load <4 x i64>, <4 x i64>* %144, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%145 = getelementptr inbounds double, double* %141, i64 8 | |
%146 = bitcast double* %145 to <4 x i64>* | |
%wide.load23.4 = load <4 x i64>, <4 x i64>* %146, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%147 = getelementptr inbounds double, double* %141, i64 12 | |
%148 = bitcast double* %147 to <4 x i64>* | |
%wide.load24.4 = load <4 x i64>, <4 x i64>* %148, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%149 = add i32 %138, %2 | |
%150 = sext i32 %149 to i64 | |
%151 = getelementptr inbounds double, double* %1, i64 %150 | |
%152 = bitcast double* %151 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.4, <4 x i64>* %152, i32 8, <4 x i1> %134), !tbaa !12, !alias.scope !19, !noalias !16 | |
%153 = getelementptr inbounds double, double* %151, i64 4 | |
%154 = bitcast double* %153 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.4, <4 x i64>* %154, i32 8, <4 x i1> %135), !tbaa !12, !alias.scope !19, !noalias !16 | |
%155 = getelementptr inbounds double, double* %151, i64 8 | |
%156 = bitcast double* %155 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.4, <4 x i64>* %156, i32 8, <4 x i1> %136), !tbaa !12, !alias.scope !19, !noalias !16 | |
%157 = getelementptr inbounds double, double* %151, i64 12 | |
%158 = bitcast double* %157 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.4, <4 x i64>* %158, i32 8, <4 x i1> %137), !tbaa !12, !alias.scope !19, !noalias !16 | |
; ---- Lanes 80..95 ----
%159 = add <4 x i32> %broadcast.splat, <i32 -80, i32 -81, i32 -82, i32 -83> | |
%160 = add <4 x i32> %broadcast.splat17, <i32 -84, i32 -85, i32 -86, i32 -87> | |
%161 = add <4 x i32> %broadcast.splat19, <i32 -88, i32 -89, i32 -90, i32 -91> | |
%162 = add <4 x i32> %broadcast.splat21, <i32 -92, i32 -93, i32 -94, i32 -95> | |
%163 = icmp sgt <4 x i32> %159, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%164 = icmp sgt <4 x i32> %160, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%165 = icmp sgt <4 x i32> %161, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%166 = icmp sgt <4 x i32> %162, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%167 = or i32 %mul9.i, 80 | |
%168 = add i32 %167, %4 | |
%169 = sext i32 %168 to i64 | |
%170 = getelementptr inbounds double, double* %3, i64 %169 | |
%171 = bitcast double* %170 to <4 x i64>* | |
%wide.load.5 = load <4 x i64>, <4 x i64>* %171, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%172 = getelementptr inbounds double, double* %170, i64 4 | |
%173 = bitcast double* %172 to <4 x i64>* | |
%wide.load22.5 = load <4 x i64>, <4 x i64>* %173, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%174 = getelementptr inbounds double, double* %170, i64 8 | |
%175 = bitcast double* %174 to <4 x i64>* | |
%wide.load23.5 = load <4 x i64>, <4 x i64>* %175, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%176 = getelementptr inbounds double, double* %170, i64 12 | |
%177 = bitcast double* %176 to <4 x i64>* | |
%wide.load24.5 = load <4 x i64>, <4 x i64>* %177, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%178 = add i32 %167, %2 | |
%179 = sext i32 %178 to i64 | |
%180 = getelementptr inbounds double, double* %1, i64 %179 | |
%181 = bitcast double* %180 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.5, <4 x i64>* %181, i32 8, <4 x i1> %163), !tbaa !12, !alias.scope !19, !noalias !16 | |
%182 = getelementptr inbounds double, double* %180, i64 4 | |
%183 = bitcast double* %182 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.5, <4 x i64>* %183, i32 8, <4 x i1> %164), !tbaa !12, !alias.scope !19, !noalias !16 | |
%184 = getelementptr inbounds double, double* %180, i64 8 | |
%185 = bitcast double* %184 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.5, <4 x i64>* %185, i32 8, <4 x i1> %165), !tbaa !12, !alias.scope !19, !noalias !16 | |
%186 = getelementptr inbounds double, double* %180, i64 12 | |
%187 = bitcast double* %186 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.5, <4 x i64>* %187, i32 8, <4 x i1> %166), !tbaa !12, !alias.scope !19, !noalias !16 | |
; ---- Lanes 96..111 ----
%188 = add <4 x i32> %broadcast.splat, <i32 -96, i32 -97, i32 -98, i32 -99> | |
%189 = add <4 x i32> %broadcast.splat17, <i32 -100, i32 -101, i32 -102, i32 -103> | |
%190 = add <4 x i32> %broadcast.splat19, <i32 -104, i32 -105, i32 -106, i32 -107> | |
%191 = add <4 x i32> %broadcast.splat21, <i32 -108, i32 -109, i32 -110, i32 -111> | |
%192 = icmp sgt <4 x i32> %188, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%193 = icmp sgt <4 x i32> %189, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%194 = icmp sgt <4 x i32> %190, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%195 = icmp sgt <4 x i32> %191, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%196 = or i32 %mul9.i, 96 | |
%197 = add i32 %196, %4 | |
%198 = sext i32 %197 to i64 | |
%199 = getelementptr inbounds double, double* %3, i64 %198 | |
%200 = bitcast double* %199 to <4 x i64>* | |
%wide.load.6 = load <4 x i64>, <4 x i64>* %200, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%201 = getelementptr inbounds double, double* %199, i64 4 | |
%202 = bitcast double* %201 to <4 x i64>* | |
%wide.load22.6 = load <4 x i64>, <4 x i64>* %202, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%203 = getelementptr inbounds double, double* %199, i64 8 | |
%204 = bitcast double* %203 to <4 x i64>* | |
%wide.load23.6 = load <4 x i64>, <4 x i64>* %204, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%205 = getelementptr inbounds double, double* %199, i64 12 | |
%206 = bitcast double* %205 to <4 x i64>* | |
%wide.load24.6 = load <4 x i64>, <4 x i64>* %206, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%207 = add i32 %196, %2 | |
%208 = sext i32 %207 to i64 | |
%209 = getelementptr inbounds double, double* %1, i64 %208 | |
%210 = bitcast double* %209 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.6, <4 x i64>* %210, i32 8, <4 x i1> %192), !tbaa !12, !alias.scope !19, !noalias !16 | |
%211 = getelementptr inbounds double, double* %209, i64 4 | |
%212 = bitcast double* %211 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.6, <4 x i64>* %212, i32 8, <4 x i1> %193), !tbaa !12, !alias.scope !19, !noalias !16 | |
%213 = getelementptr inbounds double, double* %209, i64 8 | |
%214 = bitcast double* %213 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.6, <4 x i64>* %214, i32 8, <4 x i1> %194), !tbaa !12, !alias.scope !19, !noalias !16 | |
%215 = getelementptr inbounds double, double* %209, i64 12 | |
%216 = bitcast double* %215 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.6, <4 x i64>* %216, i32 8, <4 x i1> %195), !tbaa !12, !alias.scope !19, !noalias !16 | |
; ---- Lanes 112..127 ----
%217 = add <4 x i32> %broadcast.splat, <i32 -112, i32 -113, i32 -114, i32 -115> | |
%218 = add <4 x i32> %broadcast.splat17, <i32 -116, i32 -117, i32 -118, i32 -119> | |
%219 = add <4 x i32> %broadcast.splat19, <i32 -120, i32 -121, i32 -122, i32 -123> | |
%220 = add <4 x i32> %broadcast.splat21, <i32 -124, i32 -125, i32 -126, i32 -127> | |
%221 = icmp sgt <4 x i32> %217, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%222 = icmp sgt <4 x i32> %218, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%223 = icmp sgt <4 x i32> %219, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%224 = icmp sgt <4 x i32> %220, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%225 = or i32 %mul9.i, 112 | |
%226 = add i32 %225, %4 | |
%227 = sext i32 %226 to i64 | |
%228 = getelementptr inbounds double, double* %3, i64 %227 | |
%229 = bitcast double* %228 to <4 x i64>* | |
%wide.load.7 = load <4 x i64>, <4 x i64>* %229, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%230 = getelementptr inbounds double, double* %228, i64 4 | |
%231 = bitcast double* %230 to <4 x i64>* | |
%wide.load22.7 = load <4 x i64>, <4 x i64>* %231, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%232 = getelementptr inbounds double, double* %228, i64 8 | |
%233 = bitcast double* %232 to <4 x i64>* | |
%wide.load23.7 = load <4 x i64>, <4 x i64>* %233, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%234 = getelementptr inbounds double, double* %228, i64 12 | |
%235 = bitcast double* %234 to <4 x i64>* | |
%wide.load24.7 = load <4 x i64>, <4 x i64>* %235, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%236 = add i32 %225, %2 | |
%237 = sext i32 %236 to i64 | |
%238 = getelementptr inbounds double, double* %1, i64 %237 | |
%239 = bitcast double* %238 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.7, <4 x i64>* %239, i32 8, <4 x i1> %221), !tbaa !12, !alias.scope !19, !noalias !16 | |
%240 = getelementptr inbounds double, double* %238, i64 4 | |
%241 = bitcast double* %240 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.7, <4 x i64>* %241, i32 8, <4 x i1> %222), !tbaa !12, !alias.scope !19, !noalias !16 | |
%242 = getelementptr inbounds double, double* %238, i64 8 | |
%243 = bitcast double* %242 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.7, <4 x i64>* %243, i32 8, <4 x i1> %223), !tbaa !12, !alias.scope !19, !noalias !16 | |
%244 = getelementptr inbounds double, double* %238, i64 12 | |
%245 = bitcast double* %244 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.7, <4 x i64>* %245, i32 8, <4 x i1> %224), !tbaa !12, !alias.scope !19, !noalias !16 | |
; All 128 lanes handled; the vector path never enters the scalar loop.
br label %grudge_assign_0.exit | |
; ---- Scalar fallback loop, unrolled by 4 ----
; %_local_id_x.0 is the lane index of the first of four lanes handled per trip:
; it starts at 0 and advances by 4 until it equals 128.
pregion_for_entry..i: ; preds = %vector.scevcheck, %.r_exit.i.3 | |
%_local_id_x.0 = phi i64 [ %265, %.r_exit.i.3 ], [ 0, %vector.scevcheck ] | |
%conv2.i = trunc i64 %_local_id_x.0 to i32 | |
; Guard for lane lid + 0: copy only when %add4.i - lid > -1.
%add5.i = sub i32 %add4.i, %conv2.i | |
%cmp.i = icmp sgt i32 %add5.i, -1 | |
br i1 %cmp.i, label %246, label %.r_exit.i | |
; <label>:246: ; preds = %pregion_for_entry..i | |
; Guarded copy for lane lid + 0: load and store both happen only in this block.
%add10.i = add nuw nsw i32 %mul9.i, %conv2.i | |
%add13.i = add i32 %add10.i, %4 | |
%idxprom.i = sext i32 %add13.i to i64 | |
%arrayidx.i = getelementptr inbounds double, double* %3, i64 %idxprom.i | |
%247 = bitcast double* %arrayidx.i to i64* | |
%248 = load i64, i64* %247, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21 | |
%add20.i = add i32 %add10.i, %2 | |
%idxprom21.i = sext i32 %add20.i to i64 | |
%arrayidx22.i = getelementptr inbounds double, double* %1, i64 %idxprom21.i | |
%249 = bitcast double* %arrayidx22.i to i64* | |
store i64 %248, i64* %249, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i | |
.r_exit.i: ; preds = %246, %pregion_for_entry..i | |
; Lane lid + 1. The induction value is always a multiple of 4, so `or ..., 1`
; yields lid + 1.
%250 = trunc i64 %_local_id_x.0 to i32 | |
%conv2.i.1 = or i32 %250, 1 | |
%add5.i.1 = sub i32 %add4.i, %conv2.i.1 | |
%cmp.i.1 = icmp sgt i32 %add5.i.1, -1 | |
br i1 %cmp.i.1, label %251, label %.r_exit.i.1 | |
; Common exit for both the vector path and the scalar loop.
grudge_assign_0.exit: ; preds = %.r_exit.i.3, %vector.ph | |
ret void | |
; <label>:251: ; preds = %.r_exit.i | |
; Guarded copy for lane lid + 1.
%add10.i.1 = add nuw nsw i32 %mul9.i, %conv2.i.1 | |
%add13.i.1 = add i32 %add10.i.1, %4 | |
%idxprom.i.1 = sext i32 %add13.i.1 to i64 | |
%arrayidx.i.1 = getelementptr inbounds double, double* %3, i64 %idxprom.i.1 | |
%252 = bitcast double* %arrayidx.i.1 to i64* | |
%253 = load i64, i64* %252, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21 | |
%add20.i.1 = add i32 %add10.i.1, %2 | |
%idxprom21.i.1 = sext i32 %add20.i.1 to i64 | |
%arrayidx22.i.1 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.1 | |
%254 = bitcast double* %arrayidx22.i.1 to i64* | |
store i64 %253, i64* %254, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.1 | |
.r_exit.i.1: ; preds = %251, %.r_exit.i | |
; Lane lid + 2.
%255 = trunc i64 %_local_id_x.0 to i32 | |
%conv2.i.2 = or i32 %255, 2 | |
%add5.i.2 = sub i32 %add4.i, %conv2.i.2 | |
%cmp.i.2 = icmp sgt i32 %add5.i.2, -1 | |
br i1 %cmp.i.2, label %256, label %.r_exit.i.2 | |
; <label>:256: ; preds = %.r_exit.i.1 | |
; Guarded copy for lane lid + 2.
%add10.i.2 = add nuw nsw i32 %mul9.i, %conv2.i.2 | |
%add13.i.2 = add i32 %add10.i.2, %4 | |
%idxprom.i.2 = sext i32 %add13.i.2 to i64 | |
%arrayidx.i.2 = getelementptr inbounds double, double* %3, i64 %idxprom.i.2 | |
%257 = bitcast double* %arrayidx.i.2 to i64* | |
%258 = load i64, i64* %257, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21 | |
%add20.i.2 = add i32 %add10.i.2, %2 | |
%idxprom21.i.2 = sext i32 %add20.i.2 to i64 | |
%arrayidx22.i.2 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.2 | |
%259 = bitcast double* %arrayidx22.i.2 to i64* | |
store i64 %258, i64* %259, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.2 | |
.r_exit.i.2: ; preds = %256, %.r_exit.i.1 | |
; Lane lid + 3.
%260 = trunc i64 %_local_id_x.0 to i32 | |
%conv2.i.3 = or i32 %260, 3 | |
%add5.i.3 = sub i32 %add4.i, %conv2.i.3 | |
%cmp.i.3 = icmp sgt i32 %add5.i.3, -1 | |
br i1 %cmp.i.3, label %261, label %.r_exit.i.3 | |
; <label>:261: ; preds = %.r_exit.i.2 | |
; Guarded copy for lane lid + 3.
%add10.i.3 = add nuw nsw i32 %mul9.i, %conv2.i.3 | |
%add13.i.3 = add i32 %add10.i.3, %4 | |
%idxprom.i.3 = sext i32 %add13.i.3 to i64 | |
%arrayidx.i.3 = getelementptr inbounds double, double* %3, i64 %idxprom.i.3 | |
%262 = bitcast double* %arrayidx.i.3 to i64* | |
%263 = load i64, i64* %262, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21 | |
%add20.i.3 = add i32 %add10.i.3, %2 | |
%idxprom21.i.3 = sext i32 %add20.i.3 to i64 | |
%arrayidx22.i.3 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.3 | |
%264 = bitcast double* %arrayidx22.i.3 to i64* | |
store i64 %263, i64* %264, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.3 | |
.r_exit.i.3: ; preds = %261, %.r_exit.i.2 | |
; Loop latch: advance by the unroll factor (4) and leave once lid reaches 128.
%265 = add nuw nsw i64 %_local_id_x.0, 4 | |
%exitcond.3 = icmp eq i64 %265, 128 | |
br i1 %exitcond.3, label %grudge_assign_0.exit, label %pregion_for_entry..i, !llvm.loop !23 | |
} | |
; Function Attrs: norecurse nounwind | |
; _pocl_kernel_grudge_assign_0_workgroup: reads five values through the
; pointer array %0 and performs the same guarded 128-lane element copy as
; @_pocl_kernel_grudge_assign_0, using a scalar loop unrolled by 2 (there is no
; vector path in this function).
;
; Values obtained through %0 (each slot is dereferenced as shown below):
;   slot 0 -> i32*    -> i32     %8   bound used in the guard
;   slot 1 -> double** -> double* %12  destination buffer
;   slot 2 -> i32*    -> i32     %16  destination element offset
;   slot 3 -> double** -> double* %20  source buffer
;   slot 4 -> i32*    -> i32     %24  source element offset
; %2 supplies the block index (truncated to i32 and scaled by 128).
; Parameters %1, %3 and %4 are not referenced in the body.
; The unnamed entry block is %5, which is why the phi below names %5.
define void @_pocl_kernel_grudge_assign_0_workgroup(i8** nocapture readonly, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #1 { | |
; Slot 0: pointer to an i32, loaded into %8.
%6 = bitcast i8** %0 to i32** | |
%7 = load i32*, i32** %6, align 8 | |
%8 = load i32, i32* %7, align 4 | |
; Slot 1: pointer to a double*, loaded into %12 (two levels of indirection).
%9 = getelementptr i8*, i8** %0, i64 1 | |
%10 = bitcast i8** %9 to double*** | |
%11 = load double**, double*** %10, align 8 | |
%12 = load double*, double** %11, align 8 | |
; Slot 2: pointer to an i32, loaded into %16.
%13 = getelementptr i8*, i8** %0, i64 2 | |
%14 = bitcast i8** %13 to i32** | |
%15 = load i32*, i32** %14, align 8 | |
%16 = load i32, i32* %15, align 4 | |
; Slot 3: pointer to a double*, loaded into %20 (two levels of indirection).
%17 = getelementptr i8*, i8** %0, i64 3 | |
%18 = bitcast i8** %17 to double*** | |
%19 = load double**, double*** %18, align 8 | |
%20 = load double*, double** %19, align 8 | |
; Slot 4: pointer to an i32, loaded into %24.
%21 = getelementptr i8*, i8** %0, i64 4 | |
%22 = bitcast i8** %21 to i32** | |
%23 = load i32*, i32** %22, align 8 | |
%24 = load i32, i32* %23, align 4 | |
; Loop-invariant part of the guard: %add4.i.i = (-128 * trunc(%2)) - 1 + %8.
%conv.i.i = trunc i64 %2 to i32 | |
%mul.i.i = mul nsw i32 %conv.i.i, -128 | |
%add.i.i = add i32 %mul.i.i, -1 | |
%add4.i.i = add i32 %add.i.i, %8 | |
; Base element index trunc(%2) * 128.
%mul9.i.i = shl nsw i32 %conv.i.i, 7 | |
br label %pregion_for_entry..i.i | |
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5 | |
; Lane index of the first of two lanes handled per trip: 0, 2, 4, ... up to 128.
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %34, %.r_exit.i.i.1 ] | |
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32 | |
; Guard for lane lid + 0: copy only when %add4.i.i - lid > -1.
%add5.i.i = sub i32 %add4.i.i, %conv2.i.i | |
%cmp.i.i = icmp sgt i32 %add5.i.i, -1 | |
br i1 %cmp.i.i, label %25, label %.r_exit.i.i | |
; <label>:25: ; preds = %pregion_for_entry..i.i | |
; Guarded copy for lane lid + 0: %12[base + lid + %16] = %20[base + lid + %24],
; moved as an i64 bit pattern.
%add10.i.i = add nuw nsw i32 %mul9.i.i, %conv2.i.i | |
%add13.i.i = add i32 %add10.i.i, %24 | |
%idxprom.i.i = sext i32 %add13.i.i to i64 | |
%arrayidx.i.i = getelementptr inbounds double, double* %20, i64 %idxprom.i.i | |
%26 = bitcast double* %arrayidx.i.i to i64* | |
%27 = load i64, i64* %26, align 8, !tbaa !12, !alias.scope !25, !noalias !30, !llvm.mem.parallel_loop_access !21 | |
%add20.i.i = add i32 %add10.i.i, %16 | |
%idxprom21.i.i = sext i32 %add20.i.i to i64 | |
%arrayidx22.i.i = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i | |
%28 = bitcast double* %arrayidx22.i.i to i64* | |
store i64 %27, i64* %28, align 8, !tbaa !12, !alias.scope !30, !noalias !25, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.i | |
.r_exit.i.i: ; preds = %25, %pregion_for_entry..i.i | |
; Lane lid + 1. The induction value is always even, so `or ..., 1` yields lid + 1.
%29 = trunc i64 %_local_id_x.i.0 to i32 | |
%conv2.i.i.1 = or i32 %29, 1 | |
%add5.i.i.1 = sub i32 %add4.i.i, %conv2.i.i.1 | |
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1 | |
br i1 %cmp.i.i.1, label %30, label %.r_exit.i.i.1 | |
; Function exit, reached from the loop latch once all 128 lanes are done.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i.1 | |
ret void | |
; <label>:30: ; preds = %.r_exit.i.i | |
; Guarded copy for lane lid + 1.
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1 | |
%add13.i.i.1 = add i32 %add10.i.i.1, %24 | |
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64 | |
%arrayidx.i.i.1 = getelementptr inbounds double, double* %20, i64 %idxprom.i.i.1 | |
%31 = bitcast double* %arrayidx.i.i.1 to i64* | |
%32 = load i64, i64* %31, align 8, !tbaa !12, !alias.scope !25, !noalias !30, !llvm.mem.parallel_loop_access !21 | |
%add20.i.i.1 = add i32 %add10.i.i.1, %16 | |
%idxprom21.i.i.1 = sext i32 %add20.i.i.1 to i64 | |
%arrayidx22.i.i.1 = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i.1 | |
%33 = bitcast double* %arrayidx22.i.i.1 to i64* | |
store i64 %32, i64* %33, align 8, !tbaa !12, !alias.scope !30, !noalias !25, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.i.1 | |
.r_exit.i.i.1: ; preds = %30, %.r_exit.i.i | |
; Loop latch: advance by the unroll factor (2) and leave once lid reaches 128.
%34 = add nuw nsw i64 %_local_id_x.i.0, 2 | |
%exitcond.1 = icmp eq i64 %34, 128 | |
br i1 %exitcond.1, label %_pocl_kernel_grudge_assign_0.exit, label %pregion_for_entry..i.i, !llvm.loop !22 | |
} | |
; Work-group entry point for grudge_assign_0 (the "_workgroup_fast" variant).
; Unnamed operands:
;   %0      i8**  array of argument slots; slots 0..4 are read below
;   %1      pointer to a struct that is never accessed here (readnone)
;   %2      i64   truncated to i32 and scaled by 128 -- presumably the x group id; confirm against the caller
;   %3, %4  i64   not referenced in this body
; Effect: for each local id lid in 0..127 (two per loop trip), when
;   (%8 - 1 - 128*trunc(%2)) - lid > -1   (signed i32)
; one 64-bit element is copied from %18[128*trunc(%2) + lid + %22] to %11[128*trunc(%2) + lid + %15].
; Function Attrs: norecurse nounwind | |
define void @_pocl_kernel_grudge_assign_0_workgroup_fast(i8** nocapture readonly, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #1 { | |
; Slot 0 holds a pointer to an i32; %8 is the bound used by the guard.
%6 = bitcast i8** %0 to i32** | |
%7 = load i32*, i32** %6, align 8 | |
%8 = load i32, i32* %7, align 4 | |
; Slot 1 is read directly as a double*; %11 is the base address of the stores.
%9 = getelementptr i8*, i8** %0, i64 1 | |
%10 = bitcast i8** %9 to double** | |
%11 = load double*, double** %10, align 8 | |
; Slot 2 holds a pointer to an i32; %15 is added to the store index.
%12 = getelementptr i8*, i8** %0, i64 2 | |
%13 = bitcast i8** %12 to i32** | |
%14 = load i32*, i32** %13, align 8 | |
%15 = load i32, i32* %14, align 4 | |
; Slot 3 is read directly as a double*; %18 is the base address of the loads.
%16 = getelementptr i8*, i8** %0, i64 3 | |
%17 = bitcast i8** %16 to double** | |
%18 = load double*, double** %17, align 8 | |
; Slot 4 holds a pointer to an i32; %22 is added to the load index.
%19 = getelementptr i8*, i8** %0, i64 4 | |
%20 = bitcast i8** %19 to i32** | |
%21 = load i32*, i32** %20, align 8 | |
%22 = load i32, i32* %21, align 4 | |
; Loop-invariant values: %add4.i.i = %8 - 1 - 128*trunc(%2), %mul9.i.i = 128*trunc(%2).
%conv.i.i = trunc i64 %2 to i32 | |
%mul.i.i = mul nsw i32 %conv.i.i, -128 | |
%add.i.i = add i32 %mul.i.i, -1 | |
%add4.i.i = add i32 %add.i.i, %8 | |
%mul9.i.i = shl nsw i32 %conv.i.i, 7 | |
br label %pregion_for_entry..i.i | |
; Loop header: %_local_id_x.i.0 takes the even values 0, 2, ..., 126.
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5 | |
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %32, %.r_exit.i.i.1 ] | |
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32 | |
; Guard for the even local id: copy only when %add4.i.i - lid is non-negative.
%add5.i.i = sub i32 %add4.i.i, %conv2.i.i | |
%cmp.i.i = icmp sgt i32 %add5.i.i, -1 | |
br i1 %cmp.i.i, label %23, label %.r_exit.i.i | |
; Guarded copy for the even local id; the double is moved as a raw i64.
; <label>:23: ; preds = %pregion_for_entry..i.i | |
%add10.i.i = add nuw nsw i32 %mul9.i.i, %conv2.i.i | |
%add13.i.i = add i32 %add10.i.i, %22 | |
%idxprom.i.i = sext i32 %add13.i.i to i64 | |
%arrayidx.i.i = getelementptr inbounds double, double* %18, i64 %idxprom.i.i | |
%24 = bitcast double* %arrayidx.i.i to i64* | |
%25 = load i64, i64* %24, align 8, !tbaa !12, !alias.scope !33, !noalias !38, !llvm.mem.parallel_loop_access !21 | |
%add20.i.i = add i32 %add10.i.i, %15 | |
%idxprom21.i.i = sext i32 %add20.i.i to i64 | |
%arrayidx22.i.i = getelementptr inbounds double, double* %11, i64 %idxprom21.i.i | |
%26 = bitcast double* %arrayidx22.i.i to i64* | |
store i64 %25, i64* %26, align 8, !tbaa !12, !alias.scope !38, !noalias !33, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.i | |
; Second half of the trip: the odd local id is formed as (lid | 1), then the same guard is applied.
.r_exit.i.i: ; preds = %23, %pregion_for_entry..i.i | |
%27 = trunc i64 %_local_id_x.i.0 to i32 | |
%conv2.i.i.1 = or i32 %27, 1 | |
%add5.i.i.1 = sub i32 %add4.i.i, %conv2.i.i.1 | |
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1 | |
br i1 %cmp.i.i.1, label %28, label %.r_exit.i.i.1 | |
; Function exit, reached from the loop latch once the counter hits 128.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i.1 | |
ret void | |
; Guarded copy for the odd local id, same addressing as the even case.
; <label>:28: ; preds = %.r_exit.i.i | |
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1 | |
%add13.i.i.1 = add i32 %add10.i.i.1, %22 | |
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64 | |
%arrayidx.i.i.1 = getelementptr inbounds double, double* %18, i64 %idxprom.i.i.1 | |
%29 = bitcast double* %arrayidx.i.i.1 to i64* | |
%30 = load i64, i64* %29, align 8, !tbaa !12, !alias.scope !33, !noalias !38, !llvm.mem.parallel_loop_access !21 | |
%add20.i.i.1 = add i32 %add10.i.i.1, %15 | |
%idxprom21.i.i.1 = sext i32 %add20.i.i.1 to i64 | |
%arrayidx22.i.i.1 = getelementptr inbounds double, double* %11, i64 %idxprom21.i.i.1 | |
%31 = bitcast double* %arrayidx22.i.i.1 to i64* | |
store i64 %30, i64* %31, align 8, !tbaa !12, !alias.scope !38, !noalias !33, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.i.1 | |
; Loop latch: advance the counter by 2 and leave once it equals 128.
.r_exit.i.i.1: ; preds = %28, %.r_exit.i.i | |
%32 = add nuw nsw i64 %_local_id_x.i.0, 2 | |
%exitcond.1 = icmp eq i64 %32, 128 | |
br i1 %exitcond.1, label %_pocl_kernel_grudge_assign_0.exit, label %pregion_for_entry..i.i, !llvm.loop !22 | |
} | |
; Declaration of the masked vector store intrinsic for <4 x i64> values.
; Operand order per the signature: value to store, destination pointer, i32 alignment, <4 x i1> per-lane mask.
; Function Attrs: argmemonly nounwind | |
declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>) #2 | |
attributes #0 = { alwaysinline norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #1 = { norecurse nounwind } | |
attributes #2 = { argmemonly nounwind } | |
!llvm.module.flags = !{!0, !1, !2} | |
!opencl.ocl.version = !{!3} | |
!llvm.ident = !{!4} | |
!opencl.spir.version = !{!3} | |
!0 = !{i32 1, !"wchar_size", i32 4} | |
!1 = !{i32 7, !"PIC Level", i32 2} | |
!2 = !{i32 7, !"PIE Level", i32 2} | |
!3 = !{i32 1, i32 2} | |
!4 = !{!"clang version 6.0.1 (tags/RELEASE_601/final)"} | |
!5 = !{i32 0, i32 1, i32 0, i32 1, i32 0} | |
!6 = !{!"none", !"none", !"none", !"none", !"none"} | |
!7 = !{!"int", !"double*", !"int", !"double*", !"int"} | |
!8 = !{!"", !"restrict", !"", !"restrict const", !""} | |
!9 = !{!"grdg_n", !"expr_8", !"expr_8_offset", !"grdg_sub_discr_dx0_dr0", !"grdg_sub_discr_dx0_dr0_offset"} | |
!10 = !{i32 128, i32 1, i32 1} | |
!11 = !{i32 1} | |
!12 = !{!13, !13, i64 0} | |
!13 = !{!"double", !14, i64 0} | |
!14 = !{!"omnipotent char", !15, i64 0} | |
!15 = !{!"Simple C/C++ TBAA"} | |
!16 = !{!17} | |
!17 = distinct !{!17, !18, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"} | |
!18 = distinct !{!18, !"grudge_assign_0"} | |
!19 = !{!20} | |
!20 = distinct !{!20, !18, !"grudge_assign_0: %expr_8"} | |
!21 = !{!22} | |
!22 = distinct !{!22} | |
!23 = distinct !{!23, !24} | |
!24 = !{!"llvm.loop.isvectorized", i32 1} | |
!25 = !{!26, !28} | |
!26 = distinct !{!26, !27, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"} | |
!27 = distinct !{!27, !"grudge_assign_0"} | |
!28 = distinct !{!28, !29, !"_pocl_kernel_grudge_assign_0: argument 1"} | |
!29 = distinct !{!29, !"_pocl_kernel_grudge_assign_0"} | |
!30 = !{!31, !32} | |
!31 = distinct !{!31, !27, !"grudge_assign_0: %expr_8"} | |
!32 = distinct !{!32, !29, !"_pocl_kernel_grudge_assign_0: argument 0"} | |
!33 = !{!34, !36} | |
!34 = distinct !{!34, !35, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"} | |
!35 = distinct !{!35, !"grudge_assign_0"} | |
!36 = distinct !{!36, !37, !"_pocl_kernel_grudge_assign_0: argument 1"} | |
!37 = distinct !{!37, !"_pocl_kernel_grudge_assign_0"} | |
!38 = !{!39, !40} | |
!39 = distinct !{!39, !35, !"grudge_assign_0: %expr_8"} | |
!40 = distinct !{!40, !37, !"_pocl_kernel_grudge_assign_0: argument 0"} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'in2.ll' | |
source_filename = "parallel_bc" | |
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | |
target triple = "x86_64-unknown-linux-gnu" | |
; Kernel body for grudge_assign_0, containing a straight-line vector path and a scalar fallback loop.
; Unnamed operands:
;   %0      i32      bound used by the guard
;   %1      double*  destination base (noalias)
;   %2      i32      added to the destination index
;   %3      double*  source base (noalias, readonly)
;   %4      i32      added to the source index
;   %5      pointer to a struct that is never accessed here (readnone)
;   %6      i64      truncated to i32 and scaled by 128 -- presumably the x group id; confirm against the caller
;   %7, %8  i64      not referenced in this body
; Effect: for each local id lid in 0..127, when (%0 - 128*trunc(%6)) - lid - 1 > -1 (signed i32),
; one 64-bit element is copied from %3[128*trunc(%6) + lid + %4] to %1[128*trunc(%6) + lid + %2].
; Function Attrs: alwaysinline norecurse nounwind | |
define void @_pocl_kernel_grudge_assign_0(i32, double* noalias nocapture, i32, double* noalias nocapture readonly, i32, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 !pocl_generated !11 { | |
; Entry block: computes the loop-invariant values and chooses between the vector and scalar paths.
vector.scevcheck: | |
%conv.i = trunc i64 %6 to i32 | |
%mul.i = mul nsw i32 %conv.i, -128 | |
; %add4.i = %0 - 128*trunc(%6); the "- 1" of the guard is folded into the per-lane constants below.
%add4.i = add i32 %mul.i, %0 | |
%mul9.i = shl nsw i32 %conv.i, 7 | |
%9 = shl i32 %conv.i, 7 | |
; 2147483520 is 2^31 - 128: each test is true when the base index plus 127 would exceed the signed i32 maximum.
; If either the source or the destination base index fails the test, the scalar loop is used.
%10 = add i32 %9, %4 | |
%11 = icmp sgt i32 %10, 2147483520 | |
%12 = add i32 %9, %2 | |
%13 = icmp sgt i32 %12, 2147483520 | |
%14 = or i1 %11, %13 | |
br i1 %14, label %pregion_for_entry..i, label %vector.ph | |
; Vector path: all 128 local ids handled as 8 groups of 16 lanes (4 vectors of <4 x i64> per group), no loop.
vector.ph: ; preds = %vector.scevcheck | |
; Four identical splats of %add4.i, one per vector of a 16-lane group.
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %add4.i, i32 0 | |
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer | |
%broadcast.splatinsert16 = insertelement <4 x i32> undef, i32 %add4.i, i32 0 | |
%broadcast.splat17 = shufflevector <4 x i32> %broadcast.splatinsert16, <4 x i32> undef, <4 x i32> zeroinitializer | |
%broadcast.splatinsert18 = insertelement <4 x i32> undef, i32 %add4.i, i32 0 | |
%broadcast.splat19 = shufflevector <4 x i32> %broadcast.splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer | |
%broadcast.splatinsert20 = insertelement <4 x i32> undef, i32 %add4.i, i32 0 | |
%broadcast.splat21 = shufflevector <4 x i32> %broadcast.splatinsert20, <4 x i32> undef, <4 x i32> zeroinitializer | |
; Group 0 (local ids 0..15): lane value is %add4.i - lid - 1; the mask bit is set when that value is > -1.
%15 = add <4 x i32> %broadcast.splat, <i32 -1, i32 -2, i32 -3, i32 -4> | |
%16 = add <4 x i32> %broadcast.splat17, <i32 -5, i32 -6, i32 -7, i32 -8> | |
%17 = add <4 x i32> %broadcast.splat19, <i32 -9, i32 -10, i32 -11, i32 -12> | |
%18 = add <4 x i32> %broadcast.splat21, <i32 -13, i32 -14, i32 -15, i32 -16> | |
%19 = icmp sgt <4 x i32> %15, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%20 = icmp sgt <4 x i32> %16, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%21 = icmp sgt <4 x i32> %17, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%22 = icmp sgt <4 x i32> %18, <i32 -1, i32 -1, i32 -1, i32 -1> | |
; Source loads for the group are plain (unmasked) wide loads of 16 consecutive elements.
; NOTE(review): the scalar loop below performs its load only when the guard passes, whereas these loads
; are unconditional; only the stores are masked. Whether this is the behaviour the gist reports as the
; LLVM bug is not stated in the visible text -- confirm against the bug report.
%23 = add i32 %mul9.i, %4 | |
%24 = sext i32 %23 to i64 | |
%25 = getelementptr inbounds double, double* %3, i64 %24 | |
%26 = bitcast double* %25 to <4 x i64>* | |
%wide.load = load <4 x i64>, <4 x i64>* %26, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%27 = getelementptr inbounds double, double* %25, i64 4 | |
%28 = bitcast double* %27 to <4 x i64>* | |
%wide.load22 = load <4 x i64>, <4 x i64>* %28, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%29 = getelementptr inbounds double, double* %25, i64 8 | |
%30 = bitcast double* %29 to <4 x i64>* | |
%wide.load23 = load <4 x i64>, <4 x i64>* %30, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%31 = getelementptr inbounds double, double* %25, i64 12 | |
%32 = bitcast double* %31 to <4 x i64>* | |
%wide.load24 = load <4 x i64>, <4 x i64>* %32, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
; Destination stores for the group go through llvm.masked.store with the masks computed above.
%33 = add i32 %mul9.i, %2 | |
%34 = sext i32 %33 to i64 | |
%35 = getelementptr inbounds double, double* %1, i64 %34 | |
%36 = bitcast double* %35 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load, <4 x i64>* %36, i32 8, <4 x i1> %19), !tbaa !12, !alias.scope !19, !noalias !16 | |
%37 = getelementptr inbounds double, double* %35, i64 4 | |
%38 = bitcast double* %37 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22, <4 x i64>* %38, i32 8, <4 x i1> %20), !tbaa !12, !alias.scope !19, !noalias !16 | |
%39 = getelementptr inbounds double, double* %35, i64 8 | |
%40 = bitcast double* %39 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23, <4 x i64>* %40, i32 8, <4 x i1> %21), !tbaa !12, !alias.scope !19, !noalias !16 | |
%41 = getelementptr inbounds double, double* %35, i64 12 | |
%42 = bitcast double* %41 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24, <4 x i64>* %42, i32 8, <4 x i1> %22), !tbaa !12, !alias.scope !19, !noalias !16 | |
; Group 1 (local ids 16..31). The base index is formed with "or ..., 16"; %mul9.i is a value shifted
; left by 7, so its low seven bits are zero and the or acts as an add. Same pattern in later groups.
%43 = add <4 x i32> %broadcast.splat, <i32 -17, i32 -18, i32 -19, i32 -20> | |
%44 = add <4 x i32> %broadcast.splat17, <i32 -21, i32 -22, i32 -23, i32 -24> | |
%45 = add <4 x i32> %broadcast.splat19, <i32 -25, i32 -26, i32 -27, i32 -28> | |
%46 = add <4 x i32> %broadcast.splat21, <i32 -29, i32 -30, i32 -31, i32 -32> | |
%47 = icmp sgt <4 x i32> %43, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%48 = icmp sgt <4 x i32> %44, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%49 = icmp sgt <4 x i32> %45, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%50 = icmp sgt <4 x i32> %46, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%51 = or i32 %mul9.i, 16 | |
%52 = add i32 %51, %4 | |
%53 = sext i32 %52 to i64 | |
%54 = getelementptr inbounds double, double* %3, i64 %53 | |
%55 = bitcast double* %54 to <4 x i64>* | |
%wide.load.1 = load <4 x i64>, <4 x i64>* %55, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%56 = getelementptr inbounds double, double* %54, i64 4 | |
%57 = bitcast double* %56 to <4 x i64>* | |
%wide.load22.1 = load <4 x i64>, <4 x i64>* %57, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%58 = getelementptr inbounds double, double* %54, i64 8 | |
%59 = bitcast double* %58 to <4 x i64>* | |
%wide.load23.1 = load <4 x i64>, <4 x i64>* %59, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%60 = getelementptr inbounds double, double* %54, i64 12 | |
%61 = bitcast double* %60 to <4 x i64>* | |
%wide.load24.1 = load <4 x i64>, <4 x i64>* %61, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%62 = add i32 %51, %2 | |
%63 = sext i32 %62 to i64 | |
%64 = getelementptr inbounds double, double* %1, i64 %63 | |
%65 = bitcast double* %64 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.1, <4 x i64>* %65, i32 8, <4 x i1> %47), !tbaa !12, !alias.scope !19, !noalias !16 | |
%66 = getelementptr inbounds double, double* %64, i64 4 | |
%67 = bitcast double* %66 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.1, <4 x i64>* %67, i32 8, <4 x i1> %48), !tbaa !12, !alias.scope !19, !noalias !16 | |
%68 = getelementptr inbounds double, double* %64, i64 8 | |
%69 = bitcast double* %68 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.1, <4 x i64>* %69, i32 8, <4 x i1> %49), !tbaa !12, !alias.scope !19, !noalias !16 | |
%70 = getelementptr inbounds double, double* %64, i64 12 | |
%71 = bitcast double* %70 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.1, <4 x i64>* %71, i32 8, <4 x i1> %50), !tbaa !12, !alias.scope !19, !noalias !16 | |
; Group 2 (local ids 32..47).
%72 = add <4 x i32> %broadcast.splat, <i32 -33, i32 -34, i32 -35, i32 -36> | |
%73 = add <4 x i32> %broadcast.splat17, <i32 -37, i32 -38, i32 -39, i32 -40> | |
%74 = add <4 x i32> %broadcast.splat19, <i32 -41, i32 -42, i32 -43, i32 -44> | |
%75 = add <4 x i32> %broadcast.splat21, <i32 -45, i32 -46, i32 -47, i32 -48> | |
%76 = icmp sgt <4 x i32> %72, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%77 = icmp sgt <4 x i32> %73, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%78 = icmp sgt <4 x i32> %74, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%79 = icmp sgt <4 x i32> %75, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%80 = or i32 %mul9.i, 32 | |
%81 = add i32 %80, %4 | |
%82 = sext i32 %81 to i64 | |
%83 = getelementptr inbounds double, double* %3, i64 %82 | |
%84 = bitcast double* %83 to <4 x i64>* | |
%wide.load.2 = load <4 x i64>, <4 x i64>* %84, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%85 = getelementptr inbounds double, double* %83, i64 4 | |
%86 = bitcast double* %85 to <4 x i64>* | |
%wide.load22.2 = load <4 x i64>, <4 x i64>* %86, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%87 = getelementptr inbounds double, double* %83, i64 8 | |
%88 = bitcast double* %87 to <4 x i64>* | |
%wide.load23.2 = load <4 x i64>, <4 x i64>* %88, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%89 = getelementptr inbounds double, double* %83, i64 12 | |
%90 = bitcast double* %89 to <4 x i64>* | |
%wide.load24.2 = load <4 x i64>, <4 x i64>* %90, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%91 = add i32 %80, %2 | |
%92 = sext i32 %91 to i64 | |
%93 = getelementptr inbounds double, double* %1, i64 %92 | |
%94 = bitcast double* %93 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.2, <4 x i64>* %94, i32 8, <4 x i1> %76), !tbaa !12, !alias.scope !19, !noalias !16 | |
%95 = getelementptr inbounds double, double* %93, i64 4 | |
%96 = bitcast double* %95 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.2, <4 x i64>* %96, i32 8, <4 x i1> %77), !tbaa !12, !alias.scope !19, !noalias !16 | |
%97 = getelementptr inbounds double, double* %93, i64 8 | |
%98 = bitcast double* %97 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.2, <4 x i64>* %98, i32 8, <4 x i1> %78), !tbaa !12, !alias.scope !19, !noalias !16 | |
%99 = getelementptr inbounds double, double* %93, i64 12 | |
%100 = bitcast double* %99 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.2, <4 x i64>* %100, i32 8, <4 x i1> %79), !tbaa !12, !alias.scope !19, !noalias !16 | |
; Group 3 (local ids 48..63).
%101 = add <4 x i32> %broadcast.splat, <i32 -49, i32 -50, i32 -51, i32 -52> | |
%102 = add <4 x i32> %broadcast.splat17, <i32 -53, i32 -54, i32 -55, i32 -56> | |
%103 = add <4 x i32> %broadcast.splat19, <i32 -57, i32 -58, i32 -59, i32 -60> | |
%104 = add <4 x i32> %broadcast.splat21, <i32 -61, i32 -62, i32 -63, i32 -64> | |
%105 = icmp sgt <4 x i32> %101, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%106 = icmp sgt <4 x i32> %102, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%107 = icmp sgt <4 x i32> %103, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%108 = icmp sgt <4 x i32> %104, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%109 = or i32 %mul9.i, 48 | |
%110 = add i32 %109, %4 | |
%111 = sext i32 %110 to i64 | |
%112 = getelementptr inbounds double, double* %3, i64 %111 | |
%113 = bitcast double* %112 to <4 x i64>* | |
%wide.load.3 = load <4 x i64>, <4 x i64>* %113, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%114 = getelementptr inbounds double, double* %112, i64 4 | |
%115 = bitcast double* %114 to <4 x i64>* | |
%wide.load22.3 = load <4 x i64>, <4 x i64>* %115, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%116 = getelementptr inbounds double, double* %112, i64 8 | |
%117 = bitcast double* %116 to <4 x i64>* | |
%wide.load23.3 = load <4 x i64>, <4 x i64>* %117, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%118 = getelementptr inbounds double, double* %112, i64 12 | |
%119 = bitcast double* %118 to <4 x i64>* | |
%wide.load24.3 = load <4 x i64>, <4 x i64>* %119, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%120 = add i32 %109, %2 | |
%121 = sext i32 %120 to i64 | |
%122 = getelementptr inbounds double, double* %1, i64 %121 | |
%123 = bitcast double* %122 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.3, <4 x i64>* %123, i32 8, <4 x i1> %105), !tbaa !12, !alias.scope !19, !noalias !16 | |
%124 = getelementptr inbounds double, double* %122, i64 4 | |
%125 = bitcast double* %124 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.3, <4 x i64>* %125, i32 8, <4 x i1> %106), !tbaa !12, !alias.scope !19, !noalias !16 | |
%126 = getelementptr inbounds double, double* %122, i64 8 | |
%127 = bitcast double* %126 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.3, <4 x i64>* %127, i32 8, <4 x i1> %107), !tbaa !12, !alias.scope !19, !noalias !16 | |
%128 = getelementptr inbounds double, double* %122, i64 12 | |
%129 = bitcast double* %128 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.3, <4 x i64>* %129, i32 8, <4 x i1> %108), !tbaa !12, !alias.scope !19, !noalias !16 | |
; Group 4 (local ids 64..79).
%130 = add <4 x i32> %broadcast.splat, <i32 -65, i32 -66, i32 -67, i32 -68> | |
%131 = add <4 x i32> %broadcast.splat17, <i32 -69, i32 -70, i32 -71, i32 -72> | |
%132 = add <4 x i32> %broadcast.splat19, <i32 -73, i32 -74, i32 -75, i32 -76> | |
%133 = add <4 x i32> %broadcast.splat21, <i32 -77, i32 -78, i32 -79, i32 -80> | |
%134 = icmp sgt <4 x i32> %130, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%135 = icmp sgt <4 x i32> %131, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%136 = icmp sgt <4 x i32> %132, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%137 = icmp sgt <4 x i32> %133, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%138 = or i32 %mul9.i, 64 | |
%139 = add i32 %138, %4 | |
%140 = sext i32 %139 to i64 | |
%141 = getelementptr inbounds double, double* %3, i64 %140 | |
%142 = bitcast double* %141 to <4 x i64>* | |
%wide.load.4 = load <4 x i64>, <4 x i64>* %142, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%143 = getelementptr inbounds double, double* %141, i64 4 | |
%144 = bitcast double* %143 to <4 x i64>* | |
%wide.load22.4 = load <4 x i64>, <4 x i64>* %144, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%145 = getelementptr inbounds double, double* %141, i64 8 | |
%146 = bitcast double* %145 to <4 x i64>* | |
%wide.load23.4 = load <4 x i64>, <4 x i64>* %146, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%147 = getelementptr inbounds double, double* %141, i64 12 | |
%148 = bitcast double* %147 to <4 x i64>* | |
%wide.load24.4 = load <4 x i64>, <4 x i64>* %148, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%149 = add i32 %138, %2 | |
%150 = sext i32 %149 to i64 | |
%151 = getelementptr inbounds double, double* %1, i64 %150 | |
%152 = bitcast double* %151 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.4, <4 x i64>* %152, i32 8, <4 x i1> %134), !tbaa !12, !alias.scope !19, !noalias !16 | |
%153 = getelementptr inbounds double, double* %151, i64 4 | |
%154 = bitcast double* %153 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.4, <4 x i64>* %154, i32 8, <4 x i1> %135), !tbaa !12, !alias.scope !19, !noalias !16 | |
%155 = getelementptr inbounds double, double* %151, i64 8 | |
%156 = bitcast double* %155 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.4, <4 x i64>* %156, i32 8, <4 x i1> %136), !tbaa !12, !alias.scope !19, !noalias !16 | |
%157 = getelementptr inbounds double, double* %151, i64 12 | |
%158 = bitcast double* %157 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.4, <4 x i64>* %158, i32 8, <4 x i1> %137), !tbaa !12, !alias.scope !19, !noalias !16 | |
; Group 5 (local ids 80..95).
%159 = add <4 x i32> %broadcast.splat, <i32 -81, i32 -82, i32 -83, i32 -84> | |
%160 = add <4 x i32> %broadcast.splat17, <i32 -85, i32 -86, i32 -87, i32 -88> | |
%161 = add <4 x i32> %broadcast.splat19, <i32 -89, i32 -90, i32 -91, i32 -92> | |
%162 = add <4 x i32> %broadcast.splat21, <i32 -93, i32 -94, i32 -95, i32 -96> | |
%163 = icmp sgt <4 x i32> %159, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%164 = icmp sgt <4 x i32> %160, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%165 = icmp sgt <4 x i32> %161, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%166 = icmp sgt <4 x i32> %162, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%167 = or i32 %mul9.i, 80 | |
%168 = add i32 %167, %4 | |
%169 = sext i32 %168 to i64 | |
%170 = getelementptr inbounds double, double* %3, i64 %169 | |
%171 = bitcast double* %170 to <4 x i64>* | |
%wide.load.5 = load <4 x i64>, <4 x i64>* %171, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%172 = getelementptr inbounds double, double* %170, i64 4 | |
%173 = bitcast double* %172 to <4 x i64>* | |
%wide.load22.5 = load <4 x i64>, <4 x i64>* %173, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%174 = getelementptr inbounds double, double* %170, i64 8 | |
%175 = bitcast double* %174 to <4 x i64>* | |
%wide.load23.5 = load <4 x i64>, <4 x i64>* %175, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%176 = getelementptr inbounds double, double* %170, i64 12 | |
%177 = bitcast double* %176 to <4 x i64>* | |
%wide.load24.5 = load <4 x i64>, <4 x i64>* %177, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%178 = add i32 %167, %2 | |
%179 = sext i32 %178 to i64 | |
%180 = getelementptr inbounds double, double* %1, i64 %179 | |
%181 = bitcast double* %180 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.5, <4 x i64>* %181, i32 8, <4 x i1> %163), !tbaa !12, !alias.scope !19, !noalias !16 | |
%182 = getelementptr inbounds double, double* %180, i64 4 | |
%183 = bitcast double* %182 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.5, <4 x i64>* %183, i32 8, <4 x i1> %164), !tbaa !12, !alias.scope !19, !noalias !16 | |
%184 = getelementptr inbounds double, double* %180, i64 8 | |
%185 = bitcast double* %184 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.5, <4 x i64>* %185, i32 8, <4 x i1> %165), !tbaa !12, !alias.scope !19, !noalias !16 | |
%186 = getelementptr inbounds double, double* %180, i64 12 | |
%187 = bitcast double* %186 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.5, <4 x i64>* %187, i32 8, <4 x i1> %166), !tbaa !12, !alias.scope !19, !noalias !16 | |
; Group 6 (local ids 96..111).
%188 = add <4 x i32> %broadcast.splat, <i32 -97, i32 -98, i32 -99, i32 -100> | |
%189 = add <4 x i32> %broadcast.splat17, <i32 -101, i32 -102, i32 -103, i32 -104> | |
%190 = add <4 x i32> %broadcast.splat19, <i32 -105, i32 -106, i32 -107, i32 -108> | |
%191 = add <4 x i32> %broadcast.splat21, <i32 -109, i32 -110, i32 -111, i32 -112> | |
%192 = icmp sgt <4 x i32> %188, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%193 = icmp sgt <4 x i32> %189, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%194 = icmp sgt <4 x i32> %190, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%195 = icmp sgt <4 x i32> %191, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%196 = or i32 %mul9.i, 96 | |
%197 = add i32 %196, %4 | |
%198 = sext i32 %197 to i64 | |
%199 = getelementptr inbounds double, double* %3, i64 %198 | |
%200 = bitcast double* %199 to <4 x i64>* | |
%wide.load.6 = load <4 x i64>, <4 x i64>* %200, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%201 = getelementptr inbounds double, double* %199, i64 4 | |
%202 = bitcast double* %201 to <4 x i64>* | |
%wide.load22.6 = load <4 x i64>, <4 x i64>* %202, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%203 = getelementptr inbounds double, double* %199, i64 8 | |
%204 = bitcast double* %203 to <4 x i64>* | |
%wide.load23.6 = load <4 x i64>, <4 x i64>* %204, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%205 = getelementptr inbounds double, double* %199, i64 12 | |
%206 = bitcast double* %205 to <4 x i64>* | |
%wide.load24.6 = load <4 x i64>, <4 x i64>* %206, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%207 = add i32 %196, %2 | |
%208 = sext i32 %207 to i64 | |
%209 = getelementptr inbounds double, double* %1, i64 %208 | |
%210 = bitcast double* %209 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.6, <4 x i64>* %210, i32 8, <4 x i1> %192), !tbaa !12, !alias.scope !19, !noalias !16 | |
%211 = getelementptr inbounds double, double* %209, i64 4 | |
%212 = bitcast double* %211 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.6, <4 x i64>* %212, i32 8, <4 x i1> %193), !tbaa !12, !alias.scope !19, !noalias !16 | |
%213 = getelementptr inbounds double, double* %209, i64 8 | |
%214 = bitcast double* %213 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.6, <4 x i64>* %214, i32 8, <4 x i1> %194), !tbaa !12, !alias.scope !19, !noalias !16 | |
%215 = getelementptr inbounds double, double* %209, i64 12 | |
%216 = bitcast double* %215 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.6, <4 x i64>* %216, i32 8, <4 x i1> %195), !tbaa !12, !alias.scope !19, !noalias !16 | |
; Group 7 (local ids 112..127), the last group.
%217 = add <4 x i32> %broadcast.splat, <i32 -113, i32 -114, i32 -115, i32 -116> | |
%218 = add <4 x i32> %broadcast.splat17, <i32 -117, i32 -118, i32 -119, i32 -120> | |
%219 = add <4 x i32> %broadcast.splat19, <i32 -121, i32 -122, i32 -123, i32 -124> | |
%220 = add <4 x i32> %broadcast.splat21, <i32 -125, i32 -126, i32 -127, i32 -128> | |
%221 = icmp sgt <4 x i32> %217, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%222 = icmp sgt <4 x i32> %218, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%223 = icmp sgt <4 x i32> %219, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%224 = icmp sgt <4 x i32> %220, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%225 = or i32 %mul9.i, 112 | |
%226 = add i32 %225, %4 | |
%227 = sext i32 %226 to i64 | |
%228 = getelementptr inbounds double, double* %3, i64 %227 | |
%229 = bitcast double* %228 to <4 x i64>* | |
%wide.load.7 = load <4 x i64>, <4 x i64>* %229, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%230 = getelementptr inbounds double, double* %228, i64 4 | |
%231 = bitcast double* %230 to <4 x i64>* | |
%wide.load22.7 = load <4 x i64>, <4 x i64>* %231, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%232 = getelementptr inbounds double, double* %228, i64 8 | |
%233 = bitcast double* %232 to <4 x i64>* | |
%wide.load23.7 = load <4 x i64>, <4 x i64>* %233, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%234 = getelementptr inbounds double, double* %228, i64 12 | |
%235 = bitcast double* %234 to <4 x i64>* | |
%wide.load24.7 = load <4 x i64>, <4 x i64>* %235, align 8, !tbaa !12, !alias.scope !16, !noalias !19 | |
%236 = add i32 %225, %2 | |
%237 = sext i32 %236 to i64 | |
%238 = getelementptr inbounds double, double* %1, i64 %237 | |
%239 = bitcast double* %238 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.7, <4 x i64>* %239, i32 8, <4 x i1> %221), !tbaa !12, !alias.scope !19, !noalias !16 | |
%240 = getelementptr inbounds double, double* %238, i64 4 | |
%241 = bitcast double* %240 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.7, <4 x i64>* %241, i32 8, <4 x i1> %222), !tbaa !12, !alias.scope !19, !noalias !16 | |
%242 = getelementptr inbounds double, double* %238, i64 8 | |
%243 = bitcast double* %242 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.7, <4 x i64>* %243, i32 8, <4 x i1> %223), !tbaa !12, !alias.scope !19, !noalias !16 | |
%244 = getelementptr inbounds double, double* %238, i64 12 | |
%245 = bitcast double* %244 to <4 x i64>* | |
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.7, <4 x i64>* %245, i32 8, <4 x i1> %224), !tbaa !12, !alias.scope !19, !noalias !16 | |
br label %grudge_assign_0.exit | |
; Scalar fallback loop, entered when the overflow check fails; two local ids per trip.
pregion_for_entry..i: ; preds = %vector.scevcheck, %.r_exit.i.1 | |
%_local_id_x.0 = phi i64 [ %255, %.r_exit.i.1 ], [ 0, %vector.scevcheck ] | |
%conv2.i = trunc i64 %_local_id_x.0 to i32 | |
; xor with -1 is bitwise NOT, i.e. -lid - 1, so %add5.i = %add4.i - lid - 1.
%add.i = xor i32 %conv2.i, -1 | |
%add5.i = add i32 %add4.i, %add.i | |
%cmp.i = icmp sgt i32 %add5.i, -1 | |
br i1 %cmp.i, label %246, label %.r_exit.i | |
; Guarded copy for the even local id: both the load and the store happen only when the guard passed.
; <label>:246: ; preds = %pregion_for_entry..i | |
%add10.i = add nuw nsw i32 %mul9.i, %conv2.i | |
%add13.i = add i32 %add10.i, %4 | |
%idxprom.i = sext i32 %add13.i to i64 | |
%arrayidx.i = getelementptr inbounds double, double* %3, i64 %idxprom.i | |
%247 = bitcast double* %arrayidx.i to i64* | |
%248 = load i64, i64* %247, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21 | |
%add20.i = add i32 %add10.i, %2 | |
%idxprom21.i = sext i32 %add20.i to i64 | |
%arrayidx22.i = getelementptr inbounds double, double* %1, i64 %idxprom21.i | |
%249 = bitcast double* %arrayidx22.i to i64* | |
store i64 %248, i64* %249, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i | |
; Second half of the trip. The counter is even here (starts at 0, steps by 2), so xor with -2
; yields the bitwise NOT of (lid | 1), i.e. the guard value for the odd local id.
.r_exit.i: ; preds = %246, %pregion_for_entry..i | |
%250 = trunc i64 %_local_id_x.0 to i32 | |
%add.i.1 = xor i32 %250, -2 | |
%add5.i.1 = add i32 %add4.i, %add.i.1 | |
%cmp.i.1 = icmp sgt i32 %add5.i.1, -1 | |
br i1 %cmp.i.1, label %251, label %.r_exit.i.1 | |
; Common exit for both the vector path and the scalar loop.
grudge_assign_0.exit: ; preds = %.r_exit.i.1, %vector.ph | |
ret void | |
; Guarded copy for the odd local id (lid | 1).
; <label>:251: ; preds = %.r_exit.i | |
%conv2.i.1 = or i32 %250, 1 | |
%add10.i.1 = add nuw nsw i32 %mul9.i, %conv2.i.1 | |
%add13.i.1 = add i32 %add10.i.1, %4 | |
%idxprom.i.1 = sext i32 %add13.i.1 to i64 | |
%arrayidx.i.1 = getelementptr inbounds double, double* %3, i64 %idxprom.i.1 | |
%252 = bitcast double* %arrayidx.i.1 to i64* | |
%253 = load i64, i64* %252, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21 | |
%add20.i.1 = add i32 %add10.i.1, %2 | |
%idxprom21.i.1 = sext i32 %add20.i.1 to i64 | |
%arrayidx22.i.1 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.1 | |
%254 = bitcast double* %arrayidx22.i.1 to i64* | |
store i64 %253, i64* %254, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.1 | |
; Loop latch: advance the counter by 2 and leave once it equals 128.
.r_exit.i.1: ; preds = %251, %.r_exit.i | |
%255 = add nuw nsw i64 %_local_id_x.0, 2 | |
%exitcond.1 = icmp eq i64 %255, 128 | |
br i1 %exitcond.1, label %grudge_assign_0.exit, label %pregion_for_entry..i, !llvm.loop !23 | |
} | |
; Function Attrs: norecurse nounwind | |
; Work-group function: runs a 128-step loop (unrolled by 2) that conditionally
; copies one 64-bit element per step from one double array to another.
;
; Parameters are unnamed, so they are numbered %0..%4 (the entry block is %5):
;   %0      i8** array of argument slots. Every slot read below holds a pointer
;           to the argument's storage, so pointer arguments need two loads:
;             slot 0 -> i32*     -> i32      (%8,  bound used by the guard)
;             slot 1 -> double** -> double*  (%12, base of the store)
;             slot 2 -> i32*     -> i32      (%16, added to the store index)
;             slot 3 -> double** -> double*  (%20, base of the load)
;             slot 4 -> i32*     -> i32      (%24, added to the load index)
;   %1      struct pointer, marked readnone and never referenced in the body.
;   %2      i64, truncated to i32 and scaled by 128 / -128.
;           NOTE(review): presumably the work-group id along x - confirm.
;   %3, %4  i64, not referenced in the body.
;
; The alias-scope names attached to the load mention %grdg_sub_discr_dx0_dr0 and
; the ones attached to the store mention %expr_8, so the copy direction is
; grdg_sub_discr_dx0_dr0 -> expr_8.
; NOTE(review): slots 0/2/4 presumably correspond to grdg_n, expr_8_offset and
; grdg_sub_discr_dx0_dr0_offset (order of the name list in !9) - confirm.
define void @_pocl_kernel_grudge_assign_0_workgroup(i8** nocapture readonly, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #1 { | |
; --- Unpack the five argument slots (see the table in the header) ---
%6 = bitcast i8** %0 to i32** | |
%7 = load i32*, i32** %6, align 8 | |
%8 = load i32, i32* %7, align 4 | |
%9 = getelementptr i8*, i8** %0, i64 1 | |
%10 = bitcast i8** %9 to double*** | |
%11 = load double**, double*** %10, align 8 | |
%12 = load double*, double** %11, align 8 | |
%13 = getelementptr i8*, i8** %0, i64 2 | |
%14 = bitcast i8** %13 to i32** | |
%15 = load i32*, i32** %14, align 8 | |
%16 = load i32, i32* %15, align 4 | |
%17 = getelementptr i8*, i8** %0, i64 3 | |
%18 = bitcast i8** %17 to double*** | |
%19 = load double**, double*** %18, align 8 | |
%20 = load double*, double** %19, align 8 | |
%21 = getelementptr i8*, i8** %0, i64 4 | |
%22 = bitcast i8** %21 to i32** | |
%23 = load i32*, i32** %22, align 8 | |
%24 = load i32, i32* %23, align 4 | |
; --- Loop-invariant values, with g = trunc(%2) ---
;   %add4.i.i = %8 - 128*g   (shared part of both guards)
;   %mul9.i.i = 128*g        (shared part of both element indices)
%conv.i.i = trunc i64 %2 to i32 | |
%mul.i.i = mul nsw i32 %conv.i.i, -128 | |
%add4.i.i = add i32 %8, %mul.i.i | |
%mul9.i.i = shl nsw i32 %conv.i.i, 7 | |
br label %pregion_for_entry..i.i | |
; --- Loop header: counter x starts at 0 and advances by 2, so x is always even ---
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5 | |
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %34, %.r_exit.i.i.1 ] | |
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32 | |
; First half guard: ~x == -x - 1, so %add5.i.i = %8 - 128*g - x - 1 (wrapping i32).
; The copy runs only when that value is >= 0 as a signed integer.
%add.i.i = xor i32 %conv2.i.i, -1 | |
%add5.i.i = add i32 %add4.i.i, %add.i.i | |
%cmp.i.i = icmp sgt i32 %add5.i.i, -1 | |
br i1 %cmp.i.i, label %25, label %.r_exit.i.i | |
; First half copy, with i = 128*g + x:  %12[i + %16] = %20[i + %24].
; The element is moved as a raw i64 through bitcast pointers, not as a double.
; <label>:25: ; preds = %pregion_for_entry..i.i | |
%add10.i.i = add nuw nsw i32 %mul9.i.i, %conv2.i.i | |
%add13.i.i = add i32 %add10.i.i, %24 | |
%idxprom.i.i = sext i32 %add13.i.i to i64 | |
%arrayidx.i.i = getelementptr inbounds double, double* %20, i64 %idxprom.i.i | |
%26 = bitcast double* %arrayidx.i.i to i64* | |
%27 = load i64, i64* %26, align 8, !tbaa !12, !alias.scope !25, !noalias !30, !llvm.mem.parallel_loop_access !21 | |
%add20.i.i = add i32 %add10.i.i, %16 | |
%idxprom21.i.i = sext i32 %add20.i.i to i64 | |
%arrayidx22.i.i = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i | |
%28 = bitcast double* %arrayidx22.i.i to i64* | |
store i64 %27, i64* %28, align 8, !tbaa !12, !alias.scope !30, !noalias !25, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.i | |
; --- Second unrolled half, handling x + 1 ---
; Because x is even, (x xor -2) == ~(x + 1) == -x - 2, so this is the same
; guard as above evaluated for x + 1.
.r_exit.i.i: ; preds = %25, %pregion_for_entry..i.i | |
%29 = trunc i64 %_local_id_x.i.0 to i32 | |
%add.i.i.1 = xor i32 %29, -2 | |
%add5.i.i.1 = add i32 %add4.i.i, %add.i.i.1 | |
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1 | |
br i1 %cmp.i.i.1, label %30, label %.r_exit.i.i.1 | |
; Function exit, reached from the latch once the counter hits 128.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i.1 | |
ret void | |
; Second half copy: (x or 1) == x + 1 for even x; otherwise identical to the
; first half copy with i = 128*g + x + 1.
; <label>:30: ; preds = %.r_exit.i.i | |
%conv2.i.i.1 = or i32 %29, 1 | |
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1 | |
%add13.i.i.1 = add i32 %add10.i.i.1, %24 | |
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64 | |
%arrayidx.i.i.1 = getelementptr inbounds double, double* %20, i64 %idxprom.i.i.1 | |
%31 = bitcast double* %arrayidx.i.i.1 to i64* | |
%32 = load i64, i64* %31, align 8, !tbaa !12, !alias.scope !25, !noalias !30, !llvm.mem.parallel_loop_access !21 | |
%add20.i.i.1 = add i32 %add10.i.i.1, %16 | |
%idxprom21.i.i.1 = sext i32 %add20.i.i.1 to i64 | |
%arrayidx22.i.i.1 = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i.1 | |
%33 = bitcast double* %arrayidx22.i.i.1 to i64* | |
store i64 %32, i64* %33, align 8, !tbaa !12, !alias.scope !30, !noalias !25, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.i.1 | |
; --- Latch: advance the counter by 2 and leave after 64 trips (x == 128) ---
; The back edge carries !llvm.loop !22, the same node the memory accesses name
; through !llvm.mem.parallel_loop_access !21 (= !{!22}).
.r_exit.i.i.1: ; preds = %30, %.r_exit.i.i | |
%34 = add nuw nsw i64 %_local_id_x.i.0, 2 | |
%exitcond.1 = icmp eq i64 %34, 128 | |
br i1 %exitcond.1, label %_pocl_kernel_grudge_assign_0.exit, label %pregion_for_entry..i.i, !llvm.loop !22 | |
} | |
; Function Attrs: norecurse nounwind | |
; "Fast" work-group function: same 128-step, unrolled-by-2 conditional copy loop
; as @_pocl_kernel_grudge_assign_0_workgroup, but the pointer arguments are
; unpacked with one load less.
;
; Parameters are unnamed, so they are numbered %0..%4 (the entry block is %5):
;   %0      i8** array of argument slots:
;             slot 0 -> i32*  -> i32   (%8,  bound used by the guard)
;             slot 1 -> double*        (%11, base of the store; read directly
;                                       from the slot, no second indirection)
;             slot 2 -> i32*  -> i32   (%15, added to the store index)
;             slot 3 -> double*        (%18, base of the load; read directly)
;             slot 4 -> i32*  -> i32   (%22, added to the load index)
;   %1      struct pointer, marked readnone and never referenced in the body.
;   %2      i64, truncated to i32 and scaled by 128 / -128.
;           NOTE(review): presumably the work-group id along x - confirm.
;   %3, %4  i64, not referenced in the body.
;
; The alias-scope names attached to the load mention %grdg_sub_discr_dx0_dr0 and
; the ones attached to the store mention %expr_8, so the copy direction is
; grdg_sub_discr_dx0_dr0 -> expr_8.
define void @_pocl_kernel_grudge_assign_0_workgroup_fast(i8** nocapture readonly, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #1 { | |
; --- Unpack the five argument slots (see the table in the header) ---
%6 = bitcast i8** %0 to i32** | |
%7 = load i32*, i32** %6, align 8 | |
%8 = load i32, i32* %7, align 4 | |
%9 = getelementptr i8*, i8** %0, i64 1 | |
%10 = bitcast i8** %9 to double** | |
%11 = load double*, double** %10, align 8 | |
%12 = getelementptr i8*, i8** %0, i64 2 | |
%13 = bitcast i8** %12 to i32** | |
%14 = load i32*, i32** %13, align 8 | |
%15 = load i32, i32* %14, align 4 | |
%16 = getelementptr i8*, i8** %0, i64 3 | |
%17 = bitcast i8** %16 to double** | |
%18 = load double*, double** %17, align 8 | |
%19 = getelementptr i8*, i8** %0, i64 4 | |
%20 = bitcast i8** %19 to i32** | |
%21 = load i32*, i32** %20, align 8 | |
%22 = load i32, i32* %21, align 4 | |
; --- Loop-invariant values, with g = trunc(%2) ---
;   %add4.i.i = %8 - 128*g   (shared part of both guards)
;   %mul9.i.i = 128*g        (shared part of both element indices)
%conv.i.i = trunc i64 %2 to i32 | |
%mul.i.i = mul nsw i32 %conv.i.i, -128 | |
%add4.i.i = add i32 %8, %mul.i.i | |
%mul9.i.i = shl nsw i32 %conv.i.i, 7 | |
br label %pregion_for_entry..i.i | |
; --- Loop header: counter x starts at 0 and advances by 2, so x is always even ---
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5 | |
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %32, %.r_exit.i.i.1 ] | |
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32 | |
; First half guard: ~x == -x - 1, so %add5.i.i = %8 - 128*g - x - 1 (wrapping i32).
; The copy runs only when that value is >= 0 as a signed integer.
%add.i.i = xor i32 %conv2.i.i, -1 | |
%add5.i.i = add i32 %add4.i.i, %add.i.i | |
%cmp.i.i = icmp sgt i32 %add5.i.i, -1 | |
br i1 %cmp.i.i, label %23, label %.r_exit.i.i | |
; First half copy, with i = 128*g + x:  %11[i + %15] = %18[i + %22].
; The element is moved as a raw i64 through bitcast pointers, not as a double.
; <label>:23: ; preds = %pregion_for_entry..i.i | |
%add10.i.i = add nuw nsw i32 %mul9.i.i, %conv2.i.i | |
%add13.i.i = add i32 %add10.i.i, %22 | |
%idxprom.i.i = sext i32 %add13.i.i to i64 | |
%arrayidx.i.i = getelementptr inbounds double, double* %18, i64 %idxprom.i.i | |
%24 = bitcast double* %arrayidx.i.i to i64* | |
%25 = load i64, i64* %24, align 8, !tbaa !12, !alias.scope !33, !noalias !38, !llvm.mem.parallel_loop_access !21 | |
%add20.i.i = add i32 %add10.i.i, %15 | |
%idxprom21.i.i = sext i32 %add20.i.i to i64 | |
%arrayidx22.i.i = getelementptr inbounds double, double* %11, i64 %idxprom21.i.i | |
%26 = bitcast double* %arrayidx22.i.i to i64* | |
store i64 %25, i64* %26, align 8, !tbaa !12, !alias.scope !38, !noalias !33, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.i | |
; --- Second unrolled half, handling x + 1 ---
; Because x is even, (x xor -2) == ~(x + 1) == -x - 2, so this is the same
; guard as above evaluated for x + 1.
.r_exit.i.i: ; preds = %23, %pregion_for_entry..i.i | |
%27 = trunc i64 %_local_id_x.i.0 to i32 | |
%add.i.i.1 = xor i32 %27, -2 | |
%add5.i.i.1 = add i32 %add4.i.i, %add.i.i.1 | |
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1 | |
br i1 %cmp.i.i.1, label %28, label %.r_exit.i.i.1 | |
; Function exit, reached from the latch once the counter hits 128.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i.1 | |
ret void | |
; Second half copy: (x or 1) == x + 1 for even x; otherwise identical to the
; first half copy with i = 128*g + x + 1.
; <label>:28: ; preds = %.r_exit.i.i | |
%conv2.i.i.1 = or i32 %27, 1 | |
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1 | |
%add13.i.i.1 = add i32 %add10.i.i.1, %22 | |
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64 | |
%arrayidx.i.i.1 = getelementptr inbounds double, double* %18, i64 %idxprom.i.i.1 | |
%29 = bitcast double* %arrayidx.i.i.1 to i64* | |
%30 = load i64, i64* %29, align 8, !tbaa !12, !alias.scope !33, !noalias !38, !llvm.mem.parallel_loop_access !21 | |
%add20.i.i.1 = add i32 %add10.i.i.1, %15 | |
%idxprom21.i.i.1 = sext i32 %add20.i.i.1 to i64 | |
%arrayidx22.i.i.1 = getelementptr inbounds double, double* %11, i64 %idxprom21.i.i.1 | |
%31 = bitcast double* %arrayidx22.i.i.1 to i64* | |
store i64 %30, i64* %31, align 8, !tbaa !12, !alias.scope !38, !noalias !33, !llvm.mem.parallel_loop_access !21 | |
br label %.r_exit.i.i.1 | |
; --- Latch: advance the counter by 2 and leave after 64 trips (x == 128) ---
; The back edge carries !llvm.loop !22, the same node the memory accesses name
; through !llvm.mem.parallel_loop_access !21 (= !{!22}).
.r_exit.i.i.1: ; preds = %28, %.r_exit.i.i | |
%32 = add nuw nsw i64 %_local_id_x.i.0, 2 | |
%exitcond.1 = icmp eq i64 %32, 128 | |
br i1 %exitcond.1, label %_pocl_kernel_grudge_assign_0.exit, label %pregion_for_entry..i.i, !llvm.loop !22 | |
} | |
; Function Attrs: argmemonly nounwind | |
; Masked store intrinsic for <4 x i64> vectors; operands are the value vector,
; the destination pointer, an i32 alignment and a <4 x i1> lane mask.
declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>) #2 | |
; Attribute groups.
;   #0  alwaysinline group with the haswell target-cpu / target-features strings.
;       NOTE(review): presumably attached to @grudge_assign_0 - confirm against
;       its define line.
;   #1  attached to @_pocl_kernel_grudge_assign_0_workgroup and
;       @_pocl_kernel_grudge_assign_0_workgroup_fast.
;   #2  attached to the @llvm.masked.store declaration above.
attributes #0 = { alwaysinline norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #1 = { norecurse nounwind } | |
attributes #2 = { argmemonly nounwind } | |
; Named module-level metadata: module flags, OpenCL / SPIR version (!3 = 1.2)
; and the producer string (!4).
!llvm.module.flags = !{!0, !1, !2} | |
!opencl.ocl.version = !{!3} | |
!llvm.ident = !{!4} | |
!opencl.spir.version = !{!3} | |
!0 = !{i32 1, !"wchar_size", i32 4} | |
!1 = !{i32 7, !"PIC Level", i32 2} | |
!2 = !{i32 7, !"PIE Level", i32 2} | |
!3 = !{i32 1, i32 2} | |
!4 = !{!"clang version 6.0.1 (tags/RELEASE_601/final)"} | |
; !5..!11: five-entry lists describing five kernel arguments (types in !7,
; qualifiers in !8, names in !9).
; NOTE(review): presumably the kernel_arg_addr_space / access_qual / type /
; type_qual / name nodes plus a required work-group size (!10 = 128x1x1);
; the attachment site is not in this part of the file - confirm.
!5 = !{i32 0, i32 1, i32 0, i32 1, i32 0} | |
!6 = !{!"none", !"none", !"none", !"none", !"none"} | |
!7 = !{!"int", !"double*", !"int", !"double*", !"int"} | |
!8 = !{!"", !"restrict", !"", !"restrict const", !""} | |
!9 = !{!"grdg_n", !"expr_8", !"expr_8_offset", !"grdg_sub_discr_dx0_dr0", !"grdg_sub_discr_dx0_dr0_offset"} | |
!10 = !{i32 128, i32 1, i32 1} | |
!11 = !{i32 1} | |
; !12..!15: TBAA access tag for "double" (root "Simple C/C++ TBAA"); attached
; to every load/store in the copy loops.
!12 = !{!13, !13, i64 0} | |
!13 = !{!"double", !14, i64 0} | |
!14 = !{!"omnipotent char", !15, i64 0} | |
!15 = !{!"Simple C/C++ TBAA"} | |
; !16..!20: alias scopes in the "grudge_assign_0" domain (!18), one per pointer
; argument: !17 = %grdg_sub_discr_dx0_dr0, !20 = %expr_8.
!16 = !{!17} | |
!17 = distinct !{!17, !18, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"} | |
!18 = distinct !{!18, !"grudge_assign_0"} | |
!19 = !{!20} | |
!20 = distinct !{!20, !18, !"grudge_assign_0: %expr_8"} | |
; !21/!22: parallel-loop marker. Memory accesses reference !21 (= !{!22}) via
; !llvm.mem.parallel_loop_access; loop back edges reference !22 via !llvm.loop.
!21 = !{!22} | |
!22 = distinct !{!22} | |
; !23/!24: loop id carrying the llvm.loop.isvectorized = 1 property.
!23 = distinct !{!23, !24} | |
!24 = !{!"llvm.loop.isvectorized", i32 1} | |
; !25..!32: alias scopes used by the accesses in
; @_pocl_kernel_grudge_assign_0_workgroup (load scope list !25, store scope list !30).
!25 = !{!26, !28} | |
!26 = distinct !{!26, !27, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"} | |
!27 = distinct !{!27, !"grudge_assign_0"} | |
!28 = distinct !{!28, !29, !"_pocl_kernel_grudge_assign_0: argument 1"} | |
!29 = distinct !{!29, !"_pocl_kernel_grudge_assign_0"} | |
!30 = !{!31, !32} | |
!31 = distinct !{!31, !27, !"grudge_assign_0: %expr_8"} | |
!32 = distinct !{!32, !29, !"_pocl_kernel_grudge_assign_0: argument 0"} | |
; !33..!40: alias scopes used by the accesses in
; @_pocl_kernel_grudge_assign_0_workgroup_fast (load scope list !33, store scope list !38).
!33 = !{!34, !36} | |
!34 = distinct !{!34, !35, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"} | |
!35 = distinct !{!35, !"grudge_assign_0"} | |
!36 = distinct !{!36, !37, !"_pocl_kernel_grudge_assign_0: argument 1"} | |
!37 = distinct !{!37, !"_pocl_kernel_grudge_assign_0"} | |
!38 = !{!39, !40} | |
!39 = distinct !{!39, !35, !"grudge_assign_0: %expr_8"} | |
!40 = distinct !{!40, !37, !"_pocl_kernel_grudge_assign_0: argument 0"} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment