Skip to content

Instantly share code, notes, and snippets.

@annanay25
Last active July 3, 2017 07:44
Show Gist options
  • Save annanay25/2b66130468346bfd0999afc90c3c5828 to your computer and use it in GitHub Desktop.
Save annanay25/2b66130468346bfd0999afc90c3c5828 to your computer and use it in GitHub Desktop.
Polly-enabled optimizations in XLA for mat_add.ll
; ModuleID = '__compute_module'
source_filename = "__compute_module"
; Data layout string: little-endian (e), ELF name mangling (m:e), i64 aligned to
; 64 bits, x86 f80 aligned to 128 bits, native integer widths 8/16/32/64, and a
; 128-bit natural stack alignment (S128).
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; NOTE(review): the last component is spelled "linux_gnu" with an underscore; the
; conventional spelling is "linux-gnu". Confirm this is what the producer intends.
target triple = "x86_64-unknown-linux_gnu"
; Function Attrs: norecurse nounwind
; Adds one float scalar to every element of a 784-element float array:
;   out[i] = scalar + in[i]   for i in 0..783
;
;   %retval       - output buffer (16-byte aligned, 3136 dereferenceable bytes =
;                   784 * 4); reinterpreted below as [1 x [784 x float]]*.
;   %params       - array of argument pointers: params[0] is loaded as the
;                   [1 x [784 x float]] input, params[1] as a pointer to the scalar.
;   %run_options, %temps, %prof_counters - declared readnone and never referenced
;                   in the body.
;
; Elements 0..767 are processed by the vector loop (8 trips of 96 floats each);
; elements 768..783 are processed by straight-line scalar code afterwards.
define void @"cluster_2[_XlaCompiledKernel=true,_XlaNumConstantArgs=0].v5"(i8* nocapture align 16 dereferenceable(3136) %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readonly %params, i8** noalias nocapture readnone %temps, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
entry:
; %1 = params[0], the input array.
%0 = bitcast i8** %params to [1 x [784 x float]]**
%1 = load [1 x [784 x float]]*, [1 x [784 x float]]** %0, align 8, !tbaa !0, !dereferenceable !3, !align !4
; %4 = params[1], pointer to the scalar operand.
%2 = getelementptr inbounds i8*, i8** %params, i64 1
%3 = bitcast i8** %2 to float**
%4 = load float*, float** %3, align 8, !tbaa !5, !dereferenceable !7, !align !8
; %5 = output buffer viewed with the same array type as the input.
%5 = bitcast i8* %retval to [1 x [784 x float]]*
; %6 = the scalar; loaded once here and reused by every add below.
%6 = load float, float* %4, align 8, !tbaa !9, !invariant.load !11, !noalias !12
; Splat the scalar across all 8 lanes (insert into lane 0, then shuffle with an
; all-zero mask so every lane copies lane 0).
%broadcast.splatinsert10 = insertelement <8 x float> undef, float %6, i32 0
%broadcast.splat11 = shufflevector <8 x float> %broadcast.splatinsert10, <8 x float> undef, <8 x i32> zeroinitializer
br label %vector.body
vector.body: ; preds = %vector.body, %entry
; %index takes the values 0, 96, ..., 672. Each trip contains three copies of the
; same 32-float step (four <8 x float> loads, four adds, four stores), at
; %index, %index + 32 and %index + 64.
%index = phi i64 [ 0, %entry ], [ %index.next.2, %vector.body ]
; --- copy 0: elements [%index, %index + 32) ---
%7 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 %index
%8 = bitcast float* %7 to <8 x float>*
%wide.load = load <8 x float>, <8 x float>* %8, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%9 = getelementptr float, float* %7, i64 8
%10 = bitcast float* %9 to <8 x float>*
%wide.load7 = load <8 x float>, <8 x float>* %10, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%11 = getelementptr float, float* %7, i64 16
%12 = bitcast float* %11 to <8 x float>*
%wide.load8 = load <8 x float>, <8 x float>* %12, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%13 = getelementptr float, float* %7, i64 24
%14 = bitcast float* %13 to <8 x float>*
%wide.load9 = load <8 x float>, <8 x float>* %14, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%15 = fadd fast <8 x float> %broadcast.splat11, %wide.load
%16 = fadd fast <8 x float> %broadcast.splat11, %wide.load7
%17 = fadd fast <8 x float> %broadcast.splat11, %wide.load8
%18 = fadd fast <8 x float> %broadcast.splat11, %wide.load9
; Store the four sums to the same element positions in the output.
%19 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %5, i64 0, i64 0, i64 %index
%20 = bitcast float* %19 to <8 x float>*
store <8 x float> %15, <8 x float>* %20, align 16, !tbaa !15, !alias.scope !12
%21 = getelementptr float, float* %19, i64 8
%22 = bitcast float* %21 to <8 x float>*
store <8 x float> %16, <8 x float>* %22, align 16, !tbaa !15, !alias.scope !12
%23 = getelementptr float, float* %19, i64 16
%24 = bitcast float* %23 to <8 x float>*
store <8 x float> %17, <8 x float>* %24, align 16, !tbaa !15, !alias.scope !12
%25 = getelementptr float, float* %19, i64 24
%26 = bitcast float* %25 to <8 x float>*
store <8 x float> %18, <8 x float>* %26, align 16, !tbaa !15, !alias.scope !12
; --- copy 1: elements [%index + 32, %index + 64) ---
%index.next = add nuw nsw i64 %index, 32
%27 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 %index.next
%28 = bitcast float* %27 to <8 x float>*
%wide.load.1 = load <8 x float>, <8 x float>* %28, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%29 = getelementptr float, float* %27, i64 8
%30 = bitcast float* %29 to <8 x float>*
%wide.load7.1 = load <8 x float>, <8 x float>* %30, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%31 = getelementptr float, float* %27, i64 16
%32 = bitcast float* %31 to <8 x float>*
%wide.load8.1 = load <8 x float>, <8 x float>* %32, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%33 = getelementptr float, float* %27, i64 24
%34 = bitcast float* %33 to <8 x float>*
%wide.load9.1 = load <8 x float>, <8 x float>* %34, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%35 = fadd fast <8 x float> %broadcast.splat11, %wide.load.1
%36 = fadd fast <8 x float> %broadcast.splat11, %wide.load7.1
%37 = fadd fast <8 x float> %broadcast.splat11, %wide.load8.1
%38 = fadd fast <8 x float> %broadcast.splat11, %wide.load9.1
%39 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %5, i64 0, i64 0, i64 %index.next
%40 = bitcast float* %39 to <8 x float>*
store <8 x float> %35, <8 x float>* %40, align 16, !tbaa !15, !alias.scope !12
%41 = getelementptr float, float* %39, i64 8
%42 = bitcast float* %41 to <8 x float>*
store <8 x float> %36, <8 x float>* %42, align 16, !tbaa !15, !alias.scope !12
%43 = getelementptr float, float* %39, i64 16
%44 = bitcast float* %43 to <8 x float>*
store <8 x float> %37, <8 x float>* %44, align 16, !tbaa !15, !alias.scope !12
%45 = getelementptr float, float* %39, i64 24
%46 = bitcast float* %45 to <8 x float>*
store <8 x float> %38, <8 x float>* %46, align 16, !tbaa !15, !alias.scope !12
; --- copy 2: elements [%index + 64, %index + 96) ---
%index.next.1 = add nsw i64 %index, 64
%47 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 %index.next.1
%48 = bitcast float* %47 to <8 x float>*
%wide.load.2 = load <8 x float>, <8 x float>* %48, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%49 = getelementptr float, float* %47, i64 8
%50 = bitcast float* %49 to <8 x float>*
%wide.load7.2 = load <8 x float>, <8 x float>* %50, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%51 = getelementptr float, float* %47, i64 16
%52 = bitcast float* %51 to <8 x float>*
%wide.load8.2 = load <8 x float>, <8 x float>* %52, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%53 = getelementptr float, float* %47, i64 24
%54 = bitcast float* %53 to <8 x float>*
%wide.load9.2 = load <8 x float>, <8 x float>* %54, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%55 = fadd fast <8 x float> %broadcast.splat11, %wide.load.2
%56 = fadd fast <8 x float> %broadcast.splat11, %wide.load7.2
%57 = fadd fast <8 x float> %broadcast.splat11, %wide.load8.2
%58 = fadd fast <8 x float> %broadcast.splat11, %wide.load9.2
%59 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %5, i64 0, i64 0, i64 %index.next.1
%60 = bitcast float* %59 to <8 x float>*
store <8 x float> %55, <8 x float>* %60, align 16, !tbaa !15, !alias.scope !12
%61 = getelementptr float, float* %59, i64 8
%62 = bitcast float* %61 to <8 x float>*
store <8 x float> %56, <8 x float>* %62, align 16, !tbaa !15, !alias.scope !12
%63 = getelementptr float, float* %59, i64 16
%64 = bitcast float* %63 to <8 x float>*
store <8 x float> %57, <8 x float>* %64, align 16, !tbaa !15, !alias.scope !12
%65 = getelementptr float, float* %59, i64 24
%66 = bitcast float* %65 to <8 x float>*
store <8 x float> %58, <8 x float>* %66, align 16, !tbaa !15, !alias.scope !12
; Advance by 96 elements; leave the loop once 768 (= 8 * 96) elements are done.
%index.next.2 = add nsw i64 %index, 96
%67 = icmp eq i64 %index.next.2, 768
br i1 %67, label %loop_body.dim.1.preheader, label %vector.body, !llvm.loop !17
loop_body.dim.1.preheader: ; preds = %vector.body
; Forwarding block only: its sole instruction branches to the scalar tail.
br label %loop_body.dim.1
loop_body.dim.1: ; preds = %loop_body.dim.1.preheader
; Scalar tail for elements 768..783. Despite the label name this block has no
; back-edge: it is entered once and ends in `ret`. Each group of five
; instructions is load in[i], add the scalar, store to out[i]. The output is
; addressed as a byte offset from %retval (4 * i, so 3072 for element 768).
; The alignment on each access is 16, 8 or 4, cycling as 16, 4, 8, 4.
; element 768 (byte offset 3072)
%68 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 768
%69 = load float, float* %68, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%70 = fadd fast float %6, %69
%71 = getelementptr inbounds i8, i8* %retval, i64 3072
%72 = bitcast i8* %71 to float*
store float %70, float* %72, align 16, !tbaa !15, !alias.scope !12
; element 769 (byte offset 3076)
%73 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 769
%74 = load float, float* %73, align 4, !tbaa !15, !invariant.load !11, !noalias !12
%75 = fadd fast float %6, %74
%76 = getelementptr inbounds i8, i8* %retval, i64 3076
%77 = bitcast i8* %76 to float*
store float %75, float* %77, align 4, !tbaa !15, !alias.scope !12
; element 770 (byte offset 3080)
%78 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 770
%79 = load float, float* %78, align 8, !tbaa !15, !invariant.load !11, !noalias !12
%80 = fadd fast float %6, %79
%81 = getelementptr inbounds i8, i8* %retval, i64 3080
%82 = bitcast i8* %81 to float*
store float %80, float* %82, align 8, !tbaa !15, !alias.scope !12
; element 771 (byte offset 3084)
%83 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 771
%84 = load float, float* %83, align 4, !tbaa !15, !invariant.load !11, !noalias !12
%85 = fadd fast float %6, %84
%86 = getelementptr inbounds i8, i8* %retval, i64 3084
%87 = bitcast i8* %86 to float*
store float %85, float* %87, align 4, !tbaa !15, !alias.scope !12
; element 772 (byte offset 3088)
%88 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 772
%89 = load float, float* %88, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%90 = fadd fast float %6, %89
%91 = getelementptr inbounds i8, i8* %retval, i64 3088
%92 = bitcast i8* %91 to float*
store float %90, float* %92, align 16, !tbaa !15, !alias.scope !12
; element 773 (byte offset 3092)
%93 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 773
%94 = load float, float* %93, align 4, !tbaa !15, !invariant.load !11, !noalias !12
%95 = fadd fast float %6, %94
%96 = getelementptr inbounds i8, i8* %retval, i64 3092
%97 = bitcast i8* %96 to float*
store float %95, float* %97, align 4, !tbaa !15, !alias.scope !12
; element 774 (byte offset 3096)
%98 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 774
%99 = load float, float* %98, align 8, !tbaa !15, !invariant.load !11, !noalias !12
%100 = fadd fast float %6, %99
%101 = getelementptr inbounds i8, i8* %retval, i64 3096
%102 = bitcast i8* %101 to float*
store float %100, float* %102, align 8, !tbaa !15, !alias.scope !12
; element 775 (byte offset 3100)
%103 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 775
%104 = load float, float* %103, align 4, !tbaa !15, !invariant.load !11, !noalias !12
%105 = fadd fast float %6, %104
%106 = getelementptr inbounds i8, i8* %retval, i64 3100
%107 = bitcast i8* %106 to float*
store float %105, float* %107, align 4, !tbaa !15, !alias.scope !12
; element 776 (byte offset 3104)
%108 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 776
%109 = load float, float* %108, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%110 = fadd fast float %6, %109
%111 = getelementptr inbounds i8, i8* %retval, i64 3104
%112 = bitcast i8* %111 to float*
store float %110, float* %112, align 16, !tbaa !15, !alias.scope !12
; element 777 (byte offset 3108)
%113 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 777
%114 = load float, float* %113, align 4, !tbaa !15, !invariant.load !11, !noalias !12
%115 = fadd fast float %6, %114
%116 = getelementptr inbounds i8, i8* %retval, i64 3108
%117 = bitcast i8* %116 to float*
store float %115, float* %117, align 4, !tbaa !15, !alias.scope !12
; element 778 (byte offset 3112)
%118 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 778
%119 = load float, float* %118, align 8, !tbaa !15, !invariant.load !11, !noalias !12
%120 = fadd fast float %6, %119
%121 = getelementptr inbounds i8, i8* %retval, i64 3112
%122 = bitcast i8* %121 to float*
store float %120, float* %122, align 8, !tbaa !15, !alias.scope !12
; element 779 (byte offset 3116)
%123 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 779
%124 = load float, float* %123, align 4, !tbaa !15, !invariant.load !11, !noalias !12
%125 = fadd fast float %6, %124
%126 = getelementptr inbounds i8, i8* %retval, i64 3116
%127 = bitcast i8* %126 to float*
store float %125, float* %127, align 4, !tbaa !15, !alias.scope !12
; element 780 (byte offset 3120)
%128 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 780
%129 = load float, float* %128, align 16, !tbaa !15, !invariant.load !11, !noalias !12
%130 = fadd fast float %6, %129
%131 = getelementptr inbounds i8, i8* %retval, i64 3120
%132 = bitcast i8* %131 to float*
store float %130, float* %132, align 16, !tbaa !15, !alias.scope !12
; element 781 (byte offset 3124)
%133 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 781
%134 = load float, float* %133, align 4, !tbaa !15, !invariant.load !11, !noalias !12
%135 = fadd fast float %6, %134
%136 = getelementptr inbounds i8, i8* %retval, i64 3124
%137 = bitcast i8* %136 to float*
store float %135, float* %137, align 4, !tbaa !15, !alias.scope !12
; element 782 (byte offset 3128)
%138 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 782
%139 = load float, float* %138, align 8, !tbaa !15, !invariant.load !11, !noalias !12
%140 = fadd fast float %6, %139
%141 = getelementptr inbounds i8, i8* %retval, i64 3128
%142 = bitcast i8* %141 to float*
store float %140, float* %142, align 8, !tbaa !15, !alias.scope !12
; element 783 (byte offset 3132), the last element of the array
%143 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 783
%144 = load float, float* %143, align 4, !tbaa !15, !invariant.load !11, !noalias !12
%145 = fadd fast float %6, %144
%146 = getelementptr inbounds i8, i8* %retval, i64 3132
%147 = bitcast i8* %146 to float*
store float %145, float* %147, align 4, !tbaa !15, !alias.scope !12
ret void
}
; Attribute group #0, referenced by the function definition in this module.
attributes #0 = { norecurse nounwind }
; --- TBAA ---
; !2 is the TBAA root. !1, !6, !10 and !16 are type nodes parented to it, and
; !0, !5, !9 and !15 are the access tags (base type, access type, offset 0)
; attached to loads and stores via !tbaa.
; !0/!1: the load of params[0] (pointer to the 1 x 784 F32 array).
!0 = !{!1, !1, i64 0}
!1 = !{!"pointer-to element_type: F32 dimensions: 1 dimensions: 784 layout { minor_to_major: 1 minor_to_major: 0 }", !2}
!2 = !{!"XLA TBAA"}
; !3/!4: !dereferenceable (3136 bytes) and !align (16) on the load of params[0].
!3 = !{i64 3136}
!4 = !{i64 16}
; !5/!6: the load of params[1] (pointer to the F32 scalar).
!5 = !{!6, !6, i64 0}
!6 = !{!"pointer-to element_type: F32", !2}
; !7/!8: !dereferenceable (4 bytes) and !align (8) on the load of params[1].
!7 = !{i64 4}
!8 = !{i64 8}
; !9/!10: the load of the scalar value itself.
!9 = !{!10, !10, i64 0}
!10 = !{!"element_type: F32", !2}
; !11: empty node used as the operand of every !invariant.load attachment.
!11 = !{}
; --- Alias scopes ---
; !12 is a scope list holding the single scope !13 ("buffer: 1") in domain !14.
; Stores carry it as !alias.scope and loads carry it as !noalias.
!12 = !{!13}
!13 = !{!"buffer: 1", !14}
!14 = distinct !{!14}
; !15/!16: accesses to elements of the 1 x 784 F32 arrays (input and output).
!15 = !{!16, !16, i64 0}
!16 = !{!"element_type: F32 dimensions: 1 dimensions: 784 layout { minor_to_major: 1 minor_to_major: 0 }", !2}
; --- Loop metadata ---
; !17 is attached via !llvm.loop to the conditional branch that ends vector.body.
; It sets vectorize width 1 and interleave count 1 for that loop, presumably so
; the already-vectorized loop is not vectorized again; confirm against the
; producing pass.
!17 = distinct !{!17, !18, !19}
!18 = !{!"llvm.loop.vectorize.width", i32 1}
!19 = !{!"llvm.loop.interleave.count", i32 1}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment