Last active
July 3, 2017 07:44
-
-
Save annanay25/2b66130468346bfd0999afc90c3c5828 to your computer and use it in GitHub Desktop.
Polly-enabled optimizations in XLA for mat_add.ll
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = '__compute_module'
source_filename = "__compute_module"

; Target description, kept exactly as emitted: little-endian ("e"), ELF name
; mangling ("m:e"), 128-bit stack alignment ("S128"), for an x86-64 Linux triple.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux_gnu"
; Function Attrs: norecurse nounwind
;
; Adds one scalar to every element of a [1 x [784 x float]] array:
;
;     retval[0][i] = params[1][0] + params[0][0][i]      for i in [0, 784)
;
; Arguments:
;   %retval        - output buffer, 16-byte aligned, dereferenceable(3136)
;                    (3136 = 784 floats * 4 bytes).
;   %run_options   - readnone; never accessed in this body.
;   %params        - array of input pointers: slot 0 is the [1 x [784 x float]]
;                    input, slot 1 is the pointer to the scalar addend.
;   %temps         - readnone; never accessed in this body.
;   %prof_counters - readnone; never accessed in this body.
;
; Structure:
;   entry                      load both input pointers, load and splat the scalar.
;   vector.body                elements 0..767, 96 floats per iteration
;                              (3 unrolled steps x 4 <8 x float> vectors),
;                              so the loop body runs 8 times.
;   loop_body.dim.1.preheader  empty block that only forwards to the tail.
;   loop_body.dim.1            elements 768..783 as 16 straight-line scalar adds.
define void @"cluster_2[_XlaCompiledKernel=true,_XlaNumConstantArgs=0].v5"(i8* nocapture align 16 dereferenceable(3136) %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readonly %params, i8** noalias nocapture readnone %temps, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
entry:
  ; %1 = params[0], the input array (16-byte aligned, 3136 dereferenceable bytes).
  %0 = bitcast i8** %params to [1 x [784 x float]]**
  %1 = load [1 x [784 x float]]*, [1 x [784 x float]]** %0, align 8, !tbaa !0, !dereferenceable !3, !align !4
  ; %4 = params[1], pointer to the scalar addend (8-byte aligned, 4 dereferenceable bytes).
  %2 = getelementptr inbounds i8*, i8** %params, i64 1
  %3 = bitcast i8** %2 to float**
  %4 = load float*, float** %3, align 8, !tbaa !5, !dereferenceable !7, !align !8
  ; %5 = output buffer viewed with the same array type as the input.
  %5 = bitcast i8* %retval to [1 x [784 x float]]*
  ; %6 = the scalar; it is loaded once and reused by every add below.
  %6 = load float, float* %4, align 8, !tbaa !9, !invariant.load !11, !noalias !12
  ; Splat the scalar into all 8 lanes (insert into lane 0, shuffle with an all-zero mask).
  %broadcast.splatinsert10 = insertelement <8 x float> undef, float %6, i32 0
  %broadcast.splat11 = shufflevector <8 x float> %broadcast.splatinsert10, <8 x float> undef, <8 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  ; %index takes the values 0, 96, ..., 672.
  %index = phi i64 [ 0, %entry ], [ %index.next.2, %vector.body ]

  ; ---- unrolled step 0: elements [index, index + 32) ----
  %7 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 %index
  %8 = bitcast float* %7 to <8 x float>*
  %wide.load = load <8 x float>, <8 x float>* %8, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %9 = getelementptr float, float* %7, i64 8
  %10 = bitcast float* %9 to <8 x float>*
  %wide.load7 = load <8 x float>, <8 x float>* %10, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %11 = getelementptr float, float* %7, i64 16
  %12 = bitcast float* %11 to <8 x float>*
  %wide.load8 = load <8 x float>, <8 x float>* %12, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %13 = getelementptr float, float* %7, i64 24
  %14 = bitcast float* %13 to <8 x float>*
  %wide.load9 = load <8 x float>, <8 x float>* %14, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %15 = fadd fast <8 x float> %broadcast.splat11, %wide.load
  %16 = fadd fast <8 x float> %broadcast.splat11, %wide.load7
  %17 = fadd fast <8 x float> %broadcast.splat11, %wide.load8
  %18 = fadd fast <8 x float> %broadcast.splat11, %wide.load9
  %19 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %5, i64 0, i64 0, i64 %index
  %20 = bitcast float* %19 to <8 x float>*
  store <8 x float> %15, <8 x float>* %20, align 16, !tbaa !15, !alias.scope !12
  %21 = getelementptr float, float* %19, i64 8
  %22 = bitcast float* %21 to <8 x float>*
  store <8 x float> %16, <8 x float>* %22, align 16, !tbaa !15, !alias.scope !12
  %23 = getelementptr float, float* %19, i64 16
  %24 = bitcast float* %23 to <8 x float>*
  store <8 x float> %17, <8 x float>* %24, align 16, !tbaa !15, !alias.scope !12
  %25 = getelementptr float, float* %19, i64 24
  %26 = bitcast float* %25 to <8 x float>*
  store <8 x float> %18, <8 x float>* %26, align 16, !tbaa !15, !alias.scope !12

  ; ---- unrolled step 1: elements [index + 32, index + 64) ----
  %index.next = add nuw nsw i64 %index, 32
  %27 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 %index.next
  %28 = bitcast float* %27 to <8 x float>*
  %wide.load.1 = load <8 x float>, <8 x float>* %28, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %29 = getelementptr float, float* %27, i64 8
  %30 = bitcast float* %29 to <8 x float>*
  %wide.load7.1 = load <8 x float>, <8 x float>* %30, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %31 = getelementptr float, float* %27, i64 16
  %32 = bitcast float* %31 to <8 x float>*
  %wide.load8.1 = load <8 x float>, <8 x float>* %32, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %33 = getelementptr float, float* %27, i64 24
  %34 = bitcast float* %33 to <8 x float>*
  %wide.load9.1 = load <8 x float>, <8 x float>* %34, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %35 = fadd fast <8 x float> %broadcast.splat11, %wide.load.1
  %36 = fadd fast <8 x float> %broadcast.splat11, %wide.load7.1
  %37 = fadd fast <8 x float> %broadcast.splat11, %wide.load8.1
  %38 = fadd fast <8 x float> %broadcast.splat11, %wide.load9.1
  %39 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %5, i64 0, i64 0, i64 %index.next
  %40 = bitcast float* %39 to <8 x float>*
  store <8 x float> %35, <8 x float>* %40, align 16, !tbaa !15, !alias.scope !12
  %41 = getelementptr float, float* %39, i64 8
  %42 = bitcast float* %41 to <8 x float>*
  store <8 x float> %36, <8 x float>* %42, align 16, !tbaa !15, !alias.scope !12
  %43 = getelementptr float, float* %39, i64 16
  %44 = bitcast float* %43 to <8 x float>*
  store <8 x float> %37, <8 x float>* %44, align 16, !tbaa !15, !alias.scope !12
  %45 = getelementptr float, float* %39, i64 24
  %46 = bitcast float* %45 to <8 x float>*
  store <8 x float> %38, <8 x float>* %46, align 16, !tbaa !15, !alias.scope !12

  ; ---- unrolled step 2: elements [index + 64, index + 96) ----
  %index.next.1 = add nsw i64 %index, 64
  %47 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 %index.next.1
  %48 = bitcast float* %47 to <8 x float>*
  %wide.load.2 = load <8 x float>, <8 x float>* %48, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %49 = getelementptr float, float* %47, i64 8
  %50 = bitcast float* %49 to <8 x float>*
  %wide.load7.2 = load <8 x float>, <8 x float>* %50, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %51 = getelementptr float, float* %47, i64 16
  %52 = bitcast float* %51 to <8 x float>*
  %wide.load8.2 = load <8 x float>, <8 x float>* %52, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %53 = getelementptr float, float* %47, i64 24
  %54 = bitcast float* %53 to <8 x float>*
  %wide.load9.2 = load <8 x float>, <8 x float>* %54, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %55 = fadd fast <8 x float> %broadcast.splat11, %wide.load.2
  %56 = fadd fast <8 x float> %broadcast.splat11, %wide.load7.2
  %57 = fadd fast <8 x float> %broadcast.splat11, %wide.load8.2
  %58 = fadd fast <8 x float> %broadcast.splat11, %wide.load9.2
  %59 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %5, i64 0, i64 0, i64 %index.next.1
  %60 = bitcast float* %59 to <8 x float>*
  store <8 x float> %55, <8 x float>* %60, align 16, !tbaa !15, !alias.scope !12
  %61 = getelementptr float, float* %59, i64 8
  %62 = bitcast float* %61 to <8 x float>*
  store <8 x float> %56, <8 x float>* %62, align 16, !tbaa !15, !alias.scope !12
  %63 = getelementptr float, float* %59, i64 16
  %64 = bitcast float* %63 to <8 x float>*
  store <8 x float> %57, <8 x float>* %64, align 16, !tbaa !15, !alias.scope !12
  %65 = getelementptr float, float* %59, i64 24
  %66 = bitcast float* %65 to <8 x float>*
  store <8 x float> %58, <8 x float>* %66, align 16, !tbaa !15, !alias.scope !12

  ; Advance by 96 floats and leave the loop once 768 elements are done;
  ; the remaining 784 - 768 = 16 elements are handled by the scalar tail.
  %index.next.2 = add nsw i64 %index, 96
  %67 = icmp eq i64 %index.next.2, 768
  br i1 %67, label %loop_body.dim.1.preheader, label %vector.body, !llvm.loop !17

loop_body.dim.1.preheader:                        ; preds = %vector.body
  br label %loop_body.dim.1

loop_body.dim.1:                                  ; preds = %loop_body.dim.1.preheader
  ; Scalar tail for elements 768..783. Input is addressed by element index;
  ; output is addressed by byte offset from %retval (3072 + 4 * k, 3072 = 768 * 4).
  ; The 16/4/8/4 alignment pattern follows from those offsets relative to the
  ; 16-byte-aligned input and output bases.
  %68 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 768
  %69 = load float, float* %68, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %70 = fadd fast float %6, %69
  %71 = getelementptr inbounds i8, i8* %retval, i64 3072
  %72 = bitcast i8* %71 to float*
  store float %70, float* %72, align 16, !tbaa !15, !alias.scope !12
  %73 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 769
  %74 = load float, float* %73, align 4, !tbaa !15, !invariant.load !11, !noalias !12
  %75 = fadd fast float %6, %74
  %76 = getelementptr inbounds i8, i8* %retval, i64 3076
  %77 = bitcast i8* %76 to float*
  store float %75, float* %77, align 4, !tbaa !15, !alias.scope !12
  %78 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 770
  %79 = load float, float* %78, align 8, !tbaa !15, !invariant.load !11, !noalias !12
  %80 = fadd fast float %6, %79
  %81 = getelementptr inbounds i8, i8* %retval, i64 3080
  %82 = bitcast i8* %81 to float*
  store float %80, float* %82, align 8, !tbaa !15, !alias.scope !12
  %83 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 771
  %84 = load float, float* %83, align 4, !tbaa !15, !invariant.load !11, !noalias !12
  %85 = fadd fast float %6, %84
  %86 = getelementptr inbounds i8, i8* %retval, i64 3084
  %87 = bitcast i8* %86 to float*
  store float %85, float* %87, align 4, !tbaa !15, !alias.scope !12
  %88 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 772
  %89 = load float, float* %88, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %90 = fadd fast float %6, %89
  %91 = getelementptr inbounds i8, i8* %retval, i64 3088
  %92 = bitcast i8* %91 to float*
  store float %90, float* %92, align 16, !tbaa !15, !alias.scope !12
  %93 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 773
  %94 = load float, float* %93, align 4, !tbaa !15, !invariant.load !11, !noalias !12
  %95 = fadd fast float %6, %94
  %96 = getelementptr inbounds i8, i8* %retval, i64 3092
  %97 = bitcast i8* %96 to float*
  store float %95, float* %97, align 4, !tbaa !15, !alias.scope !12
  %98 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 774
  %99 = load float, float* %98, align 8, !tbaa !15, !invariant.load !11, !noalias !12
  %100 = fadd fast float %6, %99
  %101 = getelementptr inbounds i8, i8* %retval, i64 3096
  %102 = bitcast i8* %101 to float*
  store float %100, float* %102, align 8, !tbaa !15, !alias.scope !12
  %103 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 775
  %104 = load float, float* %103, align 4, !tbaa !15, !invariant.load !11, !noalias !12
  %105 = fadd fast float %6, %104
  %106 = getelementptr inbounds i8, i8* %retval, i64 3100
  %107 = bitcast i8* %106 to float*
  store float %105, float* %107, align 4, !tbaa !15, !alias.scope !12
  %108 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 776
  %109 = load float, float* %108, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %110 = fadd fast float %6, %109
  %111 = getelementptr inbounds i8, i8* %retval, i64 3104
  %112 = bitcast i8* %111 to float*
  store float %110, float* %112, align 16, !tbaa !15, !alias.scope !12
  %113 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 777
  %114 = load float, float* %113, align 4, !tbaa !15, !invariant.load !11, !noalias !12
  %115 = fadd fast float %6, %114
  %116 = getelementptr inbounds i8, i8* %retval, i64 3108
  %117 = bitcast i8* %116 to float*
  store float %115, float* %117, align 4, !tbaa !15, !alias.scope !12
  %118 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 778
  %119 = load float, float* %118, align 8, !tbaa !15, !invariant.load !11, !noalias !12
  %120 = fadd fast float %6, %119
  %121 = getelementptr inbounds i8, i8* %retval, i64 3112
  %122 = bitcast i8* %121 to float*
  store float %120, float* %122, align 8, !tbaa !15, !alias.scope !12
  %123 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 779
  %124 = load float, float* %123, align 4, !tbaa !15, !invariant.load !11, !noalias !12
  %125 = fadd fast float %6, %124
  %126 = getelementptr inbounds i8, i8* %retval, i64 3116
  %127 = bitcast i8* %126 to float*
  store float %125, float* %127, align 4, !tbaa !15, !alias.scope !12
  %128 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 780
  %129 = load float, float* %128, align 16, !tbaa !15, !invariant.load !11, !noalias !12
  %130 = fadd fast float %6, %129
  %131 = getelementptr inbounds i8, i8* %retval, i64 3120
  %132 = bitcast i8* %131 to float*
  store float %130, float* %132, align 16, !tbaa !15, !alias.scope !12
  %133 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 781
  %134 = load float, float* %133, align 4, !tbaa !15, !invariant.load !11, !noalias !12
  %135 = fadd fast float %6, %134
  %136 = getelementptr inbounds i8, i8* %retval, i64 3124
  %137 = bitcast i8* %136 to float*
  store float %135, float* %137, align 4, !tbaa !15, !alias.scope !12
  %138 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 782
  %139 = load float, float* %138, align 8, !tbaa !15, !invariant.load !11, !noalias !12
  %140 = fadd fast float %6, %139
  %141 = getelementptr inbounds i8, i8* %retval, i64 3128
  %142 = bitcast i8* %141 to float*
  store float %140, float* %142, align 8, !tbaa !15, !alias.scope !12
  %143 = getelementptr inbounds [1 x [784 x float]], [1 x [784 x float]]* %1, i64 0, i64 0, i64 783
  %144 = load float, float* %143, align 4, !tbaa !15, !invariant.load !11, !noalias !12
  %145 = fadd fast float %6, %144
  %146 = getelementptr inbounds i8, i8* %retval, i64 3132
  %147 = bitcast i8* %146 to float*
  store float %145, float* %147, align 4, !tbaa !15, !alias.scope !12
  ret void
}
; Attribute group referenced by the kernel definition (#0).
attributes #0 = { norecurse nounwind }

; TBAA for the load of params[0] (pointer to the [1 x 784] F32 array); !2 is the TBAA root.
!0 = !{!1, !1, i64 0}
!1 = !{!"pointer-to element_type: F32 dimensions: 1 dimensions: 784 layout { minor_to_major: 1 minor_to_major: 0 }", !2}
!2 = !{!"XLA TBAA"}
; !dereferenceable / !align values attached to the params[0] load: 3136 bytes, 16-byte aligned.
!3 = !{i64 3136}
!4 = !{i64 16}
; TBAA for the load of params[1] (pointer to a single F32).
!5 = !{!6, !6, i64 0}
!6 = !{!"pointer-to element_type: F32", !2}
; !dereferenceable / !align values attached to the params[1] load: 4 bytes, 8-byte aligned.
!7 = !{i64 4}
!8 = !{i64 8}
; TBAA for the load of the scalar addend itself.
!9 = !{!10, !10, i64 0}
!10 = !{!"element_type: F32", !2}
; Empty node used as the operand of every !invariant.load attachment.
!11 = !{}
; Alias scope list: loads carry it as !noalias, stores carry it as !alias.scope.
; !13 is the scope "buffer: 1" inside the self-referential domain !14.
!12 = !{!13}
!13 = !{!"buffer: 1", !14}
!14 = distinct !{!14}
; TBAA for element accesses into the [1 x 784] F32 arrays (input loads and output stores).
!15 = !{!16, !16, i64 0}
!16 = !{!"element_type: F32 dimensions: 1 dimensions: 784 layout { minor_to_major: 1 minor_to_major: 0 }", !2}
; Loop metadata attached to the back edge of vector.body.
; NOTE(review): width 1 / interleave count 1 presumably mark the loop as already
; vectorized so later passes leave it alone - confirm against the LLVM loop metadata docs.
!17 = distinct !{!17, !18, !19}
!18 = !{!"llvm.loop.vectorize.width", i32 1}
!19 = !{!"llvm.loop.interleave.count", i32 1}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment