Skip to content

Instantly share code, notes, and snippets.

; ModuleID = 'cluster_2.ll'
source_filename = "__compute_module"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux_gnu"
@0 = private constant [1 x [28 x [28 x [1 x float]]]] [[28 x [28 x [1 x float]]] [[28 x [1 x float]] zeroinitializer, [28 x [1 x float]] zeroinitializer, [28 x [1 x float]] zeroinitializer, [28 x [1 x float]] zeroinitializer, [28 x [1 x float]] zeroinitializer, [28 x [1 x float]] zeroinitializer, [28 x [1 x float]] zeroinitializer, [28 x [1 x float]] [[1 x float] zeroinitializer, [1 x float] zeroinitializer, [1 x float] zeroinitializer, [1 x float] zeroinitializer, [1 x float] zeroinitializer, [1 x float] zeroinitializer, [1 x float] zeroinitializer, [1 x float] zeroinitializer, [1 x float] zeroinitializer, [1 x float] zeroinitializer, [1 x float] zeroinitializer, [1 x float] [float 0x3FD8585880000000], [1 x float] [float 0x3FD8181840000000], [1 x float] [float 0x3FD3535360000000], [1 x float] [float 0x3FDD9D9DC0000000], [1 x float] [float 0x3
@annanay25
annanay25 / isl_union_map_coalesce_inefficient.c
Last active July 21, 2017 07:08
isl_union_map_coalesce sub-optimally merges the 4 pieces in 'test' into 2 pieces. A simpler 1-piece solution is 'orig_simplified'.
#include <stdio.h>
#include <stdlib.h>
#include "isl/flow.h"
#include "isl/ctx.h"
#include "isl/val.h"
#include "isl/set.h"
#include "isl/map.h"
#include "isl/union_map.h"
#include "isl/union_set.h"
#include "isl/flow.h"
diff --git a/lib/Analysis/ScopInfo.cpp b/lib/Analysis/ScopInfo.cpp
index 18e8a89..da939e6 100644
--- a/lib/Analysis/ScopInfo.cpp
+++ b/lib/Analysis/ScopInfo.cpp
@@ -1584,12 +1584,104 @@ static bool buildConditionSets(
// to be set. The comparison is equal to a signed comparison under this
// assumption.
bool NonNeg = ICond->isUnsigned();
- LHS = getPwAff(S, BB, InvalidDomainMap,
- SE.getSCEVAtScope(ICond->getOperand(0), L), NonNeg);
@annanay25
annanay25 / polly_opt_2x2_matmul.ll
Created July 4, 2017 11:10
*-polly-process-unprofitable* version of the [2,2] matrix resulting from the multiplication [2,784] x [784,2]
; ModuleID = '__compute_module'
source_filename = "__compute_module"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux_gnu"
; Function Attrs: norecurse nounwind
define void @"cluster_2[_XlaCompiledKernel=true,_XlaNumConstantArgs=0].v5"(i8* nocapture align 16 dereferenceable(16) %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readonly %params, i8** noalias nocapture readnone %temps, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
entry:
%0 = getelementptr inbounds i8*, i8** %params, i64 1
%1 = load i8*, i8** %0, align 8, !tbaa !0, !dereferenceable !3, !align !4
@annanay25
annanay25 / 2x2_matmul.ll
Created July 4, 2017 11:09
[2,2] matrix resulting from the multiplication [2,784] x [784,2]
; ModuleID = '__compute_module'
source_filename = "__compute_module"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux_gnu"
define void @"cluster_2[_XlaCompiledKernel=true,_XlaNumConstantArgs=0].v5"(i8* align 16 dereferenceable(16) %retval, i8* noalias %run_options, i8** noalias %params, i8** noalias %temps, i64* noalias %prof_counters) {
entry:
%accum_address = alloca float
%invar_address.reduction = alloca i64
%invar_address.rhs1 = alloca i64
@annanay25
annanay25 / Conv_pseudo.txt
Created July 3, 2017 18:49
Pseudo code for Conv kernel.
Input = IMG: M x N x P; KERNEL: X x Y x P (Z such kernels).
for (a = Z)
for (i = M)
for (j = N)
for (k = X)
for (l = Y)
for (r = P)
sum += IMG[ i -X/2 + k , j - Y/2 + l , r ] * KERNEL [ k , l , r ]
ConvOutput[ i , j , a ] = sum
@annanay25
annanay25 / polly_opt_mat_add.ll
Created July 3, 2017 13:17
-polly-process-unprofitable for adding a number (float 5.0) to every element of a [1,784] array.
; ModuleID = '__compute_module'
source_filename = "__compute_module"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux_gnu"
; Function Attrs: norecurse nounwind
define void @"cluster_2[_XlaCompiledKernel=true,_XlaNumConstantArgs=0].v5"(i8* nocapture align 16 dereferenceable(3136) %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readonly %params, i8** noalias nocapture readnone %temps, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
entry:
%0 = load i8*, i8** %params, align 8, !tbaa !0, !dereferenceable !3, !align !4
%1 = getelementptr inbounds i8*, i8** %params, i64 1
@annanay25
annanay25 / mat_add.ll
Created July 3, 2017 07:36
Adding a scalar (float 5.0) to every element of a [1,784] array.
; ModuleID = '__compute_module'
source_filename = "__compute_module"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux_gnu"
define void @"cluster_2[_XlaCompiledKernel=true,_XlaNumConstantArgs=0].v5"(i8* align 16 dereferenceable(3136) %retval, i8* noalias %run_options, i8** noalias %params, i8** noalias %temps, i64* noalias %prof_counters) {
entry:
%invar_address.dim.1 = alloca i64
%invar_address.dim.0 = alloca i64
%0 = getelementptr inbounds i8*, i8** %params, i64 0
@annanay25
annanay25 / optimized_mat_add.ll
Last active July 3, 2017 07:44
Polly-enabled optimizations in XLA for mat_add.ll
; ModuleID = '__compute_module'
source_filename = "__compute_module"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux_gnu"
; Function Attrs: norecurse nounwind
define void @"cluster_2[_XlaCompiledKernel=true,_XlaNumConstantArgs=0].v5"(i8* nocapture align 16 dereferenceable(3136) %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readonly %params, i8** noalias nocapture readnone %temps, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
entry:
%0 = bitcast i8** %params to [1 x [784 x float]]**
%1 = load [1 x [784 x float]]*, [1 x [784 x float]]** %0, align 8, !tbaa !0, !dereferenceable !3, !align !4
@annanay25
annanay25 / matscale.ll
Last active July 3, 2017 07:45
Multiply every element of a [1,784] matrix by a constant (float 5.0)
; ModuleID = '__compute_module'
source_filename = "__compute_module"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux_gnu"
define void @"cluster_2[_XlaCompiledKernel=true,_XlaNumConstantArgs=0].v5"(i8* align 16 dereferenceable(3136) %retval, i8* noalias %run_options, i8** noalias %params, i8** noalias %temps, i64* noalias %prof_counters) {
entry:
%invar_address.dim.1 = alloca i64
%invar_address.dim.0 = alloca i64