Skip to content

Instantly share code, notes, and snippets.

@nalimilan
Last active June 3, 2018 16:45
Show Gist options
  • Save nalimilan/5183c96a925dccec06a541b1f425b323 to your computer and use it in GitHub Desktop.
Save nalimilan/5183c96a925dccec06a541b1f425b323 to your computer and use it in GitHub Desktop.
julia> versioninfo()
Julia Version 0.6.3
Commit d55cadc350* (2018-05-28 20:20 UTC)
Platform Info:
OS: Linux (i686-redhat-linux)
CPU: Intel Core Processor (Skylake, IBRS)
WORD_SIZE: 32
BLAS: libopenblas (DYNAMIC_ARCH NO_AFFINITY Nehalem)
LAPACK: libopenblas
LIBM: libopenlibm
LLVM: libLLVM-3.9.1 (ORCJIT, skylake)
julia> @code_llvm wmedian([1, 2, 4, 7, 10, 15], [1/3, 1/3, 1/3, 1, 1, 1])
; Compiler-generated LLVM IR for the wmedian method specialised for this call (32-bit target).
; Arguments as used below:
;   %0 - caller-provided 8-byte buffer that receives the result payload
;   %1 - array object whose elements are read as i32
;   %2 - array object whose elements are read as double; also passed to mapreduce and findmax
; Returns { pointer to %0, tag }: tag 1 when an i32 was stored in %0, tag 2 when a double was stored.
; Word 4 of an array object is used as the bound in every index check
; (presumably the array length - the object layout itself is not shown here).
define { i8**, i8 } @julia_wmedian_65603([8 x i8]* noalias nocapture, i8** dereferenceable(24), i8** dereferenceable(24)) #0 !dbg !5 {
; Entry: build the 7-word frame %4 (slot 0 = constant 10, slot 1 = previous value of *%ptls,
; slots 2..6 zeroed) and make *%ptls point at it - presumably Julia's GC-frame push.
; Then %13 = 0.5 * mapreduce(%2) and %3 = findmax(%2).
top:
%3 = alloca { double, i32 }, align 8
%ptls_i8 = call i8* asm "movl %gs:0, $0;\0Aaddl $$-5524, $0", "=r,~{dirflag},~{fpsr},~{flags}"() #5
%ptls = bitcast i8* %ptls_i8 to i8****
%4 = alloca [7 x i8**], align 4
%.sub = getelementptr inbounds [7 x i8**], [7 x i8**]* %4, i32 0, i32 0
%5 = getelementptr [7 x i8**], [7 x i8**]* %4, i32 0, i32 2
%6 = bitcast [7 x i8**]* %4 to i32*
%7 = bitcast i8*** %5 to i8*
call void @llvm.memset.p0i8.i64(i8* %7, i8 0, i64 20, i32 4, i1 false)
store i32 10, i32* %6, align 4
%8 = getelementptr [7 x i8**], [7 x i8**]* %4, i32 0, i32 1
%9 = bitcast i8* %ptls_i8 to i32*
%10 = load i32, i32* %9, align 4
%11 = bitcast i8*** %8 to i32*
store i32 %10, i32* %11, align 4
store i8*** %.sub, i8**** %ptls, align 4
%12 = call double @julia__mapreduce_65604(i8** nonnull %2)
%13 = fmul double %12, 5.000000e-01
call void @julia_findmax_65606({ double, i32 }* noalias nocapture nonnull sret %3, i8** nonnull %2)
%14 = getelementptr inbounds { double, i32 }, { double, i32 }* %3, i32 0, i32 0
%15 = load double, double* %14, align 8
; `uge` is also true when either operand is NaN, so only an ordered "max > %13" reaches %if.
%16 = fcmp uge double %13, %15
br i1 %16, label %L18, label %if
; Largest value exceeds %13: bounds-check the 1-based index returned by findmax against %1.
if: ; preds = %top
%17 = getelementptr inbounds { double, i32 }, { double, i32 }* %3, i32 0, i32 1
%18 = load i32, i32* %17, align 8
%19 = add i32 %18, -1
%20 = getelementptr i8*, i8** %1, i32 4
%21 = bitcast i8** %20 to i32*
%22 = load i32, i32* %21, align 4
%23 = icmp ult i32 %19, %22
br i1 %23, label %idxend, label %oob
; General path: %25 = sortperm(%1), kept alive in frame slots 2 and 3.
; %28 (word 1 of %25) is the loop's trip count; a zero count skips the loop entirely.
L18: ; preds = %top
%24 = getelementptr [7 x i8**], [7 x i8**]* %4, i32 0, i32 3
%25 = call i8** @"julia_#sortperm#11_65607"(i8** inttoptr (i32 -1318052740 to i8**), i8** inttoptr (i32 -1318053964 to i8**), i8 0, i8** inttoptr (i32 -1318052588 to i8**), i8** nonnull %1)
store i8** %25, i8*** %5, align 4
store i8** %25, i8*** %24, align 4
%26 = getelementptr inbounds i8*, i8** %25, i32 1
%27 = bitcast i8** %26 to i32*
%28 = load i32, i32* %27, align 4
%29 = icmp eq i32 %28, 0
br i1 %29, label %L67, label %if4.lr.ph
; Loop preheader: hoist the two index bounds (%32 for %25, %36 for %2) and %2's data pointer (%38).
if4.lr.ph: ; preds = %L18
%30 = getelementptr i8*, i8** %25, i32 4
%31 = bitcast i8** %30 to i32*
%32 = load i32, i32* %31, align 4
%33 = bitcast i8** %25 to i32**
%34 = getelementptr i8*, i8** %2, i32 4
%35 = bitcast i8** %34 to i32*
%36 = load i32, i32* %35, align 4
%37 = bitcast i8** %2 to double**
%38 = load double*, double** %37, align 4
br label %if4
; Index from findmax is out of range for %1: report the 1-based index; the call does not return.
oob: ; preds = %if
%39 = alloca i32, align 4
store i32 %18, i32* %39, align 4
call void @jl_bounds_error_ints(i8** nonnull %1, i32* nonnull %39, i32 1)
unreachable
; Early return: copy the i32 element of %1 at the findmax index into %0 (tag 1),
; restore the saved value into *%ptls, and return.
idxend: ; preds = %if
%40 = bitcast i8** %1 to i32**
%41 = load i32*, i32** %40, align 4
%42 = getelementptr i32, i32* %41, i32 %19
%43 = load i32, i32* %42, align 4
%44 = bitcast [8 x i8]* %0 to i8**
%45 = insertvalue { i8**, i8 } undef, i8** %44, 0
%46 = insertvalue { i8**, i8 } %45, i8 1, 1
%47 = bitcast [8 x i8]* %0 to i32*
store i32 %43, i32* %47, align 1
%48 = load i32, i32* %11, align 4
store i32 %48, i32* %9, align 4
ret { i8**, i8 } %46
; Loop header: cumulative_weight starts at 0.0 and the 1-based counter starts at 1.
; Bounds-check (counter - 1) against %32 before reading %25.
if4: ; preds = %if4.lr.ph, %idxend12
%cumulative_weight.070 = phi double [ 0.000000e+00, %if4.lr.ph ], [ %68, %idxend12 ]
%"#temp#1.sroa.3.069" = phi i32 [ 1, %if4.lr.ph ], [ %53, %idxend12 ]
%49 = add i32 %"#temp#1.sroa.3.069", -1
%50 = icmp ult i32 %49, %32
br i1 %50, label %idxend6, label %oob5
; Merge of two loop exits: counter reached %28 (i = %28, updated sum)
; or cumulative_weight equalled %13 (i = counter + 1, sum unchanged).
L67.loopexit: ; preds = %idxend12, %idxend6
%i.1.ph = phi i32 [ %28, %idxend12 ], [ %53, %idxend6 ]
%cumulative_weight.1.ph = phi double [ %68, %idxend12 ], [ %cumulative_weight.070, %idxend6 ]
br label %L67
; Join point after the loop. `une` is true for "not equal" and for NaN, so %if13 is taken
; only when cumulative_weight is exactly equal to %13.
L67: ; preds = %L67.loopexit, %L18, %idxend10
%i.1 = phi i32 [ %"#temp#1.sroa.3.069", %idxend10 ], [ 0, %L18 ], [ %i.1.ph, %L67.loopexit ]
%cumulative_weight.1 = phi double [ %64, %idxend10 ], [ 0.000000e+00, %L18 ], [ %cumulative_weight.1.ph, %L67.loopexit ]
%51 = fcmp une double %cumulative_weight.1, %13
br i1 %51, label %L72, label %if13
; Loop counter is out of range for %25: report it; the call does not return.
oob5: ; preds = %if4
%52 = alloca i32, align 4
store i32 %"#temp#1.sroa.3.069", i32* %52, align 4
call void @jl_bounds_error_ints(i8** %25, i32* nonnull %52, i32 1)
unreachable
; Advance the counter (%53) and leave the loop if cumulative_weight already equals %13.
idxend6: ; preds = %if4
%53 = add i32 %"#temp#1.sroa.3.069", 1
%54 = fcmp une double %cumulative_weight.070, %13
br i1 %54, label %L55, label %L67.loopexit
; %57 = 1-based entry of %25 at the current position; %60 = its bounds check against %2.
; %58 picks the branch: add the weight when %13 >= cumulative_weight (or unordered), else subtract.
L55: ; preds = %idxend6
%55 = load i32*, i32** %33, align 4
%56 = getelementptr i32, i32* %55, i32 %49
%57 = load i32, i32* %56, align 4
%58 = fcmp uge double %13, %cumulative_weight.070
%59 = add i32 %57, -1
%60 = icmp ult i32 %59, %36
br i1 %58, label %L62, label %if8
; %if8 and %L62 apply the same bounds check (%60) on the subtract and add paths respectively.
if8: ; preds = %L55
br i1 %60, label %idxend10, label %oob9
L62: ; preds = %L55
br i1 %60, label %idxend12, label %oob11
; Index %57 is out of range for %2 (subtract path).
oob9: ; preds = %if8
%61 = alloca i32, align 4
store i32 %57, i32* %61, align 4
call void @jl_bounds_error_ints(i8** nonnull %2, i32* nonnull %61, i32 1)
unreachable
; Subtract path: cumulative_weight -= %2[%59], then leave the loop with i = current counter.
idxend10: ; preds = %if8
%62 = getelementptr double, double* %38, i32 %59
%63 = load double, double* %62, align 4
%64 = fsub double %cumulative_weight.070, %63
br label %L67
; Index %57 is out of range for %2 (add path).
oob11: ; preds = %L62
%65 = alloca i32, align 4
store i32 %57, i32* %65, align 4
call void @jl_bounds_error_ints(i8** nonnull %2, i32* nonnull %65, i32 1)
unreachable
; Add path: cumulative_weight += %2[%59]; exit when the counter equals %28, else iterate with %53.
idxend12: ; preds = %L62
%66 = getelementptr double, double* %38, i32 %59
%67 = load double, double* %66, align 4
%68 = fadd double %cumulative_weight.070, %67
%69 = icmp eq i32 %"#temp#1.sroa.3.069", %28
br i1 %69, label %L67.loopexit, label %if4
; cumulative_weight == %13: the result averages two elements of %1.
; First operand: bounds-check 0-based position i-3 of %25 (1-based i-2). %25 is kept in frame slot 4.
if13: ; preds = %L67
%70 = getelementptr [7 x i8**], [7 x i8**]* %4, i32 0, i32 4
store i8** %25, i8*** %70, align 4
%71 = add i32 %i.1, -3
%72 = getelementptr i8*, i8** %25, i32 4
%73 = bitcast i8** %72 to i32*
%74 = load i32, i32* %73, align 4
%75 = icmp ult i32 %71, %74
br i1 %75, label %idxend15, label %oob14
; cumulative_weight != %13: the result is a single element of %1.
; Bounds-check 0-based position i-2 of %25 (1-based i-1). %25 is kept in frame slot 6.
L72: ; preds = %L67
%76 = getelementptr [7 x i8**], [7 x i8**]* %4, i32 0, i32 6
store i8** %25, i8*** %76, align 4
%77 = add i32 %i.1, -2
%78 = getelementptr i8*, i8** %25, i32 4
%79 = bitcast i8** %78 to i32*
%80 = load i32, i32* %79, align 4
%81 = icmp ult i32 %77, %80
br i1 %81, label %idxend27, label %oob26
; Position i-2 (1-based) is out of range for %25.
oob14: ; preds = %if13
%82 = add i32 %i.1, -2
%83 = alloca i32, align 4
store i32 %82, i32* %83, align 4
call void @jl_bounds_error_ints(i8** %25, i32* nonnull %83, i32 1)
unreachable
; %87 = %25[i-2] (1-based); bounds-check it as an index into %1.
idxend15: ; preds = %if13
%84 = bitcast i8** %25 to i32**
%85 = load i32*, i32** %84, align 4
%86 = getelementptr i32, i32* %85, i32 %71
%87 = load i32, i32* %86, align 4
%88 = add i32 %87, -1
%89 = getelementptr i8*, i8** %1, i32 4
%90 = bitcast i8** %89 to i32*
%91 = load i32, i32* %90, align 4
%92 = icmp ult i32 %88, %91
br i1 %92, label %idxend17, label %oob16
; Index %87 is out of range for %1.
oob16: ; preds = %idxend15
%93 = alloca i32, align 4
store i32 %87, i32* %93, align 4
call void @jl_bounds_error_ints(i8** nonnull %1, i32* nonnull %93, i32 1)
unreachable
; Second operand: bounds-check 0-based position i-2 of %25 (1-based i-1). %25 is kept in frame slot 5.
idxend17: ; preds = %idxend15
%94 = getelementptr [7 x i8**], [7 x i8**]* %4, i32 0, i32 5
store i8** %25, i8*** %94, align 4
%95 = add i32 %i.1, -2
%96 = icmp ult i32 %95, %74
br i1 %96, label %idxend19, label %oob18
; Position i-1 (1-based) is out of range for %25.
oob18: ; preds = %idxend17
%97 = add i32 %i.1, -1
%98 = alloca i32, align 4
store i32 %97, i32* %98, align 4
call void @jl_bounds_error_ints(i8** %25, i32* nonnull %98, i32 1)
unreachable
; %100 = %25[i-1] (1-based); bounds-check it as an index into %1.
idxend19: ; preds = %idxend17
%99 = getelementptr i32, i32* %85, i32 %95
%100 = load i32, i32* %99, align 4
%101 = add i32 %100, -1
%102 = icmp ult i32 %101, %91
br i1 %102, label %idxend21, label %oob20
; Index %100 is out of range for %1.
oob20: ; preds = %idxend19
%103 = alloca i32, align 4
store i32 %100, i32* %103, align 4
call void @jl_bounds_error_ints(i8** nonnull %1, i32* nonnull %103, i32 1)
unreachable
; Return 0.5 * double(%1[%88]) + 0.5 * double(%1[%101]) as a double (tag 2),
; restoring the saved value into *%ptls first.
idxend21: ; preds = %idxend19
%104 = bitcast i8** %1 to i32**
%105 = load i32*, i32** %104, align 4
%106 = getelementptr i32, i32* %105, i32 %88
%107 = load i32, i32* %106, align 4
%108 = sitofp i32 %107 to double
%109 = fmul double %108, 5.000000e-01
%110 = getelementptr i32, i32* %105, i32 %101
%111 = load i32, i32* %110, align 4
%112 = sitofp i32 %111 to double
%113 = fmul double %112, 5.000000e-01
%114 = fadd double %109, %113
%115 = bitcast [8 x i8]* %0 to i8**
%116 = insertvalue { i8**, i8 } undef, i8** %115, 0
%117 = insertvalue { i8**, i8 } %116, i8 2, 1
%118 = bitcast [8 x i8]* %0 to double*
store double %114, double* %118, align 1
%119 = load i32, i32* %11, align 4
store i32 %119, i32* %9, align 4
ret { i8**, i8 } %117
; Position i-1 (1-based) is out of range for %25 (single-element path).
oob26: ; preds = %L72
%120 = add i32 %i.1, -1
%121 = alloca i32, align 4
store i32 %120, i32* %121, align 4
call void @jl_bounds_error_ints(i8** %25, i32* nonnull %121, i32 1)
unreachable
; %125 = %25[i-1] (1-based); bounds-check it as an index into %1.
idxend27: ; preds = %L72
%122 = bitcast i8** %25 to i32**
%123 = load i32*, i32** %122, align 4
%124 = getelementptr i32, i32* %123, i32 %77
%125 = load i32, i32* %124, align 4
%126 = add i32 %125, -1
%127 = getelementptr i8*, i8** %1, i32 4
%128 = bitcast i8** %127 to i32*
%129 = load i32, i32* %128, align 4
%130 = icmp ult i32 %126, %129
br i1 %130, label %idxend29, label %oob28
; Index %125 is out of range for %1.
oob28: ; preds = %idxend27
%131 = alloca i32, align 4
store i32 %125, i32* %131, align 4
call void @jl_bounds_error_ints(i8** nonnull %1, i32* nonnull %131, i32 1)
unreachable
; Return double(%1[%126]) as a double (tag 2), restoring the saved value into *%ptls first.
idxend29: ; preds = %idxend27
%132 = bitcast i8** %1 to i32**
%133 = load i32*, i32** %132, align 4
%134 = getelementptr i32, i32* %133, i32 %126
%135 = load i32, i32* %134, align 4
%136 = sitofp i32 %135 to double
%137 = bitcast [8 x i8]* %0 to i8**
%138 = insertvalue { i8**, i8 } undef, i8** %137, 0
%139 = insertvalue { i8**, i8 } %138, i8 2, 1
%140 = bitcast [8 x i8]* %0 to double*
store double %136, double* %140, align 1
%141 = load i32, i32* %11, align 4
store i32 %141, i32* %9, align 4
ret { i8**, i8 } %139
}
julia> @code_native wmedian([1, 2, 4, 7, 10, 15], [1/3, 1/3, 1/3, 1, 1, 1])
# Compiler-generated 32-bit x86 code for wmedian, AT&T syntax (source operand first).
# "Filename:" / "Source line:" rows are annotations printed by the tool, not assembler input.
#
# Stack arguments:
#   8(%ebp)  - pointer to the result buffer (written through %esi / %eax before returning)
#   12(%ebp) - array object whose elements are read as 4-byte integers
#   16(%ebp) - array object whose elements are read as 8-byte doubles
# Returns: %eax = result buffer pointer, %dl = tag (1 after an integer store, 2 after a double store).
#
# Frame slots used below:
#   -120(%ebp) / -112(%ebp) - value and index written by findmax
#   -96(%ebp)  - 0.5 * result of the mapreduce call
#   -84(%ebp)  - frame header word (26); -80(%ebp) - previous contents of (%ebx)
#   -76(%ebp) .. -28(%ebp) - zero-initialised slots (presumably GC roots - TODO confirm)
#   -24(%ebp)  - loop trip count; -20(%ebp) - saved %ebx; -16(%ebp) - scratch pointer
# Offset 16 of an array object is the bound used in every index check
# (presumably the array length - the object layout is not shown here).
.text
Filename: REPL[1]
# Prologue: save %ebp and the callee-saved registers, reserve 108 bytes of locals.
pushl %ebp
movl %esp, %ebp
pushl %ebx
pushl %edi
pushl %esi
subl $108, %esp
# Clear the slots between -76(%ebp) and -28(%ebp) (the remaining one, -60, is cleared below).
movl $0, -52(%ebp)
movl $0, -56(%ebp)
movl $0, -44(%ebp)
movl $0, -48(%ebp)
movl $0, -36(%ebp)
movl $0, -40(%ebp)
movl $0, -28(%ebp)
movl $0, -32(%ebp)
movl $0, -72(%ebp)
movl $0, -76(%ebp)
movl $0, -64(%ebp)
movl $0, -68(%ebp)
# %ebx = thread-local base (%gs:0) minus 5524; the frame at -84(%ebp) is linked in through it:
# the old (%ebx) is saved at -80(%ebp) and (%ebx) is pointed at the new frame.
movl %gs:0, %ebx
addl $4294961772, %ebx # imm = 0xFFFFEA6C
movl $26, -84(%ebp)
movl 8(%ebp), %esi
movl 16(%ebp), %edi
leal -84(%ebp), %ecx
movl (%ebx), %eax
movl %eax, -80(%ebp)
movl %ecx, (%ebx)
movl $0, -60(%ebp)
Source line: 2
# Call mapreduce on the third argument; the double comes back on the x87 stack, is spilled,
# multiplied by the constant at absolute address -1495812416 and kept at -96(%ebp).
# (The IR listing of the same function multiplies by 0.5 at this point.)
subl $12, %esp
pushl %edi
calll _mapreduce
addl $16, %esp
fstpl -104(%ebp)
movsd -104(%ebp), %xmm0 # xmm0 = mem[0],zero
mulsd -1495812416, %xmm0
movsd %xmm0, -96(%ebp)
Source line: 3
# Call findmax on the third argument with -120(%ebp) as the hidden result pointer.
# Only 12 of the 16 bytes are released by the caller afterwards.
subl $8, %esp
leal -120(%ebp), %eax
pushl %edi
pushl %eax
calll findmax
movsd -96(%ebp), %xmm1 # xmm1 = mem[0],zero
addl $12, %esp
movsd -120(%ebp), %xmm0 # xmm0 = mem[0],zero
Source line: 4
# Compare the maximum (%xmm0) with the halved sum (%xmm1); `ja` is taken only for an
# ordered "greater than", which selects the early-return path at L409.
ucomisd %xmm1, %xmm0
ja L409
movl 12(%ebp), %ecx
leal -56(%ebp), %eax
Source line: 7
# Build an 8-word argument vector at -56(%ebp): seven constants followed by the second
# argument, then call jl_invoke(constant, vector, 8). The IR listing shows a sortperm call
# at this point, so this is presumably the same call made through jl_invoke - TODO confirm.
movl $2976914688, -56(%ebp) # imm = 0xB1701D00
movl $2976910032, -52(%ebp) # imm = 0xB1700AD0
movl $2976914556, -48(%ebp) # imm = 0xB1701C7C
movl $2976913332, -44(%ebp) # imm = 0xB17017B4
movl $2977015848, -40(%ebp) # imm = 0xB171A828
movl $2976909808, -36(%ebp) # imm = 0xB17009F0
movl $2976914708, -32(%ebp) # imm = 0xB1701D14
movl %ecx, %edi
movl %edi, -28(%ebp)
subl $4, %esp
pushl $8
pushl %eax
pushl $2786260240 # imm = 0xA612F510
calll jl_invoke
addl $16, %esp
movl %eax, -76(%ebp)
Source line: 10
# %eax = returned array object, also stored in frame slots -76/-72. The word at 4(%eax)
# is the loop trip count; when it is zero the loop is skipped via L454.
movl %eax, -72(%ebp)
movl 4(%eax), %ecx
movl %ebx, -20(%ebp)
testl %ecx, %ecx
je L454
# Loop set-up: %xmm0 = running sum (0.0), %xmm1 = halved sum, %edx = 0-based position,
# %esi = index bound of the returned array, %ebx = index bound of the third argument,
# -16(%ebp) = data pointer of the third argument, -24(%ebp) = trip count.
movl 16(%ebp), %edi
movl %ecx, -24(%ebp)
movsd -96(%ebp), %xmm1 # xmm1 = mem[0],zero
xorpd %xmm0, %xmm0
Source line: 65
movl 16(%eax), %esi
Source line: 16
movl (%edi), %edx
Source line: 19
movl %edx, -16(%ebp)
xorl %edx, %edx
movl 16(%edi), %ebx
nopl (%eax)
Source line: 65
# Loop head: bounds-check the position against the returned array.
L336:
cmpl %esi, %edx
jae L731
Source line: 12
# jne + jnp together leave the loop (to L467) only when the running sum is ordered-equal
# to the halved sum; unequal or unordered values continue at L352.
ucomisd %xmm1, %xmm0
jne L352
jnp L467
L352:
movl %eax, %edi
Source line: 65
movl (%eax), %eax
Source line: 15
# Flags from this compare are consumed by `ja L483` below; the intervening movl/leal
# do not modify flags.
ucomisd %xmm1, %xmm0
Source line: 65
movl (%eax,%edx,4), %ecx
Source line: 19
# %ecx = 1-based index read from the returned array; %eax = its 0-based form.
# Running sum above the halved sum -> subtract path (L483); otherwise add and continue.
leal -1(%ecx), %eax
ja L483
cmpl %ebx, %eax
jae L757
movl -16(%ebp), %eax
Source line: 10
incl %edx
cmpl %edx, -24(%ebp)
Source line: 19
# addsd and movl leave the flags from the cmpl above intact for the jne.
addsd -8(%eax,%ecx,8), %xmm0
movl %edi, %eax
jne L336
# Loop ran to completion: reload the registers expected at L516 (%edx = trip count).
movl 12(%ebp), %ebx
movl 8(%ebp), %esi
movl -24(%ebp), %edx
movl %ebx, %edi
movl -20(%ebp), %ebx
jmp L516
Source line: 3
# Early return: %eax = 1-based index from findmax; bounds-check it against the second argument.
L409:
movl -112(%ebp), %eax
movl 12(%ebp), %edx
Source line: 5
leal -1(%eax), %ecx
movl %edx, %edi
cmpl 16(%edx), %ecx
jae L784
# Store the selected integer element into the result buffer, tag 1, unlink the frame, return.
movl (%edi), %ecx
movb $1, %dl
movl -4(%ecx,%eax,4), %eax
movl %eax, (%esi)
movl -80(%ebp), %eax
movl %eax, (%ebx)
movl %esi, %eax
leal -12(%ebp), %esp
popl %esi
popl %edi
popl %ebx
popl %ebp
retl
# Empty-loop case: running sum = 0.0 and position = 0.
L454:
movsd -96(%ebp), %xmm1 # xmm1 = mem[0],zero
xorpd %xmm0, %xmm0
xorl %edx, %edx
jmp L516
# Left the loop because the running sum equalled the halved sum:
# %edx becomes the 0-based position + 2.
L467:
movl 12(%ebp), %ebx
movl 8(%ebp), %esi
Source line: 12
addl $2, %edx
movl %ebx, %edi
movl -20(%ebp), %ebx
jmp L516
# Subtract path: bounds-check, subtract the element from the running sum,
# set %edx to the 0-based position + 1 and fall through to L516.
L483:
cmpl %ebx, %eax
Source line: 16
jae L972
movl -16(%ebp), %eax
movl 12(%ebp), %ebx
movl 8(%ebp), %esi
incl %edx
subsd -8(%eax,%ecx,8), %xmm0
movl %ebx, %ecx
movl -20(%ebp), %ebx
movl %edi, %eax
movl %ecx, %edi
Source line: 21
# After the loop: %eax = returned array, %edi = second argument, %esi = result buffer,
# %ebx = thread-local pointer, %edx = final index i.
# Ordered-equal sums go to L589 (two-element average); everything else to L524.
L516:
ucomisd %xmm1, %xmm0
jne L524
jnp L589
Source line: 24
# Single-element result: read entry i-1 (1-based) of the returned array, use it to index the
# second argument, convert that integer to double, store it with tag 2 and return.
L524:
movl %eax, -60(%ebp)
leal -2(%edx), %ecx
cmpl 16(%eax), %ecx
jae L809
movl (%eax), %eax
movl -8(%eax,%edx,4), %eax
leal -1(%eax), %ecx
cmpl 16(%edi), %ecx
jae L837
movl (%edi), %ecx
movb $2, %dl
xorps %xmm0, %xmm0
cvtsi2sdl -4(%ecx,%eax,4), %xmm0
movl -80(%ebp), %eax
movsd %xmm0, (%esi)
movl %eax, (%ebx)
movl %esi, %eax
leal -12(%ebp), %esp
popl %esi
popl %edi
popl %ebx
popl %ebp
retl
Source line: 22
# Two-element result: entries i-2 and i-1 (1-based) of the returned array select two
# elements of the second argument; each index is bounds-checked before use.
L589:
movl %eax, -68(%ebp)
leal -3(%edx), %ecx
movl %edx, %ebx
movl 16(%eax), %edx
cmpl %edx, %ecx
jae L862
movl (%eax), %esi
movl %eax, -16(%ebp)
movl 16(%edi), %edi
movl -12(%esi,%ebx,4), %ecx
leal -1(%ecx), %eax
cmpl %edi, %eax
jae L890
movl -16(%ebp), %eax
movl %eax, -64(%ebp)
leal -2(%ebx), %eax
cmpl %edx, %eax
jae L917
movl -8(%esi,%ebx,4), %eax
leal -1(%eax), %edx
cmpl %edi, %edx
jae L945
# Convert both elements to double, scale each by the constant loaded into %xmm1, add them,
# store the sum with tag 2, unlink the frame through the saved pointer at -20(%ebp), return.
movl 12(%ebp), %edx
movsd -1495812416, %xmm1 # xmm1 = mem[0],zero
movl (%edx), %edx
xorps %xmm0, %xmm0
cvtsi2sdl -4(%edx,%ecx,4), %xmm0
xorps %xmm2, %xmm2
cvtsi2sdl -4(%edx,%eax,4), %xmm2
movl 8(%ebp), %eax
movl -80(%ebp), %ecx
movl -20(%ebp), %edx
mulsd %xmm1, %xmm0
mulsd %xmm1, %xmm2
addsd %xmm0, %xmm2
movsd %xmm2, (%eax)
movl %ecx, (%edx)
movb $2, %dl
leal -12(%ebp), %esp
popl %esi
popl %edi
popl %ebx
popl %ebp
retl
Source line: 65
# Out-of-line bounds-error stubs. Each one carves 16 bytes off the stack, stores the offending
# 1-based index there and calls jl_bounds_error_ints(array, &index, 1). The IR listing marks
# every such call as followed by `unreachable`, so the trailing addl is never expected to run.
# L731: loop position out of range for the returned array.
L731:
movl %esp, %esi
leal -16(%esi), %ecx
movl %ecx, %esp
incl %edx
movl %edx, -16(%esi)
subl $4, %esp
pushl $1
pushl %ecx
pushl %eax
calll jl_bounds_error_ints
addl $4, %esp
Source line: 19
# L757: index out of range for the third argument (add path).
L757:
movl %esp, %eax
leal -16(%eax), %edx
movl %edx, %esp
movl %ecx, -16(%eax)
subl $4, %esp
pushl $1
pushl %edx
pushl 16(%ebp)
calll jl_bounds_error_ints
addl $4, %esp
Source line: 5
# L784: findmax index out of range for the second argument.
L784:
movl %esp, %ecx
leal -16(%ecx), %edx
movl %edx, %esp
movl %eax, -16(%ecx)
subl $4, %esp
pushl $1
pushl %edx
pushl %edi
calll jl_bounds_error_ints
addl $4, %esp
Source line: 24
# L809: position i-1 out of range for the returned array (single-element path).
L809:
decl %edx
movl %edx, %esi
movl %esp, %edx
leal -16(%edx), %ecx
movl %ecx, %esp
movl %esi, -16(%edx)
subl $4, %esp
pushl $1
pushl %ecx
pushl %eax
calll jl_bounds_error_ints
addl $4, %esp
# L837: index out of range for the second argument (single-element path).
L837:
movl %esp, %ecx
leal -16(%ecx), %edx
movl %edx, %esp
movl %eax, -16(%ecx)
subl $4, %esp
pushl $1
pushl %edx
pushl %edi
calll jl_bounds_error_ints
addl $4, %esp
Source line: 22
# L862: position i-2 out of range for the returned array (two-element path).
L862:
movl %esp, %edx
addl $-2, %ebx
leal -16(%edx), %ecx
movl %ecx, %esp
movl %ebx, -16(%edx)
subl $4, %esp
pushl $1
pushl %ecx
pushl %eax
calll jl_bounds_error_ints
addl $4, %esp
# L890: first index out of range for the second argument (two-element path).
L890:
movl %esp, %eax
leal -16(%eax), %edx
movl %edx, %esp
movl %ecx, -16(%eax)
subl $4, %esp
pushl $1
pushl %edx
pushl 12(%ebp)
calll jl_bounds_error_ints
addl $4, %esp
# L917: position i-1 out of range for the returned array (two-element path).
L917:
movl %esp, %eax
decl %ebx
leal -16(%eax), %ecx
movl %ecx, %esp
movl %ebx, -16(%eax)
subl $4, %esp
pushl $1
pushl %ecx
pushl -16(%ebp)
calll jl_bounds_error_ints
addl $4, %esp
# L945: second index out of range for the second argument (two-element path).
L945:
movl %esp, %ecx
leal -16(%ecx), %edx
movl %edx, %esp
movl %eax, -16(%ecx)
subl $4, %esp
pushl $1
pushl %edx
pushl 12(%ebp)
calll jl_bounds_error_ints
addl $4, %esp
Source line: 16
# L972: index out of range for the third argument (subtract path).
L972:
movl %esp, %eax
leal -16(%eax), %edx
movl %edx, %esp
movl %ecx, -16(%eax)
subl $4, %esp
pushl $1
pushl %edx
pushl 16(%ebp)
calll jl_bounds_error_ints
addl $4, %esp
nopw (%eax,%eax)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment