Skip to content

Instantly share code, notes, and snippets.

@tkf
Last active July 18, 2018 20:50
Show Gist options
  • Save tkf/03e0b87c37db85b60bad91fef0a3efcb to your computer and use it in GitHub Desktop.
Save tkf/03e0b87c37db85b60bad91fef0a3efcb to your computer and use it in GitHub Desktop.
Revision: 2386264789419e07e5f4c29666c001db3d2a232d
# Native-code listing (x86-64, AT&T syntax) of a function that receives two
# pointer arguments in %rdi/%rsi, calls check_broadcast_shape once, calls
# exp_taylor once per element, and returns its first argument in %rax.
# NOTE(review): presumably this is vec_exp! as printed by print_code_native
# in the accompanying script -- confirm.
# Register roles (callee-saved registers, so they survive the calls below):
#   %r13 = first argument (destination object)
#   %r14 = second argument (source object)
#   %rbx = length of the first argument, then the remaining iteration count
#   %r15 = byte offset of the current 32-byte element
#   %r12 = address of the 32-byte input temporary at 32(%rsp)
.text
Filename: bench_exp_taylor.jl
# Prologue: save callee-saved registers, align %rsp to 32 bytes, reserve 128 bytes.
pushq %rbp
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
andq $-32, %rsp
subq $128, %rsp
movq %rsi, %r14
movq %rdi, %r13
Source line: 64
# Load the quadword at offset 24 of the first argument and clamp negatives to 0.
movq 24(%r13), %rbx
xorl %eax, %eax
Source line: 209
testq %rbx, %rbx
cmovsq %rax, %rbx
movq %rbx, 16(%rsp)
Source line: 64
# Same load-and-clamp for the second argument.
movq 24(%r14), %rcx
Source line: 210
testq %rcx, %rcx
cmovsq %rax, %rcx
movq %rcx, 24(%rsp)
# Pass the addresses of the two clamped values to check_broadcast_shape.
movabsq $check_broadcast_shape, %rax
leaq 16(%rsp), %rdi
leaq 24(%rsp), %rsi
callq *%rax
Source line: 68
# Nothing to do when the first argument's clamped length is <= 0.
testq %rbx, %rbx
jle L270
Source line: 64
# Re-read the second argument's length, clamped to >= 0, into %rcx.
movq 24(%r14), %rax
xorl %ecx, %ecx
Source line: 111
testq %rax, %rax
cmovnsq %rax, %rcx
xorl %r15d, %r15d
leaq 32(%rsp), %r12
Source line: 121
# Equal lengths fall through to L128 (source indexed by %r15); otherwise
# L208 is used, which re-reads the source element at offset 0 every iteration.
cmpq %rcx, %rbx
jne L208
nopl (%rax)
Source line: 153
L128:
# Copy the current 32-byte source element into the stack temporary.
movq (%r14), %rax
vmovups (%rax,%r15), %ymm0
vmovaps %ymm0, 32(%rsp)
# %rdi = result slot at 64(%rsp), %rsi = input temporary.
leaq 64(%rsp), %rdi
Source line: 155
movq %r12, %rsi
movabsq $exp_taylor, %rax
vzeroupper
callq *%rax
vmovaps 64(%rsp), %ymm0
Source line: 300
# Store the 32-byte result at the same byte offset in the destination data.
movq (%r13), %rax
vmovups %ymm0, (%rax,%r15)
Source line: 71
addq $32, %r15
addq $-1, %rbx
jne L128
jmp L270
nopw %cs:(%rax,%rax)
Source line: 153
L208:
# Same loop body as L128, except the source load always uses offset 0.
movq (%r14), %rax
vmovups (%rax), %ymm0
vmovaps %ymm0, 32(%rsp)
leaq 64(%rsp), %rdi
Source line: 155
movq %r12, %rsi
movabsq $exp_taylor, %rax
vzeroupper
callq *%rax
vmovaps 64(%rsp), %ymm0
Source line: 300
movq (%r13), %rax
vmovups %ymm0, (%rax,%r15)
Source line: 71
addq $32, %r15
addq $-1, %rbx
jne L208
Source line: 20
L270:
# Epilogue: return the first argument; restore %rsp from the frame pointer
# (5 pushed registers = 40 bytes below %rbp) and pop the saved registers.
movq %r13, %rax
leaq -40(%rbp), %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
vzeroupper
retq
nopw %cs:(%rax,%rax)
# Native-code listing (x86-64, AT&T syntax) of a function that receives a
# result pointer in %rdi and an input pointer in %rsi, calls literal_pow
# twice, combines the two 32-byte results with the input, stores 32 bytes
# through the result pointer and returns that pointer in %rax.
# NOTE(review): presumably this is exp_taylor for Val{3} as printed by
# print_code_native in the accompanying script -- confirm.
# Register roles: %r14 = result pointer, %r15 = input pointer,
#                 %rbx = constant passed as third argument to literal_pow.
.text
Filename: bench_exp_taylor.jl
# Prologue: save callee-saved registers, align %rsp to 32 bytes, reserve 160 bytes.
pushq %rbp
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %rbx
andq $-32, %rsp
subq $160, %rsp
movq %rsi, %r15
movq %rdi, %r14
Source line: 17
# First literal_pow call: result slot 96(%rsp); %rsi still holds the input pointer.
movabsq $literal_pow, %rax
leaq 96(%rsp), %rdi
movabsq $139733918657712, %rbx # imm = 0x7F165695ACB0
movq %rbx, %rdx
callq *%rax
# Scalar: first double of the input plus a constant loaded from memory; spilled to 16(%rsp).
vmovsd (%r15), %xmm0 # xmm0 = mem[0],zero
movabsq $139733179072216, %rax # imm = 0x7F162A807ED8
vaddsd (%rax), %xmm0, %xmm0
vmovapd %xmm0, 16(%rsp)
# Scale the first literal_pow result by a broadcast constant; spilled to 32(%rsp).
movabsq $139733179072224, %rax # imm = 0x7F162A807EE0
vbroadcastsd (%rax), %ymm0
vmulpd 96(%rsp), %ymm0, %ymm0
vmovapd %ymm0, 32(%rsp)
# Second literal_pow call: third argument is the first constant plus 0x600,
# result slot 64(%rsp).
addq $1536, %rbx # imm = 0x600
movabsq $literal_pow, %rax
leaq 64(%rsp), %rdi
movq %r15, %rsi
movq %rbx, %rdx
vzeroupper
callq *%rax
# Divide the second literal_pow result by a broadcast constant.
movabsq $139733179072232, %rax # imm = 0x7F162A807EE8
vbroadcastsd (%rax), %ymm0
vmovapd 64(%rsp), %ymm1
vdivpd %ymm0, %ymm1, %ymm0
# Build ymm1 = [spilled scalar, input[1], input[2], input[3]] (doubles),
# then add the scaled first result and the divided second result.
vmovapd 16(%rsp), %xmm1
vmovhpd 8(%r15), %xmm1, %xmm1 # xmm1 = xmm1[0],mem[0]
vinsertf128 $1, 16(%r15), %ymm1, %ymm1
vaddpd 32(%rsp), %ymm1, %ymm1
vaddpd %ymm0, %ymm1, %ymm0
# Store the 32-byte sum through the result pointer and return that pointer.
vmovupd %ymm0, (%r14)
movq %r14, %rax
# Epilogue: 3 pushed registers = 24 bytes below %rbp.
leaq -24(%rbp), %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
vzeroupper
retq
nopl (%rax)
Revision: 6d0eb49e3ef74d2e97c7075af088f3ef1590a43e
; LLVM IR for vec_exp!: takes two object pointers (%0 = destination,
; %1 = source), validates their lengths via check_broadcast_shape, calls
; exp_taylor once per element and returns %0.
; Elements are moved as <4 x i64> (32 bytes) values.
define i8** @"julia_vec_exp!_63127"(i8** dereferenceable(40), i8** dereferenceable(40)) #0 !dbg !5 {
top:
; Stack slots: %2 / %shape hold the clamped lengths, %3 receives the
; exp_taylor result (sret), %val_2 holds the element passed as input.
%2 = alloca [1 x %OneTo.3], align 8
%3 = alloca <4 x i64>, align 8
%tmpcast81 = bitcast <4 x i64>* %3 to %Dual.67*
%shape = alloca [1 x %OneTo.3], align 8
%val_2 = alloca <4 x i64>, align 8
%tmpcast = bitcast <4 x i64>* %val_2 to %Dual.67*
; Load the i64 at pointer-slot index 3 of %0 and clamp negatives to 0 (%8).
%4 = getelementptr i8*, i8** %0, i64 3
%5 = bitcast i8** %4 to i64*
%6 = load i64, i64* %5, align 8
%7 = icmp slt i64 %6, 0
%8 = select i1 %7, i64 0, i64 %6
%9 = getelementptr inbounds [1 x %OneTo.3], [1 x %OneTo.3]* %shape, i64 0, i64 0, i32 0
store i64 %8, i64* %9, align 8
; Same load-and-clamp for %1 (%14).
%10 = getelementptr i8*, i8** %1, i64 3
%11 = bitcast i8** %10 to i64*
%12 = load i64, i64* %11, align 8
%13 = icmp slt i64 %12, 0
%14 = select i1 %13, i64 0, i64 %12
%15 = getelementptr inbounds [1 x %OneTo.3], [1 x %OneTo.3]* %2, i64 0, i64 0, i32 0
store i64 %14, i64* %15, align 8
call void @julia_check_broadcast_shape_63128([1 x %OneTo.3]* nocapture nonnull readonly %shape, [1 x %OneTo.3]* nocapture nonnull readonly %2)
; Skip the loops entirely when the destination length is < 1.
%16 = icmp slt i64 %8, 1
br i1 %16, label %L112, label %if11.lr.ph
if11.lr.ph: ; preds = %top
; Re-read and clamp the source length; equal lengths select the indexed
; loop (if11.us), otherwise the loop that always reads source element 0 (if11).
%17 = load i64, i64* %11, align 8
%18 = icmp slt i64 %17, 0
%19 = select i1 %18, i64 0, i64 %17
%20 = icmp eq i64 %8, %19
%21 = bitcast i8** %1 to %Dual.67**
%22 = bitcast i8** %0 to %Dual.67**
br i1 %20, label %if11.us.preheader, label %if11.preheader
if11.preheader: ; preds = %if11.lr.ph
br label %if11
if11.us.preheader: ; preds = %if11.lr.ph
br label %if11.us
if11.us: ; preds = %if11.us.preheader, %if11.us
; Indexed loop: dest[i] = exp_taylor(src[i]) for i in 0 .. %8-1.
%"i#804.063.us" = phi i64 [ %30, %if11.us ], [ 0, %if11.us.preheader ]
%23 = load %Dual.67*, %Dual.67** %21, align 8
%.elt.us = getelementptr inbounds %Dual.67, %Dual.67* %23, i64 %"i#804.063.us", i32 0
%24 = bitcast double* %.elt.us to <4 x i64>*
%25 = load <4 x i64>, <4 x i64>* %24, align 8
store <4 x i64> %25, <4 x i64>* %val_2, align 8
call void @julia_exp_taylor_63129(%Dual.67* noalias nocapture nonnull sret %tmpcast81, %Dual.67* nocapture nonnull readonly %tmpcast)
%26 = load <4 x i64>, <4 x i64>* %3, align 8
%27 = load %Dual.67*, %Dual.67** %22, align 8
%28 = getelementptr %Dual.67, %Dual.67* %27, i64 %"i#804.063.us"
%29 = bitcast %Dual.67* %28 to <4 x i64>*
store <4 x i64> %26, <4 x i64>* %29, align 8
%30 = add nuw nsw i64 %"i#804.063.us", 1
%exitcond79 = icmp eq i64 %30, %8
br i1 %exitcond79, label %L112.loopexit, label %if11.us
L112.loopexit: ; preds = %if11.us
br label %L112
L112.loopexit82: ; preds = %if11
br label %L112
L112: ; preds = %L112.loopexit82, %L112.loopexit, %top
; Common exit: return the destination argument.
ret i8** %0
if11: ; preds = %if11.preheader, %if11
; Non-indexed loop: dest[i] = exp_taylor(src[0]) for i in 0 .. %8-1.
%"i#804.063" = phi i64 [ %38, %if11 ], [ 0, %if11.preheader ]
%31 = bitcast i8** %1 to <4 x i64>**
%32 = load <4 x i64>*, <4 x i64>** %31, align 8
%33 = load <4 x i64>, <4 x i64>* %32, align 8
store <4 x i64> %33, <4 x i64>* %val_2, align 8
call void @julia_exp_taylor_63129(%Dual.67* noalias nocapture nonnull sret %tmpcast81, %Dual.67* nocapture nonnull readonly %tmpcast)
%34 = load <4 x i64>, <4 x i64>* %3, align 8
%35 = load %Dual.67*, %Dual.67** %22, align 8
%36 = getelementptr %Dual.67, %Dual.67* %35, i64 %"i#804.063"
%37 = bitcast %Dual.67* %36 to <4 x i64>*
store <4 x i64> %34, <4 x i64>* %37, align 8
%38 = add nuw nsw i64 %"i#804.063", 1
%exitcond = icmp eq i64 %38, %8
br i1 %exitcond, label %L112.loopexit82, label %if11
}
; LLVM IR for exp_taylor in the variant where the powers are computed by two
; out-of-line literal_pow calls. %0 is the sret result slot, %1 the read-only
; input. The stored result is
;   [x0 + 1.0, p0, p1, p2] + 0.5 * pow_a + pow_b / 6.0
; where x0 and p0..p2 are the four doubles of the input and pow_a / pow_b are
; the two literal_pow results.
define void @julia_exp_taylor_63129(%Dual.67* noalias nocapture sret, %Dual.67* nocapture readonly dereferenceable(32)) #0 !dbg !5 {
top:
; %2 and %3 are the sret slots for the two literal_pow calls.
%2 = alloca <4 x double>, align 8
%tmpcast = bitcast <4 x double>* %2 to %Dual.67*
%3 = alloca <4 x double>, align 8
%tmpcast48 = bitcast <4 x double>* %3 to %Dual.67*
; First power; the third argument is a constant pointer baked into the code.
call void @julia_literal_pow_63130(%Dual.67* noalias nocapture nonnull sret %tmpcast, %Dual.67* nocapture nonnull readonly %1, i8** inttoptr (i64 140126153960624 to i8**))
%4 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 0
%5 = load double, double* %4, align 8
%6 = fadd double %5, 1.000000e+00
%7 = load <4 x double>, <4 x double>* %2, align 8
%8 = fmul <4 x double> %7, <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; Load the three remaining doubles of the input (field 1, elements 0..2).
%9 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 1, i32 0, i64 0
%10 = load double, double* %9, align 8
%11 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 1, i32 0, i64 1
%12 = load double, double* %11, align 8
%13 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 1, i32 0, i64 2
%14 = load double, double* %13, align 8
; Second power; a different constant pointer is passed as third argument.
call void @julia_literal_pow_63131(%Dual.67* noalias nocapture nonnull sret %tmpcast48, %Dual.67* nocapture nonnull readonly %1, i8** inttoptr (i64 140126153962160 to i8**))
; Assemble [x0 + 1.0, p0, p1, p2] and accumulate the two scaled powers.
%15 = insertelement <4 x double> undef, double %6, i32 0
%16 = insertelement <4 x double> %15, double %10, i32 1
%17 = insertelement <4 x double> %16, double %12, i32 2
%18 = insertelement <4 x double> %17, double %14, i32 3
%19 = fadd <4 x double> %18, %8
%20 = load <4 x double>, <4 x double>* %3, align 8
%21 = fdiv <4 x double> %20, <double 6.000000e+00, double 6.000000e+00, double 6.000000e+00, double 6.000000e+00>
%22 = fadd <4 x double> %19, %21
%23 = bitcast %Dual.67* %0 to <4 x double>*
store <4 x double> %22, <4 x double>* %23, align 8
ret void
}
Revision: 6d0eb49e3ef74d2e97c7075af088f3ef1590a43e
# Native-code listing (x86-64, AT&T syntax) of a function that receives two
# pointer arguments in %rdi/%rsi, calls check_broadcast_shape once, calls
# exp_taylor once per element, and returns its first argument in %rax.
# NOTE(review): presumably this is vec_exp! as printed by print_code_native
# in the accompanying script -- confirm.
# Register roles (callee-saved registers, so they survive the calls below):
#   %r13 = first argument (destination object)
#   %r14 = second argument (source object)
#   %rbx = length of the first argument, then the remaining iteration count
#   %r15 = byte offset of the current 32-byte element
#   %r12 = address of the 32-byte input temporary at 32(%rsp)
.text
Filename: bench_exp_taylor.jl
# Prologue: save callee-saved registers, align %rsp to 32 bytes, reserve 128 bytes.
pushq %rbp
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
andq $-32, %rsp
subq $128, %rsp
movq %rsi, %r14
movq %rdi, %r13
Source line: 64
# Load the quadword at offset 24 of the first argument and clamp negatives to 0.
movq 24(%r13), %rbx
xorl %eax, %eax
Source line: 209
testq %rbx, %rbx
cmovsq %rax, %rbx
movq %rbx, 16(%rsp)
Source line: 64
# Same load-and-clamp for the second argument.
movq 24(%r14), %rcx
Source line: 210
testq %rcx, %rcx
cmovsq %rax, %rcx
movq %rcx, 24(%rsp)
# Pass the addresses of the two clamped values to check_broadcast_shape.
movabsq $check_broadcast_shape, %rax
leaq 16(%rsp), %rdi
leaq 24(%rsp), %rsi
callq *%rax
Source line: 68
# Nothing to do when the first argument's clamped length is <= 0.
testq %rbx, %rbx
jle L270
Source line: 64
# Re-read the second argument's length, clamped to >= 0, into %rcx.
movq 24(%r14), %rax
xorl %ecx, %ecx
Source line: 111
testq %rax, %rax
cmovnsq %rax, %rcx
xorl %r15d, %r15d
leaq 32(%rsp), %r12
Source line: 121
# Equal lengths fall through to L128 (source indexed by %r15); otherwise
# L208 is used, which re-reads the source element at offset 0 every iteration.
cmpq %rcx, %rbx
jne L208
nopl (%rax)
Source line: 153
L128:
# Copy the current 32-byte source element into the stack temporary.
movq (%r14), %rax
vmovups (%rax,%r15), %ymm0
vmovaps %ymm0, 32(%rsp)
# %rdi = result slot at 64(%rsp), %rsi = input temporary.
leaq 64(%rsp), %rdi
Source line: 155
movq %r12, %rsi
movabsq $exp_taylor, %rax
vzeroupper
callq *%rax
vmovaps 64(%rsp), %ymm0
Source line: 300
# Store the 32-byte result at the same byte offset in the destination data.
movq (%r13), %rax
vmovups %ymm0, (%rax,%r15)
Source line: 71
addq $32, %r15
addq $-1, %rbx
jne L128
jmp L270
nopw %cs:(%rax,%rax)
Source line: 153
L208:
# Same loop body as L128, except the source load always uses offset 0.
movq (%r14), %rax
vmovups (%rax), %ymm0
vmovaps %ymm0, 32(%rsp)
leaq 64(%rsp), %rdi
Source line: 155
movq %r12, %rsi
movabsq $exp_taylor, %rax
vzeroupper
callq *%rax
vmovaps 64(%rsp), %ymm0
Source line: 300
movq (%r13), %rax
vmovups %ymm0, (%rax,%r15)
Source line: 71
addq $32, %r15
addq $-1, %rbx
jne L208
Source line: 20
L270:
# Epilogue: return the first argument; restore %rsp from the frame pointer
# (5 pushed registers = 40 bytes below %rbp) and pop the saved registers.
movq %r13, %rax
leaq -40(%rbp), %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
vzeroupper
retq
nopw %cs:(%rax,%rax)
# Native-code listing (x86-64, AT&T syntax) of a function that receives a
# result pointer in %rdi and an input pointer in %rsi, computes everything
# inline except one call to __pow, stores 32 bytes through the result pointer
# and returns that pointer in %rax. Three NaN checks branch to jl_throw.
# NOTE(review): presumably this is exp_taylor for Val{3} as printed by
# print_code_native in the accompanying script -- confirm.
# Register roles: %r15 = result pointer, %rbx = input pointer,
#                 %r14 = constant pointer handed to jl_throw on the error paths.
# vucomisd of a register with itself sets the parity flag only when the value
# is NaN, so jp / jnp below mean "is NaN" / "is not NaN".
.text
Filename: bench_exp_taylor.jl
# Prologue: save callee-saved registers and reserve 56 bytes; locals are
# addressed relative to %rbp.
pushq %rbp
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rsi, %rbx
movq %rdi, %r15
movabsq $140081698299528, %r14 # imm = 0x7F674FDDDE88
Source line: 17
# xmm0 = x (first double of the input), xmm1 = x*x, xmm2 = x + constant.
vmovsd (%rbx), %xmm0 # xmm0 = mem[0],zero
Source line: 714
vmulsd %xmm0, %xmm0, %xmm1
movabsq $140080955539424, %rax # imm = 0x7F6723983FE0
vaddsd (%rax), %xmm0, %xmm2
Source line: 315
# Throw (L359) when x*x is NaN although x + constant is not.
vucomisd %xmm2, %xmm2
jp L67
vucomisd %xmm1, %xmm1
jp L359
L67:
# Spill x*x; xmm1 = x + another constant.
vmovapd %xmm1, -80(%rbp)
movabsq $140080955539432, %rax # imm = 0x7F6723983FE8
Source line: 714
vaddsd (%rax), %xmm0, %xmm1
Source line: 315
# Throw (L374) when x is NaN although x + constant is not.
vucomisd %xmm0, %xmm0
jnp L102
vucomisd %xmm1, %xmm1
jnp L374
L102:
# Spill x + constant, load the second __pow argument from memory into xmm1,
# and save it and x on the stack because xmm registers are caller-saved.
vmovapd %xmm1, -48(%rbp)
movabsq $140080955539440, %rax # imm = 0x7F6723983FF0
Source line: 714
vmovsd (%rax), %xmm1 # xmm1 = mem[0],zero
vmovsd %xmm1, -32(%rbp)
movabsq $__pow, %rax
vmovapd %xmm0, -64(%rbp)
callq *%rax
# xmm0 = __pow result; reload the saved constant (xmm7) and x (xmm6).
vmovsd -32(%rbp), %xmm7 # xmm7 = mem[0],zero
vmovapd -64(%rbp), %xmm6
Source line: 315
# Throw (L389) when the __pow result is NaN although x + constant is not.
vucomisd %xmm0, %xmm0
jnp L173
Source line: 714
vaddsd %xmm7, %xmm6, %xmm1
vucomisd %xmm1, %xmm1
jnp L389
Source line: 155
L173:
# xmm1 = input doubles 1..2, xmm3 = input double 3; xmm2/xmm4 = twice those.
vmovupd 8(%rbx), %xmm1
vaddpd %xmm1, %xmm1, %xmm2
vmovsd 24(%rbx), %xmm3 # xmm3 = mem[0],zero
vaddsd %xmm3, %xmm3, %xmm4
Source line: 155
# Multiply the doubled values by x (still in xmm6 here).
vmovddup %xmm6, %xmm5 # xmm5 = xmm6[0,0]
vmulpd %xmm2, %xmm5, %xmm2
vmulsd %xmm4, %xmm6, %xmm4
Source line: 17
# Pack [x*x, products...] into ymm2 and scale by a broadcast constant -> ymm8.
# From here on xmm6 holds the spilled x*x instead of x.
vpermilpd $1, %xmm2, %xmm5 # xmm5 = xmm2[1,0]
vunpcklpd %xmm4, %xmm5, %xmm4 # xmm4 = xmm5[0],xmm4[0]
vmovapd -80(%rbp), %xmm6
vunpcklpd %xmm2, %xmm6, %xmm2 # xmm2 = xmm6[0],xmm2[0]
vinsertf128 $1, %xmm4, %ymm2, %ymm2
movabsq $140080955539448, %rax # imm = 0x7F6723983FF8
vbroadcastsd (%rax), %ymm4
vmulpd %ymm4, %ymm2, %ymm8
Source line: 155
# Multiply input doubles 1..3 by the saved __pow exponent constant (xmm7) ...
vmulsd %xmm7, %xmm1, %xmm4
vpermilpd $1, %xmm1, %xmm5 # xmm5 = xmm1[1,0]
vmulsd %xmm7, %xmm5, %xmm2
vmulsd %xmm7, %xmm3, %xmm7
Source line: 155
# ... and then by x*x (xmm6).
vmulsd %xmm4, %xmm6, %xmm4
vmulsd %xmm2, %xmm6, %xmm2
vmulsd %xmm7, %xmm6, %xmm7
Source line: 17
# ymm1 = [spilled x + constant, input doubles 1..3];
# ymm0 = [__pow result, products...] divided by a broadcast constant.
vunpcklpd %xmm3, %xmm5, %xmm3 # xmm3 = xmm5[0],xmm3[0]
vmovapd -48(%rbp), %xmm5
vunpcklpd %xmm1, %xmm5, %xmm1 # xmm1 = xmm5[0],xmm1[0]
vunpcklpd %xmm7, %xmm2, %xmm5 # xmm5 = xmm2[0],xmm7[0]
vunpcklpd %xmm4, %xmm0, %xmm0 # xmm0 = xmm0[0],xmm4[0]
vinsertf128 $1, %xmm5, %ymm0, %ymm0
movabsq $140080955539456, %rax # imm = 0x7F6723984000
vbroadcastsd (%rax), %ymm4
vdivpd %ymm4, %ymm0, %ymm0
vinsertf128 $1, %xmm3, %ymm1, %ymm1
# Sum the three vectors, store through the result pointer, return it.
vaddpd %ymm1, %ymm8, %ymm1
vaddpd %ymm0, %ymm1, %ymm0
vmovupd %ymm0, (%r15)
movq %r15, %rax
addq $56, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
vzeroupper
retq
Source line: 315
# Error paths: each passes the constant in %r14 to jl_throw. No code follows
# the calls, so jl_throw is presumably non-returning -- confirm.
L359:
movabsq $jl_throw, %rax
movq %r14, %rdi
callq *%rax
Source line: 315
L374:
movabsq $jl_throw, %rax
movq %r14, %rdi
callq *%rax
Source line: 315
L389:
movabsq $jl_throw, %rax
movq %r14, %rdi
callq *%rax
nopw %cs:(%rax,%rax)
Revision: 3f62885ecf2523c7e0707863a2c11ccf93fccc5f
; LLVM IR for vec_exp!: takes two object pointers (%0 = destination,
; %1 = source), validates their lengths via check_broadcast_shape, calls
; exp_taylor once per element and returns %0.
; Elements are moved as <4 x i64> (32 bytes) values.
define i8** @"julia_vec_exp!_63126"(i8** dereferenceable(40), i8** dereferenceable(40)) #0 !dbg !5 {
top:
; Stack slots: %2 / %shape hold the clamped lengths, %3 receives the
; exp_taylor result (sret), %val_2 holds the element passed as input.
%2 = alloca [1 x %OneTo.3], align 8
%3 = alloca <4 x i64>, align 8
%tmpcast81 = bitcast <4 x i64>* %3 to %Dual.67*
%shape = alloca [1 x %OneTo.3], align 8
%val_2 = alloca <4 x i64>, align 8
%tmpcast = bitcast <4 x i64>* %val_2 to %Dual.67*
; Load the i64 at pointer-slot index 3 of %0 and clamp negatives to 0 (%8).
%4 = getelementptr i8*, i8** %0, i64 3
%5 = bitcast i8** %4 to i64*
%6 = load i64, i64* %5, align 8
%7 = icmp slt i64 %6, 0
%8 = select i1 %7, i64 0, i64 %6
%9 = getelementptr inbounds [1 x %OneTo.3], [1 x %OneTo.3]* %shape, i64 0, i64 0, i32 0
store i64 %8, i64* %9, align 8
; Same load-and-clamp for %1 (%14).
%10 = getelementptr i8*, i8** %1, i64 3
%11 = bitcast i8** %10 to i64*
%12 = load i64, i64* %11, align 8
%13 = icmp slt i64 %12, 0
%14 = select i1 %13, i64 0, i64 %12
%15 = getelementptr inbounds [1 x %OneTo.3], [1 x %OneTo.3]* %2, i64 0, i64 0, i32 0
store i64 %14, i64* %15, align 8
call void @julia_check_broadcast_shape_63127([1 x %OneTo.3]* nocapture nonnull readonly %shape, [1 x %OneTo.3]* nocapture nonnull readonly %2)
; Skip the loops entirely when the destination length is < 1.
%16 = icmp slt i64 %8, 1
br i1 %16, label %L112, label %if11.lr.ph
if11.lr.ph: ; preds = %top
; Re-read and clamp the source length; equal lengths select the indexed
; loop (if11.us), otherwise the loop that always reads source element 0 (if11).
%17 = load i64, i64* %11, align 8
%18 = icmp slt i64 %17, 0
%19 = select i1 %18, i64 0, i64 %17
%20 = icmp eq i64 %8, %19
%21 = bitcast i8** %1 to %Dual.67**
%22 = bitcast i8** %0 to %Dual.67**
br i1 %20, label %if11.us.preheader, label %if11.preheader
if11.preheader: ; preds = %if11.lr.ph
br label %if11
if11.us.preheader: ; preds = %if11.lr.ph
br label %if11.us
if11.us: ; preds = %if11.us.preheader, %if11.us
; Indexed loop: dest[i] = exp_taylor(src[i]) for i in 0 .. %8-1.
%"i#804.063.us" = phi i64 [ %30, %if11.us ], [ 0, %if11.us.preheader ]
%23 = load %Dual.67*, %Dual.67** %21, align 8
%.elt.us = getelementptr inbounds %Dual.67, %Dual.67* %23, i64 %"i#804.063.us", i32 0
%24 = bitcast double* %.elt.us to <4 x i64>*
%25 = load <4 x i64>, <4 x i64>* %24, align 8
store <4 x i64> %25, <4 x i64>* %val_2, align 8
call void @julia_exp_taylor_63128(%Dual.67* noalias nocapture nonnull sret %tmpcast81, %Dual.67* nocapture nonnull readonly %tmpcast)
%26 = load <4 x i64>, <4 x i64>* %3, align 8
%27 = load %Dual.67*, %Dual.67** %22, align 8
%28 = getelementptr %Dual.67, %Dual.67* %27, i64 %"i#804.063.us"
%29 = bitcast %Dual.67* %28 to <4 x i64>*
store <4 x i64> %26, <4 x i64>* %29, align 8
%30 = add nuw nsw i64 %"i#804.063.us", 1
%exitcond79 = icmp eq i64 %30, %8
br i1 %exitcond79, label %L112.loopexit, label %if11.us
L112.loopexit: ; preds = %if11.us
br label %L112
L112.loopexit82: ; preds = %if11
br label %L112
L112: ; preds = %L112.loopexit82, %L112.loopexit, %top
; Common exit: return the destination argument.
ret i8** %0
if11: ; preds = %if11.preheader, %if11
; Non-indexed loop: dest[i] = exp_taylor(src[0]) for i in 0 .. %8-1.
%"i#804.063" = phi i64 [ %38, %if11 ], [ 0, %if11.preheader ]
%31 = bitcast i8** %1 to <4 x i64>**
%32 = load <4 x i64>*, <4 x i64>** %31, align 8
%33 = load <4 x i64>, <4 x i64>* %32, align 8
store <4 x i64> %33, <4 x i64>* %val_2, align 8
call void @julia_exp_taylor_63128(%Dual.67* noalias nocapture nonnull sret %tmpcast81, %Dual.67* nocapture nonnull readonly %tmpcast)
%34 = load <4 x i64>, <4 x i64>* %3, align 8
%35 = load %Dual.67*, %Dual.67** %22, align 8
%36 = getelementptr %Dual.67, %Dual.67* %35, i64 %"i#804.063"
%37 = bitcast %Dual.67* %36 to <4 x i64>*
store <4 x i64> %34, <4 x i64>* %37, align 8
%38 = add nuw nsw i64 %"i#804.063", 1
%exitcond = icmp eq i64 %38, %8
br i1 %exitcond, label %L112.loopexit82, label %if11
}
; LLVM IR for exp_taylor in the variant where the powers are computed inline.
; %0 is the sret result slot, %1 the read-only input made of four doubles
; (x = field 0, p0..p2 = field 1). The stored result is
;   0.5 * [x*x, 2x*p0, 2x*p1, 2x*p2]
;   + [x + 1.0, p0, p1, p2]
;   + [pow(x, 3.0), 3*x*x*p0, 3*x*x*p1, 3*x*x*p2] / 6.0
; Each power is followed by a NaN check that calls jl_throw on failure.
define void @julia_exp_taylor_63128(%Dual.67* noalias nocapture sret, %Dual.67* nocapture readonly dereferenceable(32)) #0 !dbg !5 {
top:
%2 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 0
%3 = load double, double* %2, align 8
%pow2 = fmul double %3, %3
%4 = fadd double %3, 2.000000e+00
; Continue when x*x is not NaN or x + 2.0 is NaN; otherwise throw.
%notlhs = fcmp ord double %pow2, 0.000000e+00
%notrhs = fcmp uno double %4, 0.000000e+00
%5 = or i1 %notrhs, %notlhs
br i1 %5, label %L16, label %if
if: ; preds = %top
call void @jl_throw(i8** inttoptr (i64 139959341768328 to i8**))
unreachable
L16: ; preds = %top
; Continue when x is not NaN or x + 1.0 is NaN; otherwise throw.
%6 = fadd double %3, 1.000000e+00
%notlhs28 = fcmp ord double %3, 0.000000e+00
%notrhs29 = fcmp uno double %6, 0.000000e+00
%7 = or i1 %notlhs28, %notrhs29
br i1 %7, label %L33, label %if15
if15: ; preds = %L16
call void @jl_throw(i8** inttoptr (i64 139959341768328 to i8**))
unreachable
L33: ; preds = %L16
; Cube via the pow intrinsic; continue when the result is not NaN or
; x + 3.0 is NaN, otherwise throw.
%8 = call double @llvm.pow.f64(double %3, double 3.000000e+00)
%9 = fadd double %3, 3.000000e+00
%notlhs33 = fcmp ord double %8, 0.000000e+00
%notrhs34 = fcmp uno double %9, 0.000000e+00
%10 = or i1 %notlhs33, %notrhs34
br i1 %10, label %L134, label %if18
if18: ; preds = %L33
call void @jl_throw(i8** inttoptr (i64 139959341768328 to i8**))
unreachable
L134: ; preds = %L33
; Square term: [x*x, 2x*p0, 2x*p1, 2x*p2] scaled by 0.5 (%27).
%11 = fmul double %3, 2.000000e+00
%12 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 1, i32 0, i64 0
%13 = bitcast double* %12 to <2 x double>*
%14 = load <2 x double>, <2 x double>* %13, align 8
%15 = insertelement <2 x double> undef, double %11, i32 0
%16 = insertelement <2 x double> %15, double %11, i32 1
%17 = fmul <2 x double> %16, %14
%18 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 1, i32 0, i64 2
%19 = load double, double* %18, align 8
%20 = fmul double %11, %19
%21 = insertelement <4 x double> undef, double %pow2, i32 0
%22 = extractelement <2 x double> %17, i32 0
%23 = insertelement <4 x double> %21, double %22, i32 1
%24 = extractelement <2 x double> %17, i32 1
%25 = insertelement <4 x double> %23, double %24, i32 2
%26 = insertelement <4 x double> %25, double %20, i32 3
%27 = fmul <4 x double> %26, <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
; Cube term tail: 3*x*x times each of p0..p2.
%28 = fmul double %pow2, 3.000000e+00
%29 = extractelement <2 x double> %14, i32 0
%30 = fmul double %28, %29
%31 = extractelement <2 x double> %14, i32 1
%32 = fmul double %28, %31
%33 = fmul double %28, %19
; Linear term: [x + 1.0, p0, p1, p2], added to the scaled square term (%38).
%34 = insertelement <4 x double> undef, double %6, i32 0
%35 = insertelement <4 x double> %34, double %29, i32 1
%36 = insertelement <4 x double> %35, double %31, i32 2
%37 = insertelement <4 x double> %36, double %19, i32 3
%38 = fadd <4 x double> %27, %37
; Cube term: [pow(x, 3.0), 3*x*x*p0, 3*x*x*p1, 3*x*x*p2] / 6.0, then final sum.
%39 = insertelement <4 x double> undef, double %8, i32 0
%40 = insertelement <4 x double> %39, double %30, i32 1
%41 = insertelement <4 x double> %40, double %32, i32 2
%42 = insertelement <4 x double> %41, double %33, i32 3
%43 = fdiv <4 x double> %42, <double 6.000000e+00, double 6.000000e+00, double 6.000000e+00, double 6.000000e+00>
%44 = fadd <4 x double> %38, %43
%45 = bitcast %Dual.67* %0 to <4 x double>*
store <4 x double> %44, <4 x double>* %45, align 8
ret void
}
using ForwardDiff: Dual
using BenchmarkTools
# Benchmark group that the loops below fill with one entry per configuration.
const SUITE = BenchmarkGroup()
# Build the expression for the degree-`p` Taylor polynomial of `exp` in `x`:
#     (((1 + x) + x^2 / 2) + x^3 / 6) + ...
# `x` is spliced in as-is (typically a Symbol). For `p == 0` the plain integer
# `1` is returned. Each higher-order term wraps the previous expression, so
# the result is a left-nested chain of binary `+` calls.
function exp_taylor_expr(p::Int, x)
    @assert p >= 0
    p == 0 && return 1
    poly = :(1 + $x)
    for k in 2:p
        # Exponent and factorial are spliced in as literal integers.
        poly = :($poly + $x^$k / $(factorial(k)))
    end
    return poly
end
# Generated function: for each `Val{p}` the body is the expression produced by
# `exp_taylor_expr(p, :x)`, i.e. the Taylor polynomial written out in terms of
# the argument `x`.
@inline @generated function exp_taylor(::Val{p}, x) where p
    return exp_taylor_expr(p, :x)
end
# Apply `exp_taylor(vp, x)` to every element of `xs`, writing the results into
# `ys` in place via a fused broadcast assignment. Returns `ys`.
function vec_exp!(vp::Val, ys, xs)
    ys .= exp_taylor.(vp, xs)
    return ys
end
# Register the scalar benchmarks: one entry per polynomial order `p`, real
# part `r` and width `w`. The Dual number gets the partials 2.0, 3.0, ..., w.
for p in 2:8
    for r in (0.0, 1.0)
        for w in (4, 8)
            dual = Dual(r, (2:1.0:w)...)
            SUITE["scalar p=$p width=$w r=$r"] = @benchmarkable exp_taylor($(Val(p)), $dual)
        end
    end
end
# Register the vector benchmarks: one entry per polynomial order `p` and width
# `w`. The input is a vector of Dual numbers with real parts 0:0.01:1 and the
# partials 2.0, 3.0, ..., w; the output buffer starts as a copy of the input.
for p in 2:8
    for w in (4, 8)
        inputs = Dual.(0:0.01:1, (2:1.0:w)...)
        outputs = copy(inputs)
        SUITE["vector p=$p width=$w"] = @benchmarkable vec_exp!($(Val(p)), $outputs, $inputs)
    end
end
# Write the native code of `vec_exp!` and then of `exp_taylor` to `io`, both
# specialised for `Val{3}` and `Dual{:t,Float64,3}`, separated by a blank line.
function print_code_native(io = STDOUT)
    dual_type = Dual{:t,Float64,3}
    vector_type = Vector{dual_type}
    code_native(io, vec_exp!, (Val{3}, vector_type, vector_type))
    println(io)
    code_native(io, exp_taylor, (Val{3}, dual_type))
end
# Write the LLVM IR of `vec_exp!` and then of `exp_taylor` to `io`, both
# specialised for `Val{3}` and `Dual{:t,Float64,3}`, separated by a blank line.
function print_code_llvm(io = STDOUT)
    dual_type = Dual{:t,Float64,3}
    vector_type = Vector{dual_type}
    code_llvm(io, vec_exp!, (Val{3}, vector_type, vector_type))
    println(io)
    code_llvm(io, exp_taylor, (Val{3}, dual_type))
end

Benchmark Report for ForwardDiff

Job Properties

  • Time of benchmarks:
    • Target: 18 Jul 2018 - 13:27
    • Baseline: 18 Jul 2018 - 13:28
  • Package commits:
    • Target: 238626
    • Baseline: 6d0eb4
  • Julia commits:
    • Target: 9d11f6
    • Baseline: 9d11f6
  • Julia command flags:
    • Target: -O3
    • Baseline: -O3
  • Environment variables:
    • Target: None
    • Baseline: None

Results

A ratio greater than 1.0 denotes a possible regression (marked with ❌), while a ratio less than 1.0 denotes a possible improvement (marked with ✅). Only significant results - results that indicate possible regressions or improvements - are shown below (thus, an empty table means that all benchmark results remained invariant between builds).

ID time ratio memory ratio
["scalar p=2 width=4 r=0.0"] 1.13 (5%) ❌ 1.00 (1%)
["scalar p=2 width=4 r=1.0"] 1.13 (5%) ❌ 1.00 (1%)
["scalar p=2 width=8 r=0.0"] 1.18 (5%) ❌ 1.00 (1%)
["scalar p=2 width=8 r=1.0"] 1.12 (5%) ❌ 1.00 (1%)
["scalar p=3 width=4 r=0.0"] 0.79 (5%) ✅ 1.00 (1%)
["scalar p=3 width=4 r=1.0"] 0.64 (5%) ✅ 1.00 (1%)
["scalar p=3 width=8 r=0.0"] 0.81 (5%) ✅ 1.00 (1%)
["scalar p=3 width=8 r=1.0"] 0.70 (5%) ✅ 1.00 (1%)
["scalar p=4 width=4 r=0.0"] 0.89 (5%) ✅ 1.00 (1%)
["scalar p=4 width=4 r=1.0"] 0.91 (5%) ✅ 1.00 (1%)
["scalar p=4 width=8 r=0.0"] 0.88 (5%) ✅ 1.00 (1%)
["scalar p=4 width=8 r=1.0"] 0.88 (5%) ✅ 1.00 (1%)
["scalar p=5 width=4 r=0.0"] 1.09 (5%) ❌ 1.00 (1%)
["scalar p=5 width=4 r=1.0"] 1.22 (5%) ❌ 1.00 (1%)
["scalar p=5 width=8 r=1.0"] 1.12 (5%) ❌ 1.00 (1%)
["scalar p=6 width=4 r=0.0"] 1.18 (5%) ❌ 1.00 (1%)
["scalar p=6 width=4 r=1.0"] 1.41 (5%) ❌ 1.00 (1%)
["scalar p=6 width=8 r=0.0"] 1.17 (5%) ❌ 1.00 (1%)
["scalar p=6 width=8 r=1.0"] 1.30 (5%) ❌ 1.00 (1%)
["scalar p=7 width=4 r=0.0"] 1.30 (5%) ❌ 1.00 (1%)
["scalar p=7 width=4 r=1.0"] 1.52 (5%) ❌ 1.00 (1%)
["scalar p=7 width=8 r=0.0"] 1.22 (5%) ❌ 1.00 (1%)
["scalar p=7 width=8 r=1.0"] 1.38 (5%) ❌ 1.00 (1%)
["scalar p=8 width=4 r=0.0"] 1.36 (5%) ❌ 1.00 (1%)
["scalar p=8 width=4 r=1.0"] 1.59 (5%) ❌ 1.00 (1%)
["scalar p=8 width=8 r=0.0"] 1.34 (5%) ❌ 1.00 (1%)
["scalar p=8 width=8 r=1.0"] 1.45 (5%) ❌ 1.00 (1%)
["vector p=2 width=4"] 0.59 (5%) ✅ 1.00 (1%)
["vector p=2 width=8"] 0.67 (5%) ✅ 1.00 (1%)
["vector p=3 width=4"] 0.20 (5%) ✅ 1.00 (1%)
["vector p=3 width=8"] 0.24 (5%) ✅ 1.00 (1%)
["vector p=4 width=4"] 0.73 (5%) ✅ 1.00 (1%)
["vector p=4 width=8"] 0.72 (5%) ✅ 1.00 (1%)
["vector p=5 width=4"] 1.13 (5%) ❌ 1.00 (1%)
["vector p=5 width=8"] 1.08 (5%) ❌ 1.00 (1%)
["vector p=6 width=4"] 1.36 (5%) ❌ 1.00 (1%)
["vector p=6 width=8"] 1.28 (5%) ❌ 1.00 (1%)
["vector p=7 width=4"] 1.51 (5%) ❌ 1.00 (1%)
["vector p=7 width=8"] 1.35 (5%) ❌ 1.00 (1%)
["vector p=8 width=4"] 1.56 (5%) ❌ 1.00 (1%)
["vector p=8 width=8"] 1.43 (5%) ❌ 1.00 (1%)

Benchmark Group List

Here's a list of all the benchmark groups executed by this job:

  • []

Julia versioninfo

Target

Julia Version 0.6.4
Commit 9d11f62bcb (2018-07-09 19:09 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz
  WORD_SIZE: 64
           Ubuntu 16.04.4 LTS
  uname: Linux 4.4.0-119-generic #143-Ubuntu SMP Mon Apr 2 16:08:24 UTC 2018 x86_64 x86_64
Memory: 125.78757095336914 GB (54251.66015625 MB free)
Uptime: 3.540706e6 sec
Load Avg:  1.28515625  0.53466796875  0.29931640625
Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz: 
          speed         user         nice          sys         idle          irq
#1-48  2899 MHz  211402103 s      11362 s  183950510 s  16591975349 s          0 s

  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=16)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, broadwell)

Baseline

Julia Version 0.6.4
Commit 9d11f62bcb (2018-07-09 19:09 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz
  WORD_SIZE: 64
           Ubuntu 16.04.4 LTS
  uname: Linux 4.4.0-119-generic #143-Ubuntu SMP Mon Apr 2 16:08:24 UTC 2018 x86_64 x86_64
Memory: 125.78757095336914 GB (54186.453125 MB free)
Uptime: 3.540757e6 sec
Load Avg:  1.27294921875  0.65380859375  0.3544921875
Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz: 
          speed         user         nice          sys         idle          irq
#1-48  2861 MHz  211407455 s      11362 s  183951210 s  16592215014 s          0 s

  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=16)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, broadwell)
--- bench_exp_taylor-HEAD~0.ll 2018-07-18 13:29:20.531602785 -0700
+++ bench_exp_taylor-HEAD~1.ll 2018-07-18 13:29:41.447953486 -0700
@@ -1,7 +1,7 @@
-Revision: 6d0eb49e3ef74d2e97c7075af088f3ef1590a43e
+Revision: 3f62885ecf2523c7e0707863a2c11ccf93fccc5f
-define i8** @"julia_vec_exp!_63127"(i8** dereferenceable(40), i8** dereferenceable(40)) #0 !dbg !5 {
+define i8** @"julia_vec_exp!_63126"(i8** dereferenceable(40), i8** dereferenceable(40)) #0 !dbg !5 {
top:
%2 = alloca [1 x %OneTo.3], align 8
%3 = alloca <4 x i64>, align 8
@@ -23,7 +23,7 @@
%14 = select i1 %13, i64 0, i64 %12
%15 = getelementptr inbounds [1 x %OneTo.3], [1 x %OneTo.3]* %2, i64 0, i64 0, i32 0
store i64 %14, i64* %15, align 8
- call void @julia_check_broadcast_shape_63128([1 x %OneTo.3]* nocapture nonnull readonly %shape, [1 x %OneTo.3]* nocapture nonnull readonly %2)
+ call void @julia_check_broadcast_shape_63127([1 x %OneTo.3]* nocapture nonnull readonly %shape, [1 x %OneTo.3]* nocapture nonnull readonly %2)
%16 = icmp slt i64 %8, 1
br i1 %16, label %L112, label %if11.lr.ph
@@ -49,7 +49,7 @@
%24 = bitcast double* %.elt.us to <4 x i64>*
%25 = load <4 x i64>, <4 x i64>* %24, align 8
store <4 x i64> %25, <4 x i64>* %val_2, align 8
- call void @julia_exp_taylor_63129(%Dual.67* noalias nocapture nonnull sret %tmpcast81, %Dual.67* nocapture nonnull readonly %tmpcast)
+ call void @julia_exp_taylor_63128(%Dual.67* noalias nocapture nonnull sret %tmpcast81, %Dual.67* nocapture nonnull readonly %tmpcast)
%26 = load <4 x i64>, <4 x i64>* %3, align 8
%27 = load %Dual.67*, %Dual.67** %22, align 8
%28 = getelementptr %Dual.67, %Dual.67* %27, i64 %"i#804.063.us"
@@ -74,7 +74,7 @@
%32 = load <4 x i64>*, <4 x i64>** %31, align 8
%33 = load <4 x i64>, <4 x i64>* %32, align 8
store <4 x i64> %33, <4 x i64>* %val_2, align 8
- call void @julia_exp_taylor_63129(%Dual.67* noalias nocapture nonnull sret %tmpcast81, %Dual.67* nocapture nonnull readonly %tmpcast)
+ call void @julia_exp_taylor_63128(%Dual.67* noalias nocapture nonnull sret %tmpcast81, %Dual.67* nocapture nonnull readonly %tmpcast)
%34 = load <4 x i64>, <4 x i64>* %3, align 8
%35 = load %Dual.67*, %Dual.67** %22, align 8
%36 = getelementptr %Dual.67, %Dual.67* %35, i64 %"i#804.063"
@@ -86,34 +86,80 @@
}
-define void @julia_exp_taylor_63129(%Dual.67* noalias nocapture sret, %Dual.67* nocapture readonly dereferenceable(32)) #0 !dbg !5 {
+define void @julia_exp_taylor_63128(%Dual.67* noalias nocapture sret, %Dual.67* nocapture readonly dereferenceable(32)) #0 !dbg !5 {
top:
- %2 = alloca <4 x double>, align 8
- %tmpcast = bitcast <4 x double>* %2 to %Dual.67*
- %3 = alloca <4 x double>, align 8
- %tmpcast48 = bitcast <4 x double>* %3 to %Dual.67*
- call void @julia_literal_pow_63130(%Dual.67* noalias nocapture nonnull sret %tmpcast, %Dual.67* nocapture nonnull readonly %1, i8** inttoptr (i64 140126153960624 to i8**))
- %4 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 0
- %5 = load double, double* %4, align 8
- %6 = fadd double %5, 1.000000e+00
- %7 = load <4 x double>, <4 x double>* %2, align 8
- %8 = fmul <4 x double> %7, <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
- %9 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 1, i32 0, i64 0
- %10 = load double, double* %9, align 8
- %11 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 1, i32 0, i64 1
- %12 = load double, double* %11, align 8
- %13 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 1, i32 0, i64 2
- %14 = load double, double* %13, align 8
- call void @julia_literal_pow_63131(%Dual.67* noalias nocapture nonnull sret %tmpcast48, %Dual.67* nocapture nonnull readonly %1, i8** inttoptr (i64 140126153962160 to i8**))
- %15 = insertelement <4 x double> undef, double %6, i32 0
- %16 = insertelement <4 x double> %15, double %10, i32 1
- %17 = insertelement <4 x double> %16, double %12, i32 2
- %18 = insertelement <4 x double> %17, double %14, i32 3
- %19 = fadd <4 x double> %18, %8
- %20 = load <4 x double>, <4 x double>* %3, align 8
- %21 = fdiv <4 x double> %20, <double 6.000000e+00, double 6.000000e+00, double 6.000000e+00, double 6.000000e+00>
- %22 = fadd <4 x double> %19, %21
- %23 = bitcast %Dual.67* %0 to <4 x double>*
- store <4 x double> %22, <4 x double>* %23, align 8
+ %2 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 0
+ %3 = load double, double* %2, align 8
+ %pow2 = fmul double %3, %3
+ %4 = fadd double %3, 2.000000e+00
+ %notlhs = fcmp ord double %pow2, 0.000000e+00
+ %notrhs = fcmp uno double %4, 0.000000e+00
+ %5 = or i1 %notrhs, %notlhs
+ br i1 %5, label %L16, label %if
+
+if: ; preds = %top
+ call void @jl_throw(i8** inttoptr (i64 139959341768328 to i8**))
+ unreachable
+
+L16: ; preds = %top
+ %6 = fadd double %3, 1.000000e+00
+ %notlhs28 = fcmp ord double %3, 0.000000e+00
+ %notrhs29 = fcmp uno double %6, 0.000000e+00
+ %7 = or i1 %notlhs28, %notrhs29
+ br i1 %7, label %L33, label %if15
+
+if15: ; preds = %L16
+ call void @jl_throw(i8** inttoptr (i64 139959341768328 to i8**))
+ unreachable
+
+L33: ; preds = %L16
+ %8 = call double @llvm.pow.f64(double %3, double 3.000000e+00)
+ %9 = fadd double %3, 3.000000e+00
+ %notlhs33 = fcmp ord double %8, 0.000000e+00
+ %notrhs34 = fcmp uno double %9, 0.000000e+00
+ %10 = or i1 %notlhs33, %notrhs34
+ br i1 %10, label %L134, label %if18
+
+if18: ; preds = %L33
+ call void @jl_throw(i8** inttoptr (i64 139959341768328 to i8**))
+ unreachable
+
+L134: ; preds = %L33
+ %11 = fmul double %3, 2.000000e+00
+ %12 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 1, i32 0, i64 0
+ %13 = bitcast double* %12 to <2 x double>*
+ %14 = load <2 x double>, <2 x double>* %13, align 8
+ %15 = insertelement <2 x double> undef, double %11, i32 0
+ %16 = insertelement <2 x double> %15, double %11, i32 1
+ %17 = fmul <2 x double> %16, %14
+ %18 = getelementptr inbounds %Dual.67, %Dual.67* %1, i64 0, i32 1, i32 0, i64 2
+ %19 = load double, double* %18, align 8
+ %20 = fmul double %11, %19
+ %21 = insertelement <4 x double> undef, double %pow2, i32 0
+ %22 = extractelement <2 x double> %17, i32 0
+ %23 = insertelement <4 x double> %21, double %22, i32 1
+ %24 = extractelement <2 x double> %17, i32 1
+ %25 = insertelement <4 x double> %23, double %24, i32 2
+ %26 = insertelement <4 x double> %25, double %20, i32 3
+ %27 = fmul <4 x double> %26, <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
+ %28 = fmul double %pow2, 3.000000e+00
+ %29 = extractelement <2 x double> %14, i32 0
+ %30 = fmul double %28, %29
+ %31 = extractelement <2 x double> %14, i32 1
+ %32 = fmul double %28, %31
+ %33 = fmul double %28, %19
+ %34 = insertelement <4 x double> undef, double %6, i32 0
+ %35 = insertelement <4 x double> %34, double %29, i32 1
+ %36 = insertelement <4 x double> %35, double %31, i32 2
+ %37 = insertelement <4 x double> %36, double %19, i32 3
+ %38 = fadd <4 x double> %27, %37
+ %39 = insertelement <4 x double> undef, double %8, i32 0
+ %40 = insertelement <4 x double> %39, double %30, i32 1
+ %41 = insertelement <4 x double> %40, double %32, i32 2
+ %42 = insertelement <4 x double> %41, double %33, i32 3
+ %43 = fdiv <4 x double> %42, <double 6.000000e+00, double 6.000000e+00, double 6.000000e+00, double 6.000000e+00>
+ %44 = fadd <4 x double> %38, %43
+ %45 = bitcast %Dual.67* %0 to <4 x double>*
+ store <4 x double> %44, <4 x double>* %45, align 8
ret void
}
--- bench_exp_taylor-HEAD~0.code_native 2018-07-18 13:28:40.990930787 -0700
+++ bench_exp_taylor-HEAD~1.code_native 2018-07-18 13:29:00.215257515 -0700
@@ -1,4 +1,4 @@
-Revision: 2386264789419e07e5f4c29666c001db3d2a232d
+Revision: 6d0eb49e3ef74d2e97c7075af088f3ef1590a43e
.text
Filename: bench_exp_taylor.jl
@@ -106,47 +106,112 @@
pushq %r15
pushq %r14
pushq %rbx
- andq $-32, %rsp
- subq $160, %rsp
- movq %rsi, %r15
- movq %rdi, %r14
+ subq $56, %rsp
+ movq %rsi, %rbx
+ movq %rdi, %r15
+ movabsq $140081698299528, %r14 # imm = 0x7F674FDDDE88
Source line: 17
- movabsq $literal_pow, %rax
- leaq 96(%rsp), %rdi
- movabsq $139733918657712, %rbx # imm = 0x7F165695ACB0
- movq %rbx, %rdx
- callq *%rax
- vmovsd (%r15), %xmm0 # xmm0 = mem[0],zero
- movabsq $139733179072216, %rax # imm = 0x7F162A807ED8
- vaddsd (%rax), %xmm0, %xmm0
- vmovapd %xmm0, 16(%rsp)
- movabsq $139733179072224, %rax # imm = 0x7F162A807EE0
- vbroadcastsd (%rax), %ymm0
- vmulpd 96(%rsp), %ymm0, %ymm0
- vmovapd %ymm0, 32(%rsp)
- addq $1536, %rbx # imm = 0x600
- movabsq $literal_pow, %rax
- leaq 64(%rsp), %rdi
- movq %r15, %rsi
- movq %rbx, %rdx
- vzeroupper
+ vmovsd (%rbx), %xmm0 # xmm0 = mem[0],zero
+Source line: 714
+ vmulsd %xmm0, %xmm0, %xmm1
+ movabsq $140080955539424, %rax # imm = 0x7F6723983FE0
+ vaddsd (%rax), %xmm0, %xmm2
+Source line: 315
+ vucomisd %xmm2, %xmm2
+ jp L67
+ vucomisd %xmm1, %xmm1
+ jp L359
+L67:
+ vmovapd %xmm1, -80(%rbp)
+ movabsq $140080955539432, %rax # imm = 0x7F6723983FE8
+Source line: 714
+ vaddsd (%rax), %xmm0, %xmm1
+Source line: 315
+ vucomisd %xmm0, %xmm0
+ jnp L102
+ vucomisd %xmm1, %xmm1
+ jnp L374
+L102:
+ vmovapd %xmm1, -48(%rbp)
+ movabsq $140080955539440, %rax # imm = 0x7F6723983FF0
+Source line: 714
+ vmovsd (%rax), %xmm1 # xmm1 = mem[0],zero
+ vmovsd %xmm1, -32(%rbp)
+ movabsq $__pow, %rax
+ vmovapd %xmm0, -64(%rbp)
callq *%rax
- movabsq $139733179072232, %rax # imm = 0x7F162A807EE8
- vbroadcastsd (%rax), %ymm0
- vmovapd 64(%rsp), %ymm1
- vdivpd %ymm0, %ymm1, %ymm0
- vmovapd 16(%rsp), %xmm1
- vmovhpd 8(%r15), %xmm1, %xmm1 # xmm1 = xmm1[0],mem[0]
- vinsertf128 $1, 16(%r15), %ymm1, %ymm1
- vaddpd 32(%rsp), %ymm1, %ymm1
+ vmovsd -32(%rbp), %xmm7 # xmm7 = mem[0],zero
+ vmovapd -64(%rbp), %xmm6
+Source line: 315
+ vucomisd %xmm0, %xmm0
+ jnp L173
+Source line: 714
+ vaddsd %xmm7, %xmm6, %xmm1
+ vucomisd %xmm1, %xmm1
+ jnp L389
+Source line: 155
+L173:
+ vmovupd 8(%rbx), %xmm1
+ vaddpd %xmm1, %xmm1, %xmm2
+ vmovsd 24(%rbx), %xmm3 # xmm3 = mem[0],zero
+ vaddsd %xmm3, %xmm3, %xmm4
+Source line: 155
+ vmovddup %xmm6, %xmm5 # xmm5 = xmm6[0,0]
+ vmulpd %xmm2, %xmm5, %xmm2
+ vmulsd %xmm4, %xmm6, %xmm4
+Source line: 17
+ vpermilpd $1, %xmm2, %xmm5 # xmm5 = xmm2[1,0]
+ vunpcklpd %xmm4, %xmm5, %xmm4 # xmm4 = xmm5[0],xmm4[0]
+ vmovapd -80(%rbp), %xmm6
+ vunpcklpd %xmm2, %xmm6, %xmm2 # xmm2 = xmm6[0],xmm2[0]
+ vinsertf128 $1, %xmm4, %ymm2, %ymm2
+ movabsq $140080955539448, %rax # imm = 0x7F6723983FF8
+ vbroadcastsd (%rax), %ymm4
+ vmulpd %ymm4, %ymm2, %ymm8
+Source line: 155
+ vmulsd %xmm7, %xmm1, %xmm4
+ vpermilpd $1, %xmm1, %xmm5 # xmm5 = xmm1[1,0]
+ vmulsd %xmm7, %xmm5, %xmm2
+ vmulsd %xmm7, %xmm3, %xmm7
+Source line: 155
+ vmulsd %xmm4, %xmm6, %xmm4
+ vmulsd %xmm2, %xmm6, %xmm2
+ vmulsd %xmm7, %xmm6, %xmm7
+Source line: 17
+ vunpcklpd %xmm3, %xmm5, %xmm3 # xmm3 = xmm5[0],xmm3[0]
+ vmovapd -48(%rbp), %xmm5
+ vunpcklpd %xmm1, %xmm5, %xmm1 # xmm1 = xmm5[0],xmm1[0]
+ vunpcklpd %xmm7, %xmm2, %xmm5 # xmm5 = xmm2[0],xmm7[0]
+ vunpcklpd %xmm4, %xmm0, %xmm0 # xmm0 = xmm0[0],xmm4[0]
+ vinsertf128 $1, %xmm5, %ymm0, %ymm0
+ movabsq $140080955539456, %rax # imm = 0x7F6723984000
+ vbroadcastsd (%rax), %ymm4
+ vdivpd %ymm4, %ymm0, %ymm0
+ vinsertf128 $1, %xmm3, %ymm1, %ymm1
+ vaddpd %ymm1, %ymm8, %ymm1
vaddpd %ymm0, %ymm1, %ymm0
- vmovupd %ymm0, (%r14)
- movq %r14, %rax
- leaq -24(%rbp), %rsp
+ vmovupd %ymm0, (%r15)
+ movq %r15, %rax
+ addq $56, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
vzeroupper
retq
- nopl (%rax)
+Source line: 315
+L359:
+ movabsq $jl_throw, %rax
+ movq %r14, %rdi
+ callq *%rax
+Source line: 315
+L374:
+ movabsq $jl_throw, %rax
+ movq %r14, %rdi
+ callq *%rax
+Source line: 315
+L389:
+ movabsq $jl_throw, %rax
+ movq %r14, %rdi
+ callq *%rax
+ nopw %cs:(%rax,%rax)
Revision: 6d0eb49e3ef74d2e97c7075af088f3ef1590a43e
.text
Filename: bench_log1mx_taylor.jl
pushq %rbp
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $136, %rsp
movq %rdi, -64(%rbp)
Source line: 155
vmovupd 8(%rdx), %xmm1
movabsq $140352823859392, %rax # imm = 0x7FA6703598C0
vmovapd (%rax), %xmm0
vmovapd %xmm1, -144(%rbp)
vxorpd %xmm0, %xmm1, %xmm1
vmovsd 24(%rdx), %xmm2 # xmm2 = mem[0],zero
vmovapd %xmm2, -128(%rbp)
vxorpd %xmm0, %xmm2, %xmm3
Source line: 7
vmovsd (%rdx), %xmm5 # xmm5 = mem[0],zero
vmovq %xmm5, %rax
movabsq $-9223372036854775808, %rbx # imm = 0x8000000000000000
xorq %rax, %rbx
Source line: 65
testq %rsi, %rsi
movl $1, %r12d
cmovgq %rsi, %r12
Source line: 221
subq $2, %r12
seto -42(%rbp)
Source line: 164
incq %r12
seto -41(%rbp)
xorl %eax, %eax
movabsq $__pow, %r15
vmovsd %xmm5, -56(%rbp)
jmp L148
nopw (%rax,%rax)
L144:
movq -72(%rbp), %rax
Source line: 222
L148:
cmpb $0, -42(%rbp)
jne L442
Source line: 165
cmpb $0, -41(%rbp)
jne L470
testq %r12, %r12
Source line: 68
jle L476
Source line: 66
cmpq $1, %rax
je L476
addq $1, %rax
movq %rax, -72(%rbp)
movl $2, %r13d
nopl (%rax)
L208:
vmovapd %xmm3, -160(%rbp)
vmovapd %xmm1, -176(%rbp)
Source line: 716
vxorps %xmm0, %xmm0, %xmm0
vcvtsi2sdq %r13, %xmm0, %xmm1
Source line: 714
vmovaps %xmm1, -112(%rbp)
vmovapd %xmm5, %xmm0
callq *%r15
vmovapd -112(%rbp), %xmm1
vmovsd -56(%rbp), %xmm4 # xmm4 = mem[0],zero
Source line: 315
vucomisd %xmm0, %xmm0
jnp L275
Source line: 714
vaddsd %xmm4, %xmm1, %xmm2
vucomisd %xmm2, %xmm2
jnp L511
L275:
vmovsd %xmm0, -80(%rbp)
Source line: 716
leaq -1(%r13), %r14
vxorps %xmm0, %xmm0, %xmm0
vcvtsi2sdq %r14, %xmm0, %xmm1
Source line: 714
vmovsd %xmm1, -88(%rbp)
vmovapd %xmm4, %xmm0
callq *%r15
vmovsd -56(%rbp), %xmm5 # xmm5 = mem[0],zero
vaddsd -88(%rbp), %xmm5, %xmm1
Source line: 315
vucomisd %xmm1, %xmm1
jp L331
vucomisd %xmm0, %xmm0
jp L540
L331:
vmovapd -112(%rbp), %xmm6
Source line: 155
vmovddup %xmm6, %xmm1 # xmm1 = xmm6[0,0]
vmulpd -144(%rbp), %xmm1, %xmm2
vmulsd -128(%rbp), %xmm6, %xmm3
Source line: 155
vmovddup %xmm0, %xmm4 # xmm4 = xmm0[0,0]
vmulpd %xmm2, %xmm4, %xmm2
vmulsd %xmm3, %xmm0, %xmm0
Source line: 155
vdivpd %xmm1, %xmm2, %xmm1
vdivsd %xmm6, %xmm0, %xmm0
Source line: 9
vmovsd -80(%rbp), %xmm2 # xmm2 = mem[0],zero
vdivsd %xmm6, %xmm2, %xmm2
vmovapd -176(%rbp), %xmm3
Source line: 155
vsubpd %xmm1, %xmm3, %xmm3
vmovapd %xmm3, %xmm1
vmovapd -160(%rbp), %xmm3
vsubsd %xmm0, %xmm3, %xmm3
Source line: 9
vmovq %rbx, %xmm0
vsubsd %xmm2, %xmm0, %xmm0
vmovq %xmm0, %rbx
Source line: 71
addq $1, %r13
cmpq %r12, %r14
jl L208
jmp L144
Source line: 66
L442:
cmpq $1, %rax
je L476
Source line: 222
movabsq $jl_throw, %rax
movabsq $140353566625328, %rdi # imm = 0x7FA69C7B4E30
callq *%rax
Source line: 66
L470:
cmpq $1, %rax
jne L569
L476:
movq -64(%rbp), %rax
Source line: 11
movq %rbx, (%rax)
vmovupd %xmm1, 8(%rax)
vmovsd %xmm3, 24(%rax)
addq $136, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
retq
L511:
movabsq $140353566625328, %rdi # imm = 0x7FA69C7B4E30
Source line: 315
addq $4184, %rdi # imm = 0x1058
movabsq $jl_throw, %rax
callq *%rax
L540:
movabsq $140353566625328, %rdi # imm = 0x7FA69C7B4E30
Source line: 315
addq $4184, %rdi # imm = 0x1058
movabsq $jl_throw, %rax
callq *%rax
Source line: 165
L569:
movabsq $jl_throw, %rax
movabsq $140353566625328, %rdi # imm = 0x7FA69C7B4E30
callq *%rax
nop
Revision: 6d0eb49e3ef74d2e97c7075af088f3ef1590a43e
define void @julia_log1mx_taylor_62988(%Dual.67* noalias nocapture sret, i64, %Dual.67* nocapture readonly dereferenceable(32)) #0 !dbg !5 {
top:
%3 = getelementptr inbounds %Dual.67, %Dual.67* %2, i64 0, i32 1, i32 0, i64 0
%4 = bitcast double* %3 to <2 x double>*
%5 = load <2 x double>, <2 x double>* %4, align 8
%6 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %5
%7 = getelementptr inbounds %Dual.67, %Dual.67* %2, i64 0, i32 1, i32 0, i64 2
%8 = load double, double* %7, align 8
%9 = fsub double -0.000000e+00, %8
%10 = getelementptr inbounds %Dual.67, %Dual.67* %2, i64 0, i32 0
%11 = load double, double* %10, align 8
%12 = fsub double -0.000000e+00, %11
%13 = bitcast double %12 to i64
%14 = icmp sgt i64 %1, 1
%15 = select i1 %14, i64 %1, i64 1
%16 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %15, i64 2)
%17 = extractvalue { i64, i1 } %16, 1
%18 = extractvalue { i64, i1 } %16, 0
%19 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %18, i64 1)
%20 = extractvalue { i64, i1 } %19, 0
%21 = extractvalue { i64, i1 } %19, 1
%22 = icmp slt i64 %20, 1
br label %L26.outer
L26.outer.loopexit: ; preds = %L181
br label %L26.outer
L26.outer: ; preds = %L26.outer.loopexit, %top
%"#temp#.0.ph" = phi i64 [ 0, %top ], [ %28, %L26.outer.loopexit ]
%y.sroa.0.0.ph = phi i64 [ %13, %top ], [ %55, %L26.outer.loopexit ]
%y.sroa.4.sroa.5.0.ph = phi double [ %9, %top ], [ %52, %L26.outer.loopexit ]
%23 = phi <2 x double> [ %6, %top ], [ %51, %L26.outer.loopexit ]
br i1 %17, label %L26.outer.split.us, label %L26.outer.L26.outer.split_crit_edge
L26.outer.L26.outer.split_crit_edge: ; preds = %L26.outer
br i1 %21, label %L26.outer.split.split.us, label %L26.outer.split.L26.outer.split.split_crit_edge
L26.outer.split.us: ; preds = %L26.outer
%24 = icmp eq i64 %"#temp#.0.ph", 1
br i1 %24, label %L243, label %if16
L26.outer.split.L26.outer.split.split_crit_edge: ; preds = %L26.outer.L26.outer.split_crit_edge
br i1 %22, label %L243.loopexit, label %L26.outer.split.split.L26.outer.split.split.split_crit_edge
L26.outer.split.split.us: ; preds = %L26.outer.L26.outer.split_crit_edge
%25 = icmp eq i64 %"#temp#.0.ph", 1
br i1 %25, label %L243, label %if17
L26.outer.split.split.L26.outer.split.split.split_crit_edge: ; preds = %L26.outer.split.L26.outer.split.split_crit_edge
%26 = icmp eq i64 %"#temp#.0.ph", 1
br i1 %26, label %L243.loopexit, label %if21.lr.ph
L243.loopexit: ; preds = %L26.outer.split.L26.outer.split.split_crit_edge, %L26.outer.split.split.L26.outer.split.split.split_crit_edge
br label %L243
L243: ; preds = %L243.loopexit, %L26.outer.split.us, %L26.outer.split.split.us
%y.sroa.0.0..sroa_cast39 = bitcast %Dual.67* %0 to i64*
store i64 %y.sroa.0.0.ph, i64* %y.sroa.0.0..sroa_cast39, align 8
%y.sroa.4.sroa.0.0.y.sroa.4.0..sroa_cast45.sroa_idx = getelementptr inbounds %Dual.67, %Dual.67* %0, i64 0, i32 1, i32 0, i64 0
%27 = bitcast double* %y.sroa.4.sroa.0.0.y.sroa.4.0..sroa_cast45.sroa_idx to <2 x double>*
store <2 x double> %23, <2 x double>* %27, align 8
%y.sroa.4.sroa.5.0.y.sroa.4.0..sroa_cast45.sroa_idx70 = getelementptr inbounds %Dual.67, %Dual.67* %0, i64 0, i32 1, i32 0, i64 2
store double %y.sroa.4.sroa.5.0.ph, double* %y.sroa.4.sroa.5.0.y.sroa.4.0..sroa_cast45.sroa_idx70, align 8
ret void
if16: ; preds = %L26.outer.split.us
call void @jl_throw(i8** inttoptr (i64 139920386919984 to i8**))
unreachable
if17: ; preds = %L26.outer.split.split.us
call void @jl_throw(i8** inttoptr (i64 139920386919984 to i8**))
unreachable
if21.lr.ph: ; preds = %L26.outer.split.split.L26.outer.split.split.split_crit_edge
%28 = add i64 %"#temp#.0.ph", 1
br label %if21
if21: ; preds = %if21.lr.ph, %L181
%"i#665.0175" = phi i64 [ 0, %if21.lr.ph ], [ %35, %L181 ]
%y.sroa.4.sroa.5.1174 = phi double [ %y.sroa.4.sroa.5.0.ph, %if21.lr.ph ], [ %52, %L181 ]
%y.sroa.0.1173 = phi i64 [ %y.sroa.0.0.ph, %if21.lr.ph ], [ %55, %L181 ]
%29 = phi <2 x double> [ %23, %if21.lr.ph ], [ %51, %L181 ]
%30 = add nuw nsw i64 %"i#665.0175", 2
%31 = sitofp i64 %30 to double
%32 = call double @llvm.pow.f64(double %11, double %31)
%33 = fadd double %31, %11
%notlhs = fcmp ord double %32, 0.000000e+00
%notrhs = fcmp uno double %33, 0.000000e+00
%34 = or i1 %notlhs, %notrhs
br i1 %34, label %L130, label %if22
if22: ; preds = %if21
call void @jl_throw(i8** inttoptr (i64 139920386924168 to i8**))
unreachable
L130: ; preds = %if21
%35 = add nuw nsw i64 %"i#665.0175", 1
%36 = sitofp i64 %35 to double
%37 = call double @llvm.pow.f64(double %11, double %36)
%38 = fadd double %36, %11
%notlhs30 = fcmp ord double %37, 0.000000e+00
%notrhs31 = fcmp uno double %38, 0.000000e+00
%39 = or i1 %notrhs31, %notlhs30
br i1 %39, label %L181, label %if26
L181: ; preds = %L130
%40 = insertelement <2 x double> undef, double %31, i32 0
%41 = insertelement <2 x double> %40, double %31, i32 1
%42 = fmul <2 x double> %41, %5
%43 = fmul double %31, %8
%44 = insertelement <2 x double> undef, double %37, i32 0
%45 = insertelement <2 x double> %44, double %37, i32 1
%46 = fmul <2 x double> %45, %42
%47 = fmul double %37, %43
%48 = fdiv <2 x double> %46, %41
%49 = fdiv double %47, %31
%50 = fdiv double %32, %31
%51 = fsub <2 x double> %29, %48
%52 = fsub double %y.sroa.4.sroa.5.1174, %49
%53 = bitcast i64 %y.sroa.0.1173 to double
%54 = fsub double %53, %50
%55 = bitcast double %54 to i64
%56 = icmp slt i64 %35, %20
br i1 %56, label %if21, label %L26.outer.loopexit
if26: ; preds = %L130
call void @jl_throw(i8** inttoptr (i64 139920386924168 to i8**))
unreachable
}
Revision: 3f62885ecf2523c7e0707863a2c11ccf93fccc5f
.text
Filename: bench_log1mx_taylor.jl
pushq %rbp
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $136, %rsp
movq %rdi, -64(%rbp)
Source line: 155
vmovupd 8(%rdx), %xmm1
movabsq $140209366140704, %rax # imm = 0x7F8509768720
vmovapd (%rax), %xmm0
vmovapd %xmm1, -144(%rbp)
vxorpd %xmm0, %xmm1, %xmm1
vmovsd 24(%rdx), %xmm2 # xmm2 = mem[0],zero
vmovapd %xmm2, -128(%rbp)
vxorpd %xmm0, %xmm2, %xmm3
Source line: 7
vmovsd (%rdx), %xmm4 # xmm4 = mem[0],zero
vmovq %xmm4, %rax
movabsq $-9223372036854775808, %rbx # imm = 0x8000000000000000
xorq %rax, %rbx
Source line: 65
testq %rsi, %rsi
movl $1, %r12d
cmovgq %rsi, %r12
Source line: 221
subq $2, %r12
seto -42(%rbp)
Source line: 164
incq %r12
seto -41(%rbp)
xorl %eax, %eax
movabsq $__pow, %r15
vmovsd %xmm4, -56(%rbp)
jmp L148
nopw (%rax,%rax)
L144:
movq -72(%rbp), %rax
Source line: 222
L148:
cmpb $0, -42(%rbp)
jne L438
Source line: 165
cmpb $0, -41(%rbp)
jne L466
testq %r12, %r12
Source line: 68
jle L472
Source line: 66
cmpq $1, %rax
je L472
addq $1, %rax
movq %rax, -72(%rbp)
movl $2, %r13d
nopl (%rax)
L208:
vmovapd %xmm3, -160(%rbp)
vmovapd %xmm1, -176(%rbp)
Source line: 716
vxorps %xmm0, %xmm0, %xmm0
vcvtsi2sdq %r13, %xmm0, %xmm1
Source line: 714
vmovaps %xmm1, -112(%rbp)
vmovapd %xmm4, %xmm0
callq *%r15
vmovapd -112(%rbp), %xmm1
vmovsd -56(%rbp), %xmm4 # xmm4 = mem[0],zero
Source line: 315
vucomisd %xmm0, %xmm0
jnp L275
Source line: 714
vaddsd %xmm4, %xmm1, %xmm2
vucomisd %xmm2, %xmm2
jnp L507
L275:
vmovsd %xmm0, -80(%rbp)
Source line: 716
leaq -1(%r13), %r14
vxorps %xmm0, %xmm0, %xmm0
vcvtsi2sdq %r14, %xmm0, %xmm1
Source line: 714
vmovsd %xmm1, -88(%rbp)
vmovapd %xmm4, %xmm0
callq *%r15
vmovsd -56(%rbp), %xmm4 # xmm4 = mem[0],zero
vaddsd -88(%rbp), %xmm4, %xmm1
Source line: 315
vucomisd %xmm1, %xmm1
jp L331
vucomisd %xmm0, %xmm0
jp L536
L331:
vmovapd -112(%rbp), %xmm3
Source line: 411
vmulsd %xmm0, %xmm3, %xmm0
Source line: 155
vmovddup %xmm0, %xmm1 # xmm1 = xmm0[0,0]
vmulpd -144(%rbp), %xmm1, %xmm1
vmulsd -128(%rbp), %xmm0, %xmm0
Source line: 155
vmovddup %xmm3, %xmm2 # xmm2 = xmm3[0,0]
vdivpd %xmm2, %xmm1, %xmm1
vdivsd %xmm3, %xmm0, %xmm0
Source line: 9
vmovsd -80(%rbp), %xmm2 # xmm2 = mem[0],zero
vdivsd %xmm3, %xmm2, %xmm2
vmovapd -176(%rbp), %xmm3
Source line: 155
vsubpd %xmm1, %xmm3, %xmm3
vmovapd %xmm3, %xmm1
vmovapd -160(%rbp), %xmm3
vsubsd %xmm0, %xmm3, %xmm3
Source line: 9
vmovq %rbx, %xmm0
vsubsd %xmm2, %xmm0, %xmm0
vmovq %xmm0, %rbx
Source line: 71
addq $1, %r13
cmpq %r12, %r14
jl L208
jmp L144
Source line: 66
L438:
cmpq $1, %rax
je L472
Source line: 222
movabsq $jl_throw, %rax
movabsq $140210108911152, %rdi # imm = 0x7F8535BC4E30
callq *%rax
Source line: 66
L466:
cmpq $1, %rax
jne L565
L472:
movq -64(%rbp), %rax
Source line: 11
movq %rbx, (%rax)
vmovupd %xmm1, 8(%rax)
vmovsd %xmm3, 24(%rax)
addq $136, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
retq
L507:
movabsq $140210108911152, %rdi # imm = 0x7F8535BC4E30
Source line: 315
addq $4184, %rdi # imm = 0x1058
movabsq $jl_throw, %rax
callq *%rax
L536:
movabsq $140210108911152, %rdi # imm = 0x7F8535BC4E30
Source line: 315
addq $4184, %rdi # imm = 0x1058
movabsq $jl_throw, %rax
callq *%rax
Source line: 165
L565:
movabsq $jl_throw, %rax
movabsq $140210108911152, %rdi # imm = 0x7F8535BC4E30
callq *%rax
nopl (%rax,%rax)
Revision: 3f62885ecf2523c7e0707863a2c11ccf93fccc5f
define void @julia_log1mx_taylor_62984(%Dual.66* noalias nocapture sret, i64, %Dual.66* nocapture readonly dereferenceable(32)) #0 !dbg !5 {
top:
%3 = getelementptr inbounds %Dual.66, %Dual.66* %2, i64 0, i32 1, i32 0, i64 0
%4 = bitcast double* %3 to <2 x double>*
%5 = load <2 x double>, <2 x double>* %4, align 8
%6 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %5
%7 = getelementptr inbounds %Dual.66, %Dual.66* %2, i64 0, i32 1, i32 0, i64 2
%8 = load double, double* %7, align 8
%9 = fsub double -0.000000e+00, %8
%10 = getelementptr inbounds %Dual.66, %Dual.66* %2, i64 0, i32 0
%11 = load double, double* %10, align 8
%12 = fsub double -0.000000e+00, %11
%13 = bitcast double %12 to i64
%14 = icmp sgt i64 %1, 1
%15 = select i1 %14, i64 %1, i64 1
%16 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %15, i64 2)
%17 = extractvalue { i64, i1 } %16, 1
%18 = extractvalue { i64, i1 } %16, 0
%19 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %18, i64 1)
%20 = extractvalue { i64, i1 } %19, 0
%21 = extractvalue { i64, i1 } %19, 1
%22 = icmp slt i64 %20, 1
br label %L26.outer
L26.outer.loopexit: ; preds = %L116
br label %L26.outer
L26.outer: ; preds = %L26.outer.loopexit, %top
%"#temp#.0.ph" = phi i64 [ 0, %top ], [ %28, %L26.outer.loopexit ]
%y.sroa.0.0.ph = phi i64 [ %13, %top ], [ %54, %L26.outer.loopexit ]
%y.sroa.4.sroa.5.0.ph = phi double [ %9, %top ], [ %51, %L26.outer.loopexit ]
%23 = phi <2 x double> [ %6, %top ], [ %50, %L26.outer.loopexit ]
br i1 %17, label %L26.outer.split.us, label %L26.outer.L26.outer.split_crit_edge
L26.outer.L26.outer.split_crit_edge: ; preds = %L26.outer
br i1 %21, label %L26.outer.split.split.us, label %L26.outer.split.L26.outer.split.split_crit_edge
L26.outer.split.us: ; preds = %L26.outer
%24 = icmp eq i64 %"#temp#.0.ph", 1
br i1 %24, label %L203, label %if13
L26.outer.split.L26.outer.split.split_crit_edge: ; preds = %L26.outer.L26.outer.split_crit_edge
br i1 %22, label %L203.loopexit, label %L26.outer.split.split.L26.outer.split.split.split_crit_edge
L26.outer.split.split.us: ; preds = %L26.outer.L26.outer.split_crit_edge
%25 = icmp eq i64 %"#temp#.0.ph", 1
br i1 %25, label %L203, label %if14
L26.outer.split.split.L26.outer.split.split.split_crit_edge: ; preds = %L26.outer.split.L26.outer.split.split_crit_edge
%26 = icmp eq i64 %"#temp#.0.ph", 1
br i1 %26, label %L203.loopexit, label %if18.lr.ph
L203.loopexit: ; preds = %L26.outer.split.L26.outer.split.split_crit_edge, %L26.outer.split.split.L26.outer.split.split.split_crit_edge
br label %L203
L203: ; preds = %L203.loopexit, %L26.outer.split.us, %L26.outer.split.split.us
%y.sroa.0.0..sroa_cast31 = bitcast %Dual.66* %0 to i64*
store i64 %y.sroa.0.0.ph, i64* %y.sroa.0.0..sroa_cast31, align 8
%y.sroa.4.sroa.0.0.y.sroa.4.0..sroa_cast37.sroa_idx = getelementptr inbounds %Dual.66, %Dual.66* %0, i64 0, i32 1, i32 0, i64 0
%27 = bitcast double* %y.sroa.4.sroa.0.0.y.sroa.4.0..sroa_cast37.sroa_idx to <2 x double>*
store <2 x double> %23, <2 x double>* %27, align 8
%y.sroa.4.sroa.5.0.y.sroa.4.0..sroa_cast37.sroa_idx62 = getelementptr inbounds %Dual.66, %Dual.66* %0, i64 0, i32 1, i32 0, i64 2
store double %y.sroa.4.sroa.5.0.ph, double* %y.sroa.4.sroa.5.0.y.sroa.4.0..sroa_cast37.sroa_idx62, align 8
ret void
if13: ; preds = %L26.outer.split.us
call void @jl_throw(i8** inttoptr (i64 139708457471536 to i8**))
unreachable
if14: ; preds = %L26.outer.split.split.us
call void @jl_throw(i8** inttoptr (i64 139708457471536 to i8**))
unreachable
if18.lr.ph: ; preds = %L26.outer.split.split.L26.outer.split.split.split_crit_edge
%28 = add i64 %"#temp#.0.ph", 1
br label %if18
if18: ; preds = %if18.lr.ph, %L116
%"i#665.0144" = phi i64 [ 0, %if18.lr.ph ], [ %35, %L116 ]
%y.sroa.4.sroa.5.1143 = phi double [ %y.sroa.4.sroa.5.0.ph, %if18.lr.ph ], [ %51, %L116 ]
%y.sroa.0.1141 = phi i64 [ %y.sroa.0.0.ph, %if18.lr.ph ], [ %54, %L116 ]
%29 = phi <2 x double> [ %23, %if18.lr.ph ], [ %50, %L116 ]
%30 = add nuw nsw i64 %"i#665.0144", 2
%31 = sitofp i64 %30 to double
%32 = call double @llvm.pow.f64(double %11, double %31)
%33 = fadd double %31, %11
%notlhs = fcmp ord double %32, 0.000000e+00
%notrhs = fcmp uno double %33, 0.000000e+00
%34 = or i1 %notlhs, %notrhs
br i1 %34, label %L97, label %if19
if19: ; preds = %if18
call void @jl_throw(i8** inttoptr (i64 139708457475720 to i8**))
unreachable
L97: ; preds = %if18
%35 = add nuw nsw i64 %"i#665.0144", 1
%36 = sitofp i64 %35 to double
%37 = call double @llvm.pow.f64(double %11, double %36)
%38 = fadd double %36, %11
%notlhs26 = fcmp ord double %37, 0.000000e+00
%notrhs27 = fcmp uno double %38, 0.000000e+00
%39 = or i1 %notrhs27, %notlhs26
br i1 %39, label %L116, label %if22
if22: ; preds = %L97
call void @jl_throw(i8** inttoptr (i64 139708457475720 to i8**))
unreachable
L116: ; preds = %L97
%40 = fmul double %31, %37
%41 = insertelement <2 x double> undef, double %40, i32 0
%42 = insertelement <2 x double> %41, double %40, i32 1
%43 = fmul <2 x double> %42, %5
%44 = fmul double %40, %8
%45 = insertelement <2 x double> undef, double %31, i32 0
%46 = insertelement <2 x double> %45, double %31, i32 1
%47 = fdiv <2 x double> %43, %46
%48 = fdiv double %44, %31
%49 = fdiv double %32, %31
%50 = fsub <2 x double> %29, %47
%51 = fsub double %y.sroa.4.sroa.5.1143, %48
%52 = bitcast i64 %y.sroa.0.1141 to double
%53 = fsub double %52, %49
%54 = bitcast double %53 to i64
%55 = icmp slt i64 %35, %20
br i1 %55, label %if18, label %L26.outer.loopexit
}
using ForwardDiff: Dual
using BenchmarkTools
# Benchmark group for this file; the loop further down stores one
# `@benchmarkable` entry per input value under a string key.
const SUITE = BenchmarkGroup()
"""
    log1mx_taylor(p, x)

Evaluate the truncated Taylor series of `log(1 - x)` about zero,

    -(x + x^2/2 + x^3/3 + ... + x^p/p)

using the arithmetic of whatever number type `x` has. When `p < 2` the
range `2:p` is empty and the result is simply `-x`.
"""
function log1mx_taylor(p, x)
    acc = -x                          # n = 1 term of the series
    @simd for k in 2:p
        @inbounds acc = acc - x^k / k # subtract the k-th term
    end
    return acc
end
# Register one benchmark per input. `Dual.(...)` broadcasts the `Dual`
# constructor over the range -0.1:0.1:0.1, passing 1.0, 2.0 and 3.0 as the
# remaining arguments for every element (presumably the three partials --
# confirm against the ForwardDiff `Dual` constructor).
for x in Dual.(-0.1:0.1:0.1, 1.0, 2.0, 3.0)
# The key embeds the printed value of `x`; each benchmark evaluates the
# series with p = 1000. `$x` is BenchmarkTools' interpolation syntax, which
# splices the loop value into the benchmark expression.
SUITE["log(1-$x)"] = @benchmarkable log1mx_taylor(1000, $x)
end
"""
    print_code_native(io = STDOUT)

Write the native code generated for `log1mx_taylor` with argument types
`(Int, Dual{:t,Float64,3})` to `io`.

NOTE(review): the benchmarks above build their inputs with `Dual.(...)`;
confirm that the tag parameter `:t` used here matches the method instance
that is actually benchmarked.
"""
function print_code_native(io = STDOUT)
    argtypes = (Int, Dual{:t,Float64,3})
    return code_native(io, log1mx_taylor, argtypes)
end
"""
    print_code_llvm(io = STDOUT)

Write the LLVM IR generated for `log1mx_taylor` with argument types
`(Int, Dual{:t,Float64,3})` to `io`.

NOTE(review): the benchmarks above build their inputs with `Dual.(...)`;
confirm that the tag parameter `:t` used here matches the method instance
that is actually benchmarked.
"""
function print_code_llvm(io = STDOUT)
    argtypes = (Int, Dual{:t,Float64,3})
    return code_llvm(io, log1mx_taylor, argtypes)
end

Benchmark Report for ForwardDiff

Job Properties

  • Time of benchmarks:
    • Target: 18 Jul 2018 - 00:06
    • Baseline: 18 Jul 2018 - 00:06
  • Package commits:
    • Target: 6d0eb4
    • Baseline: 3f6288
  • Julia commits:
    • Target: 9d11f6
    • Baseline: 9d11f6
  • Julia command flags:
    • Target: -O3
    • Baseline: -O3
  • Environment variables:
    • Target: None
    • Baseline: None

Results

A ratio greater than 1.0 denotes a possible regression (marked with ❌), while a ratio less than 1.0 denotes a possible improvement (marked with ✅). Only significant results - results that indicate possible regressions or improvements - are shown below (thus, an empty table means that all benchmark results remained invariant between builds).

ID time ratio memory ratio

Benchmark Group List

Here's a list of all the benchmark groups executed by this job:

  • []

Julia versioninfo

Target

Julia Version 0.6.4
Commit 9d11f62bcb (2018-07-09 19:09 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz
  WORD_SIZE: 64
           Ubuntu 16.04.4 LTS
  uname: Linux 4.4.0-119-generic #143-Ubuntu SMP Mon Apr 2 16:08:24 UTC 2018 x86_64 x86_64
Memory: 125.78757095336914 GB (58058.859375 MB free)
Uptime: 3.492646e6 sec
Load Avg:  1.06591796875  0.5380859375  0.34033203125
Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz: 
          speed         user         nice          sys         idle          irq
#1-48  1281 MHz  211054235 s      11102 s  183761839 s  16362297681 s          0 s

  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=16)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, broadwell)

Baseline

Julia Version 0.6.4
Commit 9d11f62bcb (2018-07-09 19:09 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz
  WORD_SIZE: 64
           Ubuntu 16.04.4 LTS
  uname: Linux 4.4.0-119-generic #143-Ubuntu SMP Mon Apr 2 16:08:24 UTC 2018 x86_64 x86_64
Memory: 125.78757095336914 GB (58055.34375 MB free)
Uptime: 3.492677e6 sec
Load Avg:  1.103515625  0.6064453125  0.3720703125
Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz: 
          speed         user         nice          sys         idle          irq
#1-48  1645 MHz  211057373 s      11102 s  183762043 s  16362445204 s          0 s

  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=16)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, broadwell)
--- bench_log1mx_taylor-HEAD~0.ll 2018-07-18 00:09:10.691657082 -0700
+++ bench_log1mx_taylor-HEAD~1.ll 2018-07-18 00:09:28.875961410 -0700
@@ -1,16 +1,16 @@
-Revision: 6d0eb49e3ef74d2e97c7075af088f3ef1590a43e
+Revision: 3f62885ecf2523c7e0707863a2c11ccf93fccc5f
-define void @julia_log1mx_taylor_62988(%Dual.67* noalias nocapture sret, i64, %Dual.67* nocapture readonly dereferenceable(32)) #0 !dbg !5 {
+define void @julia_log1mx_taylor_62984(%Dual.66* noalias nocapture sret, i64, %Dual.66* nocapture readonly dereferenceable(32)) #0 !dbg !5 {
top:
- %3 = getelementptr inbounds %Dual.67, %Dual.67* %2, i64 0, i32 1, i32 0, i64 0
+ %3 = getelementptr inbounds %Dual.66, %Dual.66* %2, i64 0, i32 1, i32 0, i64 0
%4 = bitcast double* %3 to <2 x double>*
%5 = load <2 x double>, <2 x double>* %4, align 8
%6 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %5
- %7 = getelementptr inbounds %Dual.67, %Dual.67* %2, i64 0, i32 1, i32 0, i64 2
+ %7 = getelementptr inbounds %Dual.66, %Dual.66* %2, i64 0, i32 1, i32 0, i64 2
%8 = load double, double* %7, align 8
%9 = fsub double -0.000000e+00, %8
- %10 = getelementptr inbounds %Dual.67, %Dual.67* %2, i64 0, i32 0
+ %10 = getelementptr inbounds %Dual.66, %Dual.66* %2, i64 0, i32 0
%11 = load double, double* %10, align 8
%12 = fsub double -0.000000e+00, %11
%13 = bitcast double %12 to i64
@@ -25,14 +25,14 @@
%22 = icmp slt i64 %20, 1
br label %L26.outer
-L26.outer.loopexit: ; preds = %L181
+L26.outer.loopexit: ; preds = %L116
br label %L26.outer
L26.outer: ; preds = %L26.outer.loopexit, %top
%"#temp#.0.ph" = phi i64 [ 0, %top ], [ %28, %L26.outer.loopexit ]
- %y.sroa.0.0.ph = phi i64 [ %13, %top ], [ %55, %L26.outer.loopexit ]
- %y.sroa.4.sroa.5.0.ph = phi double [ %9, %top ], [ %52, %L26.outer.loopexit ]
- %23 = phi <2 x double> [ %6, %top ], [ %51, %L26.outer.loopexit ]
+ %y.sroa.0.0.ph = phi i64 [ %13, %top ], [ %54, %L26.outer.loopexit ]
+ %y.sroa.4.sroa.5.0.ph = phi double [ %9, %top ], [ %51, %L26.outer.loopexit ]
+ %23 = phi <2 x double> [ %6, %top ], [ %50, %L26.outer.loopexit ]
br i1 %17, label %L26.outer.split.us, label %L26.outer.L26.outer.split_crit_edge
L26.outer.L26.outer.split_crit_edge: ; preds = %L26.outer
@@ -40,93 +40,92 @@
L26.outer.split.us: ; preds = %L26.outer
%24 = icmp eq i64 %"#temp#.0.ph", 1
- br i1 %24, label %L243, label %if16
+ br i1 %24, label %L203, label %if13
L26.outer.split.L26.outer.split.split_crit_edge: ; preds = %L26.outer.L26.outer.split_crit_edge
- br i1 %22, label %L243.loopexit, label %L26.outer.split.split.L26.outer.split.split.split_crit_edge
+ br i1 %22, label %L203.loopexit, label %L26.outer.split.split.L26.outer.split.split.split_crit_edge
L26.outer.split.split.us: ; preds = %L26.outer.L26.outer.split_crit_edge
%25 = icmp eq i64 %"#temp#.0.ph", 1
- br i1 %25, label %L243, label %if17
+ br i1 %25, label %L203, label %if14
L26.outer.split.split.L26.outer.split.split.split_crit_edge: ; preds = %L26.outer.split.L26.outer.split.split_crit_edge
%26 = icmp eq i64 %"#temp#.0.ph", 1
- br i1 %26, label %L243.loopexit, label %if21.lr.ph
+ br i1 %26, label %L203.loopexit, label %if18.lr.ph
-L243.loopexit: ; preds = %L26.outer.split.L26.outer.split.split_crit_edge, %L26.outer.split.split.L26.outer.split.split.split_crit_edge
- br label %L243
+L203.loopexit: ; preds = %L26.outer.split.L26.outer.split.split_crit_edge, %L26.outer.split.split.L26.outer.split.split.split_crit_edge
+ br label %L203
-L243: ; preds = %L243.loopexit, %L26.outer.split.us, %L26.outer.split.split.us
- %y.sroa.0.0..sroa_cast39 = bitcast %Dual.67* %0 to i64*
- store i64 %y.sroa.0.0.ph, i64* %y.sroa.0.0..sroa_cast39, align 8
- %y.sroa.4.sroa.0.0.y.sroa.4.0..sroa_cast45.sroa_idx = getelementptr inbounds %Dual.67, %Dual.67* %0, i64 0, i32 1, i32 0, i64 0
- %27 = bitcast double* %y.sroa.4.sroa.0.0.y.sroa.4.0..sroa_cast45.sroa_idx to <2 x double>*
+L203: ; preds = %L203.loopexit, %L26.outer.split.us, %L26.outer.split.split.us
+ %y.sroa.0.0..sroa_cast31 = bitcast %Dual.66* %0 to i64*
+ store i64 %y.sroa.0.0.ph, i64* %y.sroa.0.0..sroa_cast31, align 8
+ %y.sroa.4.sroa.0.0.y.sroa.4.0..sroa_cast37.sroa_idx = getelementptr inbounds %Dual.66, %Dual.66* %0, i64 0, i32 1, i32 0, i64 0
+ %27 = bitcast double* %y.sroa.4.sroa.0.0.y.sroa.4.0..sroa_cast37.sroa_idx to <2 x double>*
store <2 x double> %23, <2 x double>* %27, align 8
- %y.sroa.4.sroa.5.0.y.sroa.4.0..sroa_cast45.sroa_idx70 = getelementptr inbounds %Dual.67, %Dual.67* %0, i64 0, i32 1, i32 0, i64 2
- store double %y.sroa.4.sroa.5.0.ph, double* %y.sroa.4.sroa.5.0.y.sroa.4.0..sroa_cast45.sroa_idx70, align 8
+ %y.sroa.4.sroa.5.0.y.sroa.4.0..sroa_cast37.sroa_idx62 = getelementptr inbounds %Dual.66, %Dual.66* %0, i64 0, i32 1, i32 0, i64 2
+ store double %y.sroa.4.sroa.5.0.ph, double* %y.sroa.4.sroa.5.0.y.sroa.4.0..sroa_cast37.sroa_idx62, align 8
ret void
-if16: ; preds = %L26.outer.split.us
- call void @jl_throw(i8** inttoptr (i64 139920386919984 to i8**))
+if13: ; preds = %L26.outer.split.us
+ call void @jl_throw(i8** inttoptr (i64 139708457471536 to i8**))
unreachable
-if17: ; preds = %L26.outer.split.split.us
- call void @jl_throw(i8** inttoptr (i64 139920386919984 to i8**))
+if14: ; preds = %L26.outer.split.split.us
+ call void @jl_throw(i8** inttoptr (i64 139708457471536 to i8**))
unreachable
-if21.lr.ph: ; preds = %L26.outer.split.split.L26.outer.split.split.split_crit_edge
+if18.lr.ph: ; preds = %L26.outer.split.split.L26.outer.split.split.split_crit_edge
%28 = add i64 %"#temp#.0.ph", 1
- br label %if21
+ br label %if18
-if21: ; preds = %if21.lr.ph, %L181
- %"i#665.0175" = phi i64 [ 0, %if21.lr.ph ], [ %35, %L181 ]
- %y.sroa.4.sroa.5.1174 = phi double [ %y.sroa.4.sroa.5.0.ph, %if21.lr.ph ], [ %52, %L181 ]
- %y.sroa.0.1173 = phi i64 [ %y.sroa.0.0.ph, %if21.lr.ph ], [ %55, %L181 ]
- %29 = phi <2 x double> [ %23, %if21.lr.ph ], [ %51, %L181 ]
- %30 = add nuw nsw i64 %"i#665.0175", 2
+if18: ; preds = %if18.lr.ph, %L116
+ %"i#665.0144" = phi i64 [ 0, %if18.lr.ph ], [ %35, %L116 ]
+ %y.sroa.4.sroa.5.1143 = phi double [ %y.sroa.4.sroa.5.0.ph, %if18.lr.ph ], [ %51, %L116 ]
+ %y.sroa.0.1141 = phi i64 [ %y.sroa.0.0.ph, %if18.lr.ph ], [ %54, %L116 ]
+ %29 = phi <2 x double> [ %23, %if18.lr.ph ], [ %50, %L116 ]
+ %30 = add nuw nsw i64 %"i#665.0144", 2
%31 = sitofp i64 %30 to double
%32 = call double @llvm.pow.f64(double %11, double %31)
%33 = fadd double %31, %11
%notlhs = fcmp ord double %32, 0.000000e+00
%notrhs = fcmp uno double %33, 0.000000e+00
%34 = or i1 %notlhs, %notrhs
- br i1 %34, label %L130, label %if22
+ br i1 %34, label %L97, label %if19
-if22: ; preds = %if21
- call void @jl_throw(i8** inttoptr (i64 139920386924168 to i8**))
+if19: ; preds = %if18
+ call void @jl_throw(i8** inttoptr (i64 139708457475720 to i8**))
unreachable
-L130: ; preds = %if21
- %35 = add nuw nsw i64 %"i#665.0175", 1
+L97: ; preds = %if18
+ %35 = add nuw nsw i64 %"i#665.0144", 1
%36 = sitofp i64 %35 to double
%37 = call double @llvm.pow.f64(double %11, double %36)
%38 = fadd double %36, %11
- %notlhs30 = fcmp ord double %37, 0.000000e+00
- %notrhs31 = fcmp uno double %38, 0.000000e+00
- %39 = or i1 %notrhs31, %notlhs30
- br i1 %39, label %L181, label %if26
-
-L181: ; preds = %L130
- %40 = insertelement <2 x double> undef, double %31, i32 0
- %41 = insertelement <2 x double> %40, double %31, i32 1
- %42 = fmul <2 x double> %41, %5
- %43 = fmul double %31, %8
- %44 = insertelement <2 x double> undef, double %37, i32 0
- %45 = insertelement <2 x double> %44, double %37, i32 1
- %46 = fmul <2 x double> %45, %42
- %47 = fmul double %37, %43
- %48 = fdiv <2 x double> %46, %41
- %49 = fdiv double %47, %31
- %50 = fdiv double %32, %31
- %51 = fsub <2 x double> %29, %48
- %52 = fsub double %y.sroa.4.sroa.5.1174, %49
- %53 = bitcast i64 %y.sroa.0.1173 to double
- %54 = fsub double %53, %50
- %55 = bitcast double %54 to i64
- %56 = icmp slt i64 %35, %20
- br i1 %56, label %if21, label %L26.outer.loopexit
+ %notlhs26 = fcmp ord double %37, 0.000000e+00
+ %notrhs27 = fcmp uno double %38, 0.000000e+00
+ %39 = or i1 %notrhs27, %notlhs26
+ br i1 %39, label %L116, label %if22
-if26: ; preds = %L130
- call void @jl_throw(i8** inttoptr (i64 139920386924168 to i8**))
+if22: ; preds = %L97
+ call void @jl_throw(i8** inttoptr (i64 139708457475720 to i8**))
unreachable
+
+L116: ; preds = %L97
+ %40 = fmul double %31, %37
+ %41 = insertelement <2 x double> undef, double %40, i32 0
+ %42 = insertelement <2 x double> %41, double %40, i32 1
+ %43 = fmul <2 x double> %42, %5
+ %44 = fmul double %40, %8
+ %45 = insertelement <2 x double> undef, double %31, i32 0
+ %46 = insertelement <2 x double> %45, double %31, i32 1
+ %47 = fdiv <2 x double> %43, %46
+ %48 = fdiv double %44, %31
+ %49 = fdiv double %32, %31
+ %50 = fsub <2 x double> %29, %47
+ %51 = fsub double %y.sroa.4.sroa.5.1143, %48
+ %52 = bitcast i64 %y.sroa.0.1141 to double
+ %53 = fsub double %52, %49
+ %54 = bitcast double %53 to i64
+ %55 = icmp slt i64 %35, %20
+ br i1 %55, label %if18, label %L26.outer.loopexit
}
--- bench_log1mx_taylor-HEAD~0.code_native 2018-07-18 00:07:56.190410301 -0700
+++ bench_log1mx_taylor-HEAD~1.code_native 2018-07-18 00:08:14.258712667 -0700
@@ -1,4 +1,4 @@
-Revision: 6d0eb49e3ef74d2e97c7075af088f3ef1590a43e
+Revision: 3f62885ecf2523c7e0707863a2c11ccf93fccc5f
.text
Filename: bench_log1mx_taylor.jl
@@ -13,7 +13,7 @@
movq %rdi, -64(%rbp)
Source line: 155
vmovupd 8(%rdx), %xmm1
- movabsq $140352823859392, %rax # imm = 0x7FA6703598C0
+ movabsq $140209366140704, %rax # imm = 0x7F8509768720
vmovapd (%rax), %xmm0
vmovapd %xmm1, -144(%rbp)
vxorpd %xmm0, %xmm1, %xmm1
@@ -21,8 +21,8 @@
vmovapd %xmm2, -128(%rbp)
vxorpd %xmm0, %xmm2, %xmm3
Source line: 7
- vmovsd (%rdx), %xmm5 # xmm5 = mem[0],zero
- vmovq %xmm5, %rax
+ vmovsd (%rdx), %xmm4 # xmm4 = mem[0],zero
+ vmovq %xmm4, %rax
movabsq $-9223372036854775808, %rbx # imm = 0x8000000000000000
xorq %rax, %rbx
Source line: 65
@@ -37,7 +37,7 @@
seto -41(%rbp)
xorl %eax, %eax
movabsq $__pow, %r15
- vmovsd %xmm5, -56(%rbp)
+ vmovsd %xmm4, -56(%rbp)
jmp L148
nopw (%rax,%rax)
L144:
@@ -45,16 +45,16 @@
Source line: 222
L148:
cmpb $0, -42(%rbp)
- jne L442
+ jne L438
Source line: 165
cmpb $0, -41(%rbp)
- jne L470
+ jne L466
testq %r12, %r12
Source line: 68
- jle L476
+ jle L472
Source line: 66
cmpq $1, %rax
- je L476
+ je L472
addq $1, %rax
movq %rax, -72(%rbp)
movl $2, %r13d
@@ -67,7 +67,7 @@
vcvtsi2sdq %r13, %xmm0, %xmm1
Source line: 714
vmovaps %xmm1, -112(%rbp)
- vmovapd %xmm5, %xmm0
+ vmovapd %xmm4, %xmm0
callq *%r15
vmovapd -112(%rbp), %xmm1
vmovsd -56(%rbp), %xmm4 # xmm4 = mem[0],zero
@@ -77,7 +77,7 @@
Source line: 714
vaddsd %xmm4, %xmm1, %xmm2
vucomisd %xmm2, %xmm2
- jnp L511
+ jnp L507
L275:
vmovsd %xmm0, -80(%rbp)
Source line: 716
@@ -88,29 +88,28 @@
vmovsd %xmm1, -88(%rbp)
vmovapd %xmm4, %xmm0
callq *%r15
- vmovsd -56(%rbp), %xmm5 # xmm5 = mem[0],zero
- vaddsd -88(%rbp), %xmm5, %xmm1
+ vmovsd -56(%rbp), %xmm4 # xmm4 = mem[0],zero
+ vaddsd -88(%rbp), %xmm4, %xmm1
Source line: 315
vucomisd %xmm1, %xmm1
jp L331
vucomisd %xmm0, %xmm0
- jp L540
+ jp L536
L331:
- vmovapd -112(%rbp), %xmm6
-Source line: 155
- vmovddup %xmm6, %xmm1 # xmm1 = xmm6[0,0]
- vmulpd -144(%rbp), %xmm1, %xmm2
- vmulsd -128(%rbp), %xmm6, %xmm3
+ vmovapd -112(%rbp), %xmm3
+Source line: 411
+ vmulsd %xmm0, %xmm3, %xmm0
Source line: 155
- vmovddup %xmm0, %xmm4 # xmm4 = xmm0[0,0]
- vmulpd %xmm2, %xmm4, %xmm2
- vmulsd %xmm3, %xmm0, %xmm0
+ vmovddup %xmm0, %xmm1 # xmm1 = xmm0[0,0]
+ vmulpd -144(%rbp), %xmm1, %xmm1
+ vmulsd -128(%rbp), %xmm0, %xmm0
Source line: 155
- vdivpd %xmm1, %xmm2, %xmm1
- vdivsd %xmm6, %xmm0, %xmm0
+ vmovddup %xmm3, %xmm2 # xmm2 = xmm3[0,0]
+ vdivpd %xmm2, %xmm1, %xmm1
+ vdivsd %xmm3, %xmm0, %xmm0
Source line: 9
vmovsd -80(%rbp), %xmm2 # xmm2 = mem[0],zero
- vdivsd %xmm6, %xmm2, %xmm2
+ vdivsd %xmm3, %xmm2, %xmm2
vmovapd -176(%rbp), %xmm3
Source line: 155
vsubpd %xmm1, %xmm3, %xmm3
@@ -127,18 +126,18 @@
jl L208
jmp L144
Source line: 66
-L442:
+L438:
cmpq $1, %rax
- je L476
+ je L472
Source line: 222
movabsq $jl_throw, %rax
- movabsq $140353566625328, %rdi # imm = 0x7FA69C7B4E30
+ movabsq $140210108911152, %rdi # imm = 0x7F8535BC4E30
callq *%rax
Source line: 66
-L470:
+L466:
cmpq $1, %rax
- jne L569
-L476:
+ jne L565
+L472:
movq -64(%rbp), %rax
Source line: 11
movq %rbx, (%rax)
@@ -152,21 +151,21 @@
popq %r15
popq %rbp
retq
-L511:
- movabsq $140353566625328, %rdi # imm = 0x7FA69C7B4E30
+L507:
+ movabsq $140210108911152, %rdi # imm = 0x7F8535BC4E30
Source line: 315
addq $4184, %rdi # imm = 0x1058
movabsq $jl_throw, %rax
callq *%rax
-L540:
- movabsq $140353566625328, %rdi # imm = 0x7FA69C7B4E30
+L536:
+ movabsq $140210108911152, %rdi # imm = 0x7F8535BC4E30
Source line: 315
addq $4184, %rdi # imm = 0x1058
movabsq $jl_throw, %rax
callq *%rax
Source line: 165
-L569:
+L565:
movabsq $jl_throw, %rax
- movabsq $140353566625328, %rdi # imm = 0x7FA69C7B4E30
+ movabsq $140210108911152, %rdi # imm = 0x7F8535BC4E30
callq *%rax
- nop
+ nopl (%rax,%rax)
# Judge the ForwardDiff benchmarks of HEAD (target) against HEAD^ (baseline)
# and write the raw results plus a markdown report.
#
# Usage: julia judge.jl SCRIPT REPORT_NAME
#   SCRIPT       benchmark script handed to PkgBenchmark
#   REPORT_NAME  prefix for the generated "<REPORT_NAME>_*.json" / "_judge.md" files
using PkgBenchmark: judge, target_result, baseline_result,
    writeresults, export_markdown, BenchmarkConfig

script, report_name, = ARGS
mkpath(dirname(report_name))

# Both revisions are benchmarked with the same julia binary at -O3.
julia_exe = joinpath(JULIA_HOME, Base.julia_exename())
juliacmd = `$julia_exe -O3`
target = BenchmarkConfig(id = "HEAD", juliacmd = juliacmd)
baseline = BenchmarkConfig(id = "HEAD^", juliacmd = juliacmd)

results = judge("ForwardDiff", target, baseline; script = script)

# Persist each side of the comparison, then the rendered judgement.
writeresults("$(report_name)_target.json", target_result(results))
writeresults("$(report_name)_baseline.json", baseline_result(results))
export_markdown("$(report_name)_judge.md", results)

# Echo the full judgement to stdout as well.
showall(results)
println()
# Dump LLVM IR for a benchmark script as of a given ForwardDiff revision.
#
# Usage: julia llvm_dump.jl SCRIPT REPORT_NAME REV
#   SCRIPT       Julia script to `include` while REV is checked out
#   REPORT_NAME  prefix of the output file "<REPORT_NAME>-<REV>.ll"
#   REV          git revision of ForwardDiff to check out (e.g. HEAD~1)
script, report_name, rev, = ARGS
mkpath(dirname(report_name))

cd(Pkg.dir("ForwardDiff")) do
    run(`git checkout $rev`)
    try
        @eval include(script)
    finally
        # Roll back even when the included script throws; otherwise the
        # package checkout would be left sitting on `rev`.
        run(`git checkout -`)
    end
end

# Resolve REV to a full commit hash so the dump records exactly what was used.
real_rev = cd(Pkg.dir("ForwardDiff")) do
    strip(readstring(`git rev-parse $rev`))
end

open("$(report_name)-$rev.ll", "w") do io
    println(io, "Revision: ", real_rev)
    println(io)
    # NOTE(review): print_code_llvm is not defined in this script; presumably
    # the included SCRIPT provides it - confirm.
    print_code_llvm(io)
end
# Build benchmark judgement reports and LLVM / native code diffs between
# HEAD~0 and HEAD~1 of ForwardDiff for each benchmark script.

REPORTS = \
	bench_exp_taylor_judge.md \
	bench_log1mx_taylor_judge.md

DIFF_CODE_NATIVE = \
	bench_exp_taylor_native.diff \
	bench_log1mx_taylor_native.diff

DIFF_CODE_LLVM = \
	bench_exp_taylor_llvm.diff \
	bench_log1mx_taylor_llvm.diff

JULIA = julia --color=yes -O3

# `all` names no file, so it must always be considered out of date.
.PHONY: all

# The dump recipes run `git checkout` inside the single shared ForwardDiff
# package checkout, so running them concurrently under `make -j` would race.
.NOTPARALLEL:

all: $(REPORTS) $(DIFF_CODE_NATIVE) $(DIFF_CODE_LLVM)

# $^ expands to "judge.jl <bench>.jl" and $* to the benchmark stem.
$(REPORTS): %_judge.md: judge.jl %.jl
	$(JULIA) $^ $*

# diff exits 1 when the inputs differ (the expected case here) and 2 on real
# trouble such as a missing input file; only the former is tolerated.
$(DIFF_CODE_NATIVE): %_native.diff: native_dump.jl %.jl
	$(JULIA) $^ $* 'HEAD~0'
	$(JULIA) $^ $* 'HEAD~1'
	diff -u $*-'HEAD~0'.code_native $*-'HEAD~1'.code_native > $@ || [ $$? -eq 1 ]

$(DIFF_CODE_LLVM): %_llvm.diff: llvm_dump.jl %.jl
	$(JULIA) $^ $* 'HEAD~0'
	$(JULIA) $^ $* 'HEAD~1'
	diff -u $*-'HEAD~0'.ll $*-'HEAD~1'.ll > $@ || [ $$? -eq 1 ]
# Dump native code for a benchmark script as of a given ForwardDiff revision.
#
# Usage: julia native_dump.jl SCRIPT REPORT_NAME REV
#   SCRIPT       Julia script to `include` while REV is checked out
#   REPORT_NAME  prefix of the output file "<REPORT_NAME>-<REV>.code_native"
#   REV          git revision of ForwardDiff to check out (e.g. HEAD~1)
script, report_name, rev, = ARGS
mkpath(dirname(report_name))

cd(Pkg.dir("ForwardDiff")) do
    run(`git checkout $rev`)
    try
        @eval include(script)
    finally
        # Roll back even when the included script throws; otherwise the
        # package checkout would be left sitting on `rev`.
        run(`git checkout -`)
    end
end

# Resolve REV to a full commit hash so the dump records exactly what was used.
real_rev = cd(Pkg.dir("ForwardDiff")) do
    strip(readstring(`git rev-parse $rev`))
end

open("$(report_name)-$rev.code_native", "w") do io
    println(io, "Revision: ", real_rev)
    println(io)
    # NOTE(review): print_code_native is not defined in this script; presumably
    # the included SCRIPT provides it - confirm.
    print_code_native(io)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment