Skip to content

Instantly share code, notes, and snippets.

@tkf
Created July 18, 2018 21:04
Show Gist options
  • Save tkf/8e743a7e5e4fe4e49a4e996fd7b577a8 to your computer and use it in GitHub Desktop.
Save tkf/8e743a7e5e4fe4e49a4e996fd7b577a8 to your computer and use it in GitHub Desktop.
; Compiler-generated LLVM IR; presumably the code_llvm dump of the script's
; f(a, b, p, q) = a^p + b^q for Dual arguments (the symbol is @julia_f_62604).
; Unnamed parameters, in order:
;   %0 - sret pointer that receives the resulting %Dual
;   %1 - first %Dual operand (readonly, dereferenceable(32))
;   %2 - second %Dual operand (readonly, dereferenceable(32))
;   %3 - i64 exponent applied to %1
;   %4 - i64 exponent applied to %2
; Every llvm.pow call below is followed by the same guard: jl_throw is called
; when the pow result is NaN while (exponent + base) is not NaN.
; NOTE(review): the thrown object is only visible as an absolute address;
; presumably a prebuilt DomainError instance - confirm.
define void @julia_f_62604(%Dual* noalias nocapture sret, %Dual* nocapture readonly dereferenceable(32), %Dual* nocapture readonly dereferenceable(32), i64, i64) #0 !dbg !5 {
top:
; %6 = field 0 of %1 (presumably the Dual's value); %7 = double(%3); %8 = pow(%6, %7).
%5 = getelementptr inbounds %Dual, %Dual* %1, i64 0, i32 0
%6 = load double, double* %5, align 8
%7 = sitofp i64 %3 to double
%8 = call double @llvm.pow.f64(double %6, double %7)
%9 = fadd double %7, %6
; "ord" against a constant is true when %8 is not NaN; "uno" is true when %9 is NaN.
; %10 is therefore the "no error" condition: continue at L14, otherwise throw.
%notlhs = fcmp ord double %8, 0.000000e+00
%notrhs = fcmp uno double %9, 0.000000e+00
%10 = or i1 %notlhs, %notrhs
br i1 %10, label %L14, label %if
if: ; preds = %top
call void @jl_throw(i8** inttoptr (i64 139801037461808 to i8**))
unreachable
L14: ; preds = %top
; %13 = pow(%6, %3 - 1), guarded the same way as %8.
%11 = add i64 %3, -1
%12 = sitofp i64 %11 to double
%13 = call double @llvm.pow.f64(double %6, double %12)
%14 = fadd double %12, %6
%notlhs20 = fcmp ord double %13, 0.000000e+00
%notrhs21 = fcmp uno double %14, 0.000000e+00
%15 = or i1 %notrhs21, %notlhs20
br i1 %15, label %L31, label %if10
if10: ; preds = %L14
call void @jl_throw(i8** inttoptr (i64 139801037461808 to i8**))
unreachable
L31: ; preds = %L14
; Same sequence for the second operand: %17 = field 0 of %2, %18 = double(%4),
; %19 = pow(%17, %18).
%16 = getelementptr inbounds %Dual, %Dual* %2, i64 0, i32 0
%17 = load double, double* %16, align 8
%18 = sitofp i64 %4 to double
%19 = call double @llvm.pow.f64(double %17, double %18)
%20 = fadd double %18, %17
%notlhs25 = fcmp ord double %19, 0.000000e+00
%notrhs26 = fcmp uno double %20, 0.000000e+00
%21 = or i1 %notlhs25, %notrhs26
br i1 %21, label %L69, label %if13
if13: ; preds = %L31
call void @jl_throw(i8** inttoptr (i64 139801037461808 to i8**))
unreachable
L69: ; preds = %L31
; %24 = pow(%17, %4 - 1), guarded the same way.
%22 = add i64 %4, -1
%23 = sitofp i64 %22 to double
%24 = call double @llvm.pow.f64(double %17, double %23)
%25 = fadd double %23, %17
%notlhs27 = fcmp ord double %24, 0.000000e+00
%notrhs28 = fcmp uno double %25, 0.000000e+00
%26 = or i1 %notrhs28, %notlhs27
br i1 %26, label %L86, label %if16
if16: ; preds = %L69
call void @jl_throw(i8** inttoptr (i64 139801037461808 to i8**))
unreachable
L86: ; preds = %L69
; %27 = double(%3) * pow(%6, %3 - 1) (the power-rule factor); it scales the three
; doubles stored in field 1 of %1 (presumably the Dual's partials).
%27 = fmul double %7, %13
%28 = getelementptr inbounds %Dual, %Dual* %1, i64 0, i32 1, i32 0, i64 0
%29 = load double, double* %28, align 8
%30 = fmul double %27, %29
%31 = getelementptr inbounds %Dual, %Dual* %1, i64 0, i32 1, i32 0, i64 1
%32 = load double, double* %31, align 8
%33 = fmul double %27, %32
%34 = getelementptr inbounds %Dual, %Dual* %1, i64 0, i32 1, i32 0, i64 2
%35 = load double, double* %34, align 8
%36 = fmul double %27, %35
; %37 = double(%4) * pow(%17, %4 - 1); it scales the three field-1 doubles of %2.
%37 = fmul double %18, %24
%38 = getelementptr inbounds %Dual, %Dual* %2, i64 0, i32 1, i32 0, i64 0
%39 = load double, double* %38, align 8
%40 = fmul double %37, %39
%41 = getelementptr inbounds %Dual, %Dual* %2, i64 0, i32 1, i32 0, i64 1
%42 = load double, double* %41, align 8
%43 = fmul double %37, %42
%44 = getelementptr inbounds %Dual, %Dual* %2, i64 0, i32 1, i32 0, i64 2
%45 = load double, double* %44, align 8
%46 = fmul double %37, %45
; Pack <%8, %30, %33, %36> and <%19, %40, %43, %46> into 4-lane vectors so the
; final a^p + b^q is a single vector add.
%47 = insertelement <4 x double> undef, double %8, i32 0
%48 = insertelement <4 x double> %47, double %30, i32 1
%49 = insertelement <4 x double> %48, double %33, i32 2
%50 = insertelement <4 x double> %49, double %36, i32 3
%51 = insertelement <4 x double> undef, double %19, i32 0
%52 = insertelement <4 x double> %51, double %40, i32 1
%53 = insertelement <4 x double> %52, double %43, i32 2
%54 = insertelement <4 x double> %53, double %46, i32 3
%55 = fadd <4 x double> %50, %54
; The sum is written through the sret pointer as one 32-byte store (align 8).
%56 = bitcast %Dual* %0 to <4 x double>*
store <4 x double> %55, <4 x double>* %56, align 8
ret void
}
# Compiler-generated x86-64 code in AT&T syntax (source operand first);
# presumably the code_native dump matching the LLVM function @julia_f_62604.
# The "Filename:" and "Source line:" rows are location annotations printed by
# the dump, not instructions.
#
# Register roles after the prologue (callee-saved, so they survive the pow calls):
#   %r13 = incoming %rsi   pointer to the first operand
#   %r12 = incoming %rdx   pointer to the second operand
#   %r15 = incoming %rcx   integer exponent for the first operand
#   %rbx = incoming %r8    integer exponent for the second operand
#   %r14 = address of pow
# NOTE(review): assumes the System V argument order (rdi, rsi, rdx, rcx, r8),
# with %rdi carrying the result pointer - confirm for the target platform.
#
# Stack slots (relative to %rbp):
#   -88   incoming %rdi (result pointer)
#   -48   base of the pow call currently being checked
#   -64   first exponent as a double
#   -56   (first exponent - 1) as a double, later the second exponent as a double
#   -72   (second exponent - 1) as a double
#   -80   pow(first base, first exponent - 1)
#   -128  pow(first base, first exponent)
#   -112  pow(second base, second exponent)
.text
Filename: script.jl
pushq %rbp
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $88, %rsp
movq %r8, %rbx
movq %rcx, %r15
movq %rdx, %r12
movq %rsi, %r13
movq %rdi, -88(%rbp)
Source line: 409
# xmm0 = first double of the first operand; xmm1 = (double)%r15.
vmovsd (%r13), %xmm0 # xmm0 = mem[0],zero
Source line: 716
vxorps %xmm1, %xmm1, %xmm1
vcvtsi2sdq %r15, %xmm1, %xmm1
movabsq $pow, %r14
vmovsd %xmm0, -48(%rbp)
vmovsd %xmm1, -64(%rbp)
Source line: 714
callq *%r14
vmovapd %xmm0, -128(%rbp)
Source line: 315
# Self-compare sets the parity flag only for NaN: a non-NaN result skips the guard.
vucomisd %xmm0, %xmm0
jnp L102
Source line: 714
# Result is NaN: throw (L392) unless base + exponent is NaN as well.
vmovsd -48(%rbp), %xmm0 # xmm0 = mem[0],zero
vaddsd -64(%rbp), %xmm0, %xmm0
vucomisd %xmm0, %xmm0
jnp L392
Source line: 716
L102:
# Second pow for the first operand: exponent decremented by one.
addq $-1, %r15
vxorps %xmm0, %xmm0, %xmm0
vcvtsi2sdq %r15, %xmm0, %xmm1
Source line: 714
vmovsd %xmm1, -56(%rbp)
vmovsd -48(%rbp), %xmm0 # xmm0 = mem[0],zero
callq *%r14
vmovsd -56(%rbp), %xmm1 # xmm1 = mem[0],zero
vaddsd -48(%rbp), %xmm1, %xmm1
Source line: 315
# Same guard, tested in the opposite order: a NaN sum accepts the result,
# otherwise a NaN pow result jumps to the throw stub L414.
vucomisd %xmm1, %xmm1
jp L154
vucomisd %xmm0, %xmm0
jp L414
L154:
vmovsd %xmm0, -80(%rbp)
Source line: 409
# Second operand: same sequence with %r12 / %rbx; slots -48 and -56 are reused.
vmovsd (%r12), %xmm0 # xmm0 = mem[0],zero
Source line: 716
vxorps %xmm1, %xmm1, %xmm1
vcvtsi2sdq %rbx, %xmm1, %xmm1
vmovsd %xmm0, -48(%rbp)
vmovsd %xmm1, -56(%rbp)
Source line: 714
callq *%r14
vmovapd %xmm0, -112(%rbp)
Source line: 315
vucomisd %xmm0, %xmm0
jnp L218
Source line: 714
vmovsd -48(%rbp), %xmm0 # xmm0 = mem[0],zero
vaddsd -56(%rbp), %xmm0, %xmm0
vucomisd %xmm0, %xmm0
jnp L436
Source line: 716
L218:
addq $-1, %rbx
vxorps %xmm0, %xmm0, %xmm0
vcvtsi2sdq %rbx, %xmm0, %xmm1
Source line: 714
vmovsd %xmm1, -72(%rbp)
vmovsd -48(%rbp), %xmm0 # xmm0 = mem[0],zero
callq *%r14
vmovsd -72(%rbp), %xmm1 # xmm1 = mem[0],zero
vaddsd -48(%rbp), %xmm1, %xmm1
Source line: 315
vucomisd %xmm1, %xmm1
jp L270
vucomisd %xmm0, %xmm0
jp L458
Source line: 411
L270:
# xmm1 = pow(first base, exponent - 1) * exponent; it scales the doubles at
# offsets 8, 16 and 24 of the first operand.
vmovsd -80(%rbp), %xmm1 # xmm1 = mem[0],zero
vmulsd -64(%rbp), %xmm1, %xmm1
Source line: 155
vmulsd 8(%r13), %xmm1, %xmm2
vmulsd 16(%r13), %xmm1, %xmm3
vmulsd 24(%r13), %xmm1, %xmm1
Source line: 411
# xmm0 still holds the last pow result; scale it by the second exponent and apply
# it to offsets 8, 16 and 24 of the second operand.
vmulsd -56(%rbp), %xmm0, %xmm0
Source line: 155
vmulsd 8(%r12), %xmm0, %xmm4
vmulsd 16(%r12), %xmm0, %xmm5
vmulsd 24(%r12), %xmm0, %xmm0
Source line: 7
# Assemble two 4-lane vectors {pow result, three scaled doubles}, add them, and
# store the 32-byte sum through the saved result pointer (unaligned store).
vunpcklpd %xmm1, %xmm3, %xmm1 # xmm1 = xmm3[0],xmm1[0]
vmovapd -128(%rbp), %xmm3
vunpcklpd %xmm2, %xmm3, %xmm2 # xmm2 = xmm3[0],xmm2[0]
vinsertf128 $1, %xmm1, %ymm2, %ymm1
vunpcklpd %xmm0, %xmm5, %xmm0 # xmm0 = xmm5[0],xmm0[0]
vmovapd -112(%rbp), %xmm2
vunpcklpd %xmm4, %xmm2, %xmm2 # xmm2 = xmm2[0],xmm4[0]
vinsertf128 $1, %xmm0, %ymm2, %ymm0
vaddpd %ymm0, %ymm1, %ymm0
movq -88(%rbp), %rax
vmovupd %ymm0, (%rax)
# Epilogue: release locals, restore callee-saved registers, clear the upper
# ymm halves before returning.
addq $88, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
vzeroupper
retq
Source line: 315
# Throw stubs, one per guard. Each passes the same absolute address in %rdi to
# jl_throw; the call is presumably non-returning (the IR marks it unreachable).
L392:
movabsq $jl_throw, %rax
movabsq $139801037461808, %rdi # imm = 0x7F25F72D5930
callq *%rax
Source line: 315
L414:
movabsq $jl_throw, %rax
movabsq $139801037461808, %rdi # imm = 0x7F25F72D5930
callq *%rax
Source line: 315
L436:
movabsq $jl_throw, %rax
movabsq $139801037461808, %rdi # imm = 0x7F25F72D5930
callq *%rax
Source line: 315
L458:
movabsq $jl_throw, %rax
movabsq $139801037461808, %rdi # imm = 0x7F25F72D5930
callq *%rax
; Compiler-generated LLVM IR; presumably a second code_llvm dump of the script's
; f(a, b, p, q) = a^p + b^q, taken on a different git revision (symbol
; @julia_f_62896).
; Unnamed parameters, in order:
;   %0 - sret pointer that receives the resulting %Dual
;   %1 - first %Dual operand (readonly, dereferenceable(32))
;   %2 - second %Dual operand (readonly, dereferenceable(32))
;   %3 - i64 exponent applied to %1
;   %4 - i64 exponent applied to %2
; For each operand this version tests the exponent against zero: a zero
; exponent skips the second pow call and feeds zeros into the field-1 lanes.
; Every llvm.pow call is followed by a guard that calls jl_throw when the pow
; result is NaN while (exponent + base) is not NaN.
; NOTE(review): the thrown object is only visible as an absolute address;
; presumably a prebuilt DomainError instance - confirm.
define void @julia_f_62896(%Dual* noalias nocapture sret, %Dual* nocapture readonly dereferenceable(32), %Dual* nocapture readonly dereferenceable(32), i64, i64) #0 !dbg !5 {
top:
; %6 = field 0 of %1 (presumably the Dual's value); %7 = double(%3); %8 = pow(%6, %7).
%5 = getelementptr inbounds %Dual, %Dual* %1, i64 0, i32 0
%6 = load double, double* %5, align 8
%7 = sitofp i64 %3 to double
%8 = call double @llvm.pow.f64(double %6, double %7)
%9 = fadd double %7, %6
; %10 is true when %8 is not NaN or %9 is NaN, i.e. the "no error" condition.
%notlhs = fcmp ord double %8, 0.000000e+00
%notrhs = fcmp uno double %9, 0.000000e+00
%10 = or i1 %notlhs, %notrhs
br i1 %10, label %L16, label %if
if: ; preds = %top
call void @jl_throw(i8** inttoptr (i64 140683894675760 to i8**))
unreachable
L16: ; preds = %top
; Zero exponent: jump straight to L91, where the phis supply zero lanes.
%11 = icmp eq i64 %3, 0
br i1 %11, label %L91, label %L44
L44: ; preds = %L16
; %14 = pow(%6, %3 - 1), guarded the same way as %8.
%12 = add i64 %3, -1
%13 = sitofp i64 %12 to double
%14 = call double @llvm.pow.f64(double %6, double %13)
%15 = fadd double %13, %6
%notlhs33 = fcmp ord double %14, 0.000000e+00
%notrhs34 = fcmp uno double %15, 0.000000e+00
%16 = or i1 %notrhs34, %notlhs33
br i1 %16, label %L55, label %if23
L91: ; preds = %L16, %L55
; Merge point for the first operand's scaled field-1 values: %17 carries
; elements 0-1 and the scalar phi carries element 2; both are zero when
; arriving from L16.
%new_partials7.sroa.0.sroa.4.0 = phi double [ %70, %L55 ], [ 0.000000e+00, %L16 ]
%17 = phi <2 x double> [ %69, %L55 ], [ zeroinitializer, %L16 ]
; Second operand: %19 = field 0 of %2, %20 = double(%4), %21 = pow(%19, %20).
%18 = getelementptr inbounds %Dual, %Dual* %2, i64 0, i32 0
%19 = load double, double* %18, align 8
%20 = sitofp i64 %4 to double
%21 = call double @llvm.pow.f64(double %19, double %20)
%22 = fadd double %20, %19
%notlhs29 = fcmp ord double %21, 0.000000e+00
%notrhs30 = fcmp uno double %22, 0.000000e+00
%23 = or i1 %notlhs29, %notrhs30
br i1 %23, label %L109, label %if16
if16: ; preds = %L91
call void @jl_throw(i8** inttoptr (i64 140683894675760 to i8**))
unreachable
L109: ; preds = %L91
; Zero exponent for the second operand: skip to L184 with zero lanes.
%24 = icmp eq i64 %4, 0
br i1 %24, label %L184, label %L137
L137: ; preds = %L109
; %27 = pow(%19, %4 - 1), guarded the same way.
%25 = add i64 %4, -1
%26 = sitofp i64 %25 to double
%27 = call double @llvm.pow.f64(double %19, double %26)
%28 = fadd double %26, %19
%notlhs31 = fcmp ord double %27, 0.000000e+00
%notrhs32 = fcmp uno double %28, 0.000000e+00
%29 = or i1 %notrhs32, %notlhs31
br i1 %29, label %L148, label %if20
L184: ; preds = %L109, %L148
; Merge point for the second operand's scaled field-1 values (zero from L109).
%new_partials.sroa.0.sroa.4.0 = phi double [ %57, %L148 ], [ 0.000000e+00, %L109 ]
%30 = phi <2 x double> [ %56, %L148 ], [ zeroinitializer, %L109 ]
; Build <%8, %17[0], %17[1], third lane> and <%21, %30[0], %30[1], third lane>,
; add them, and store the sum through the sret pointer as one 32-byte store.
%31 = insertelement <4 x double> undef, double %8, i32 0
%32 = extractelement <2 x double> %17, i32 0
%33 = insertelement <4 x double> %31, double %32, i32 1
%34 = extractelement <2 x double> %17, i32 1
%35 = insertelement <4 x double> %33, double %34, i32 2
%36 = insertelement <4 x double> %35, double %new_partials7.sroa.0.sroa.4.0, i32 3
%37 = insertelement <4 x double> undef, double %21, i32 0
%38 = extractelement <2 x double> %30, i32 0
%39 = insertelement <4 x double> %37, double %38, i32 1
%40 = extractelement <2 x double> %30, i32 1
%41 = insertelement <4 x double> %39, double %40, i32 2
%42 = insertelement <4 x double> %41, double %new_partials.sroa.0.sroa.4.0, i32 3
%43 = fadd <4 x double> %36, %42
%44 = bitcast %Dual* %0 to <4 x double>*
store <4 x double> %43, <4 x double>* %44, align 8
ret void
if20: ; preds = %L137
call void @jl_throw(i8** inttoptr (i64 140683894675760 to i8**))
unreachable
L148: ; preds = %L137
; Scale the three field-1 doubles of %2 (presumably the partials): first by
; %20 = double(%4), then by %27 = pow(%19, %4 - 1). Elements 0-1 are handled
; as a <2 x double> vector, element 2 as a scalar.
%45 = getelementptr inbounds %Dual, %Dual* %2, i64 0, i32 1, i32 0, i64 0
%46 = bitcast double* %45 to <2 x double>*
%47 = load <2 x double>, <2 x double>* %46, align 8
%48 = insertelement <2 x double> undef, double %20, i32 0
%49 = insertelement <2 x double> %48, double %20, i32 1
%50 = fmul <2 x double> %49, %47
%51 = getelementptr inbounds %Dual, %Dual* %2, i64 0, i32 1, i32 0, i64 2
%52 = load double, double* %51, align 8
%53 = fmul double %20, %52
%54 = insertelement <2 x double> undef, double %27, i32 0
%55 = insertelement <2 x double> %54, double %27, i32 1
%56 = fmul <2 x double> %55, %50
%57 = fmul double %27, %53
br label %L184
if23: ; preds = %L44
call void @jl_throw(i8** inttoptr (i64 140683894675760 to i8**))
unreachable
L55: ; preds = %L44
; Same scaling for %1: multiply its field-1 doubles by %7 = double(%3), then by
; %14 = pow(%6, %3 - 1).
%58 = getelementptr inbounds %Dual, %Dual* %1, i64 0, i32 1, i32 0, i64 0
%59 = bitcast double* %58 to <2 x double>*
%60 = load <2 x double>, <2 x double>* %59, align 8
%61 = insertelement <2 x double> undef, double %7, i32 0
%62 = insertelement <2 x double> %61, double %7, i32 1
%63 = fmul <2 x double> %62, %60
%64 = getelementptr inbounds %Dual, %Dual* %1, i64 0, i32 1, i32 0, i64 2
%65 = load double, double* %64, align 8
%66 = fmul double %7, %65
%67 = insertelement <2 x double> undef, double %14, i32 0
%68 = insertelement <2 x double> %67, double %14, i32 1
%69 = fmul <2 x double> %68, %63
%70 = fmul double %14, %66
br label %L91
}
# Compiler-generated x86-64 code in AT&T syntax (source operand first);
# presumably the code_native dump matching the LLVM function @julia_f_62896.
# The "Filename:" and "Source line:" rows are location annotations printed by
# the dump, not instructions.
#
# Register roles after the prologue (callee-saved, so they survive the pow calls):
#   %rbx = incoming %rsi   pointer to the first operand
#   %r12 = incoming %rdx   pointer to the second operand
#   %r14 = incoming %rcx   integer exponent for the first operand
#   %r13 = incoming %r8    integer exponent for the second operand
#   %r15 = address of pow
# NOTE(review): assumes the System V argument order (rdi, rsi, rdx, rcx, r8),
# with %rdi carrying the result pointer - confirm for the target platform.
#
# Stack slots (relative to %rbp):
#   -96   incoming %rdi (result pointer)
#   -48   base of the pow call currently being checked
#   -64   current exponent as a double (16-byte slot)
#   -80   (first exponent - 1) as a double, later the first operand's two
#         scaled doubles from offsets 8 and 16
#   -112  first operand's scaled double from offset 24
#   -144  pow(first base, first exponent)
#   -128  pow(second base, second exponent)
#   -88   (second exponent - 1) as a double
.text
Filename: script.jl
pushq %rbp
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $104, %rsp
movq %r8, %r13
movq %rcx, %r14
movq %rdx, %r12
movq %rsi, %rbx
movq %rdi, -96(%rbp)
Source line: 409
# xmm0 = first double of the first operand; xmm1 = (double)%r14.
vmovsd (%rbx), %xmm0 # xmm0 = mem[0],zero
Source line: 716
vxorps %xmm1, %xmm1, %xmm1
vcvtsi2sdq %r14, %xmm1, %xmm1
movabsq $pow, %r15
vmovsd %xmm0, -48(%rbp)
vmovapd %xmm1, -64(%rbp)
Source line: 714
callq *%r15
Source line: 315
# Self-compare sets the parity flag only for NaN: a non-NaN result skips the guard.
vucomisd %xmm0, %xmm0
jnp L95
Source line: 714
# Result is NaN: throw (L471) unless base + exponent is NaN as well. The sum is
# formed in xmm1 so the pow result stays in xmm0.
vmovsd -48(%rbp), %xmm1 # xmm1 = mem[0],zero
vaddsd -64(%rbp), %xmm1, %xmm1
vucomisd %xmm1, %xmm1
jnp L471
L95:
vmovapd %xmm0, -144(%rbp)
Source line: 411
# Zero exponent: skip the second pow call and use zero lanes (L203).
testq %r14, %r14
je L203
Source line: 716
addq $-1, %r14
vxorps %xmm0, %xmm0, %xmm0
vcvtsi2sdq %r14, %xmm0, %xmm1
Source line: 714
vmovsd %xmm1, -80(%rbp)
vmovsd -48(%rbp), %xmm0 # xmm0 = mem[0],zero
callq *%r15
vmovsd -80(%rbp), %xmm1 # xmm1 = mem[0],zero
vaddsd -48(%rbp), %xmm1, %xmm1
Source line: 315
# Same guard, tested in the opposite order: a NaN sum accepts the result,
# otherwise a NaN pow result jumps to the throw stub L515.
vucomisd %xmm1, %xmm1
jp L160
vucomisd %xmm0, %xmm0
jp L515
L160:
# Scale the doubles at offsets 8/16 (packed) and 24 (scalar) of the first
# operand by the exponent, then by pow(base, exponent - 1) held in xmm0.
vmovapd -64(%rbp), %xmm2
Source line: 155
vmovddup %xmm2, %xmm1 # xmm1 = xmm2[0,0]
vmulpd 8(%rbx), %xmm1, %xmm1
vmulsd 24(%rbx), %xmm2, %xmm2
Source line: 155
vmovddup %xmm0, %xmm3 # xmm3 = xmm0[0,0]
vmulpd %xmm1, %xmm3, %xmm1
vmovapd %xmm1, -80(%rbp)
vmulsd %xmm2, %xmm0, %xmm0
Source line: 414
vmovapd %xmm0, -112(%rbp)
jmp L221
L203:
# Zero-exponent path: both result slots are cleared.
vxorpd %xmm0, %xmm0, %xmm0
vmovapd %xmm0, -80(%rbp)
vxorpd %xmm0, %xmm0, %xmm0
vmovapd %xmm0, -112(%rbp)
Source line: 409
L221:
# Second operand: same sequence with %r12 / %r13; slots -48 and -64 are reused.
vmovsd (%r12), %xmm0 # xmm0 = mem[0],zero
Source line: 716
vxorps %xmm1, %xmm1, %xmm1
vcvtsi2sdq %r13, %xmm1, %xmm1
vmovsd %xmm0, -48(%rbp)
vmovapd %xmm1, -64(%rbp)
Source line: 714
callq *%r15
Source line: 315
vucomisd %xmm0, %xmm0
jnp L275
Source line: 714
vmovsd -48(%rbp), %xmm1 # xmm1 = mem[0],zero
vaddsd -64(%rbp), %xmm1, %xmm1
vucomisd %xmm1, %xmm1
jnp L493
Source line: 411
L275:
# Zero exponent: go to L379 with the pow result still live in xmm0; otherwise
# the result is spilled to -128 before the next call clobbers xmm0.
testq %r13, %r13
je L379
vmovapd %xmm0, -128(%rbp)
Source line: 716
addq $-1, %r13
vxorps %xmm0, %xmm0, %xmm0
vcvtsi2sdq %r13, %xmm0, %xmm1
Source line: 714
vmovsd %xmm1, -88(%rbp)
vmovsd -48(%rbp), %xmm0 # xmm0 = mem[0],zero
callq *%r15
vmovsd -88(%rbp), %xmm1 # xmm1 = mem[0],zero
vaddsd -48(%rbp), %xmm1, %xmm1
Source line: 315
vucomisd %xmm1, %xmm1
jp L337
vucomisd %xmm0, %xmm0
jp L537
L337:
# Scale the second operand's doubles at offsets 8/16 (into xmm1) and 24 (into
# xmm5), then reload pow(second base, second exponent) into xmm0.
vmovapd -64(%rbp), %xmm2
Source line: 155
vmovddup %xmm2, %xmm1 # xmm1 = xmm2[0,0]
vmulpd 8(%r12), %xmm1, %xmm1
vmulsd 24(%r12), %xmm2, %xmm2
Source line: 155
vmovddup %xmm0, %xmm3 # xmm3 = xmm0[0,0]
vmulpd %xmm1, %xmm3, %xmm1
vmulsd %xmm2, %xmm0, %xmm5
vmovapd -128(%rbp), %xmm0
jmp L387
L379:
# Zero-exponent path for the second operand: zero lanes in xmm1 and xmm5.
vxorpd %xmm1, %xmm1, %xmm1
vxorpd %xmm5, %xmm5, %xmm5
L387:
# Assemble two 4-lane vectors {pow result, three scaled doubles}, add them, and
# store the 32-byte sum through the saved result pointer (unaligned store).
vmovapd -80(%rbp), %xmm4
Source line: 7
vpermilpd $1, %xmm4, %xmm2 # xmm2 = xmm4[1,0]
vunpcklpd -112(%rbp), %xmm2, %xmm2 # xmm2 = xmm2[0],mem[0]
vmovapd -144(%rbp), %xmm3
vunpcklpd %xmm4, %xmm3, %xmm3 # xmm3 = xmm3[0],xmm4[0]
vinsertf128 $1, %xmm2, %ymm3, %ymm2
vpermilpd $1, %xmm1, %xmm3 # xmm3 = xmm1[1,0]
vunpcklpd %xmm5, %xmm3, %xmm3 # xmm3 = xmm3[0],xmm5[0]
vunpcklpd %xmm1, %xmm0, %xmm1 # xmm1 = xmm0[0],xmm1[0]
vinsertf128 $1, %xmm3, %ymm1, %ymm0
vaddpd %ymm0, %ymm2, %ymm0
movq -96(%rbp), %rax
vmovupd %ymm0, (%rax)
# Epilogue: release locals, restore callee-saved registers, clear the upper
# ymm halves before returning.
addq $104, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
vzeroupper
retq
Source line: 315
# Throw stubs, one per guard. Each passes the same absolute address in %rdi to
# jl_throw; the call is presumably non-returning (the IR marks it unreachable).
L471:
movabsq $jl_throw, %rax
movabsq $140683894675760, %rdi # imm = 0x7FF385915930
callq *%rax
Source line: 315
L493:
movabsq $jl_throw, %rax
movabsq $140683894675760, %rdi # imm = 0x7FF385915930
callq *%rax
Source line: 315
L515:
movabsq $jl_throw, %rax
movabsq $140683894675760, %rdi # imm = 0x7FF385915930
callq *%rax
Source line: 315
L537:
movabsq $jl_throw, %rax
movabsq $140683894675760, %rdi # imm = 0x7FF385915930
callq *%rax
# Trailing nop after the last stub; presumably padding emitted by the compiler.
nop
using ForwardDiff: Dual
# Sample arguments: two Duals (each built from four Float64 values) and two
# Int exponents. Only their types are used below, to pick the method of `f`
# whose generated code is dumped.
a = Dual(1.0, 2.0, 3.0, 4.0)
b = Dual(5.0, 6.0, 7.0, 8.0)
p = 2
q = 3

# Alternative definition, kept commented out as in the original:
# f(a, b, p, q) = @inbounds a^p + b^q
function f(a, b, p, q)
    return a^p + b^q
end

# Name of the currently checked-out git branch; used as the output file stem.
rev = strip(readstring(`git rev-parse --abbrev-ref HEAD`))

# Write the LLVM IR for `f` to "<rev>.ll".
open("$rev.ll", "w") do io
    code_llvm(io, f, map(typeof, (a, b, p, q)))
end

# Write the native assembly for `f` to "<rev>.s".
open("$rev.s", "w") do io
    code_native(io, f, map(typeof, (a, b, p, q)))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment