Last active
August 29, 2015 13:56
-
-
Save ihaque/8941360 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## ---------------------------------------------------------------------------
## _msd_axis_major -- variant 1 (clang output, SSE3, stack protector enabled).
## Accumulates nine 4-wide running sums of pairwise products between two
## axis-major coordinate arrays, reduces them with haddps into three 16-byte
## vectors on the stack, and finishes by calling _msdFromMandG on that buffer.
## Apparent SysV AMD64 inputs (inferred from register use -- TODO confirm
## against the original C source):
##   edi       = count forwarded to _msdFromMandG (parked in eax)
##   esi       = per-plane float count; must be a multiple of 4 (asserted)
##   edx       = plane stride in floats (plane k at base + k*stride*4,
##               presumably x/y/z planes -- verify against caller)
##   rcx, r8   = the two coordinate arrays; movaps => 16-byte aligned
##   xmm0/xmm1 = scalar floats passed straight through to _msdFromMandG
## Returns: xmm0 (result of _msdFromMandG, or 0.0 on the identical-input path).
## ---------------------------------------------------------------------------
.globl _msd_axis_major | |
.align 4, 0x90 | |
_msd_axis_major: ## @msd_axis_major | |
.cfi_startproc | |
## BB#0: | |
pushq %rbp | |
Ltmp43: | |
.cfi_def_cfa_offset 16 | |
Ltmp44: | |
.cfi_offset %rbp, -16 | |
movq %rsp, %rbp | |
Ltmp45: | |
.cfi_def_cfa_register %rbp | |
pushq %rbx | |
subq $120, %rsp | |
Ltmp46: | |
.cfi_offset %rbx, -24 | |
movl %edi, %eax | |
## Load the stack-protector canary and stash a copy in the frame; the
## pointer to the guard stays live in rbx for the epilogue recheck.
movq ___stack_chk_guard@GOTPCREL(%rip), %rbx | |
movq (%rbx), %rdi | |
movq %rdi, -16(%rbp) | |
## Fast path: same buffer (rcx == r8) and equal scalar args => result 0.0
## (xmm2 is zeroed below).  jne + jnp after ucomiss together mean
## "ordered and equal" (PF would be set for a NaN operand).
cmpq %r8, %rcx | |
jne LBB5_2 | |
## BB#1: | |
xorps %xmm2, %xmm2 | |
ucomiss %xmm1, %xmm0 | |
jne LBB5_2 | |
jnp LBB5_7 | |
LBB5_2: | |
## This variant only has the 4-wide vector loop: assert count % 4 == 0.
testb $3, %sil | |
jne LBB5_9 | |
## BB#3: ## %.preheader | |
## Spill the two scalar float args -- the loop needs every xmm register.
movss %xmm0, -120(%rbp) ## 4-byte Spill | |
movss %xmm1, -116(%rbp) ## 4-byte Spill | |
sarl $2, %esi | |
xorps %xmm1, %xmm1 | |
testl %esi, %esi | |
xorps %xmm2, %xmm2 | |
xorps %xmm3, %xmm3 | |
xorps %xmm0, %xmm0 | |
xorps %xmm5, %xmm5 | |
xorps %xmm13, %xmm13 | |
xorps %xmm14, %xmm14 | |
xorps %xmm4, %xmm4 | |
xorps %xmm15, %xmm15 | |
jle LBB5_6 | |
## BB#4: ## %.lr.ph.preheader | |
## rdx = plane-1 element offset (stride), rdi = plane-2 offset (2*stride).
movslq %edx, %rdx | |
movl %edx, %edi | |
addl %edx, %edi | |
movslq %edi, %rdi | |
xorps %xmm1, %xmm1 | |
movaps %xmm1, %xmm2 | |
xorps %xmm3, %xmm3 | |
movaps %xmm1, %xmm0 | |
movaps %xmm1, %xmm5 | |
movaps %xmm1, %xmm13 | |
xorps %xmm14, %xmm14 | |
movaps %xmm1, %xmm4 | |
movaps %xmm1, %xmm15 | |
.align 4, 0x90 | |
LBB5_5: ## %.lr.ph | |
## =>This Inner Loop Header: Depth=1 | |
## Per iteration: aligned-load 4 floats from each of the 3 planes of both
## arrays and add all 9 pairwise plane products into 9 vector accumulators.
## Three accumulators round-trip through -112/-96/-80(%rbp) spill slots
## because only 16 xmm registers are available.
movaps %xmm3, -112(%rbp) ## 16-byte Spill | |
movaps %xmm2, -96(%rbp) ## 16-byte Spill | |
movaps %xmm1, -80(%rbp) ## 16-byte Spill | |
movaps (%r8,%rdi,4), %xmm6 | |
movaps (%rcx,%rdi,4), %xmm8 | |
movaps (%r8,%rdx,4), %xmm1 | |
movaps (%rcx,%rdx,4), %xmm3 | |
movaps (%r8), %xmm7 | |
movaps %xmm3, %xmm9 | |
mulps %xmm1, %xmm9 | |
movaps %xmm8, %xmm10 | |
mulps %xmm1, %xmm10 | |
movaps %xmm3, %xmm2 | |
mulps %xmm6, %xmm2 | |
movaps %xmm5, %xmm11 | |
movaps %xmm8, %xmm5 | |
mulps %xmm6, %xmm5 | |
mulps %xmm7, %xmm3 | |
mulps %xmm7, %xmm8 | |
movaps %xmm0, %xmm12 | |
movaps (%rcx), %xmm0 | |
mulps %xmm0, %xmm6 | |
mulps %xmm0, %xmm7 | |
mulps %xmm1, %xmm0 | |
movaps -80(%rbp), %xmm1 ## 16-byte Reload | |
addps %xmm5, %xmm1 | |
movaps %xmm11, %xmm5 | |
addps %xmm2, %xmm4 | |
movaps -96(%rbp), %xmm2 ## 16-byte Reload | |
addps %xmm10, %xmm2 | |
addps %xmm9, %xmm14 | |
addps %xmm6, %xmm5 | |
addps %xmm0, %xmm12 | |
movaps %xmm12, %xmm0 | |
addq $16, %r8 | |
addps %xmm8, %xmm15 | |
addq $16, %rcx | |
addps %xmm3, %xmm13 | |
movaps -112(%rbp), %xmm3 ## 16-byte Reload | |
decl %esi | |
addps %xmm7, %xmm3 | |
jne LBB5_5 | |
LBB5_6: ## %._crit_edge | |
## Horizontal reduction: haddps folds the nine accumulators into three
## 16-byte vectors at -64/-48/-32(%rbp); that buffer's address is the
## pointer argument (rdi) handed to _msdFromMandG.
haddps %xmm13, %xmm5 | |
haddps %xmm0, %xmm3 | |
haddps %xmm5, %xmm3 | |
movaps %xmm3, -64(%rbp) | |
haddps %xmm2, %xmm15 | |
haddps %xmm4, %xmm14 | |
haddps %xmm15, %xmm14 | |
movaps %xmm14, -48(%rbp) | |
haddps %xmm2, %xmm1 | |
haddps %xmm5, %xmm1 | |
movaps %xmm1, -32(%rbp) | |
leaq -64(%rbp), %rdi | |
## Reload the spilled scalar args and call the shared finisher.
movss -120(%rbp), %xmm0 ## 4-byte Reload | |
movss -116(%rbp), %xmm1 ## 4-byte Reload | |
movl %eax, %esi | |
xorl %edx, %edx | |
xorl %ecx, %ecx | |
callq _msdFromMandG | |
movaps %xmm0, %xmm2 | |
LBB5_7: | |
## Stack-protector epilogue: recheck the canary before returning.
movq (%rbx), %rax | |
cmpq -16(%rbp), %rax | |
jne LBB5_8 | |
## BB#10: ## %SP_return | |
movaps %xmm2, %xmm0 | |
addq $120, %rsp | |
popq %rbx | |
popq %rbp | |
ret | |
LBB5_9: | |
## Assertion failure path (count not a multiple of 4): never returns.
leaq L___func__.msd_axis_major(%rip), %rdi | |
leaq L_.str2(%rip), %rsi | |
leaq L_.str3(%rip), %rcx | |
movl $402, %edx ## imm = 0x192 | |
callq ___assert_rtn | |
LBB5_8: ## %CallStackCheckFailBlk | |
callq ___stack_chk_fail | |
.cfi_endproc |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## ---------------------------------------------------------------------------
## _msd_axis_major -- variant 2 (clang output, SSE, stack protector enabled).
## Same computation as variant 1 but handles a count that is NOT a multiple
## of 4: on the final loop trip the 4-wide vectors are assembled lane by lane
## with scalar movss loads (missing lanes stay zero) instead of full movups
## loads.  Register pressure forces many 16-byte spill/reload round trips.
## Apparent inputs (inferred from use -- TODO confirm against the C source):
##   edi       = per-plane float count (spilled to -216(%rbp))
##   edx       = plane stride in floats
##   rcx, r8   = the two coordinate arrays; movups => no alignment required
##   xmm0/xmm1 = scalar floats forwarded to _msdFromMandG
## Returns: xmm0 (result of _msdFromMandG, or 0.0 on the identical-input path).
## ---------------------------------------------------------------------------
.globl _msd_axis_major | |
.align 4, 0x90 | |
_msd_axis_major: ## @msd_axis_major | |
.cfi_startproc | |
## BB#0: | |
pushq %rbp | |
Ltmp43: | |
.cfi_def_cfa_offset 16 | |
Ltmp44: | |
.cfi_offset %rbp, -16 | |
movq %rsp, %rbp | |
Ltmp45: | |
.cfi_def_cfa_register %rbp | |
pushq %r15 | |
pushq %r14 | |
pushq %r13 | |
pushq %r12 | |
pushq %rbx | |
subq $184, %rsp | |
Ltmp46: | |
.cfi_offset %rbx, -56 | |
Ltmp47: | |
.cfi_offset %r12, -48 | |
Ltmp48: | |
.cfi_offset %r13, -40 | |
Ltmp49: | |
.cfi_offset %r14, -32 | |
Ltmp50: | |
.cfi_offset %r15, -24 | |
movl %edi, -216(%rbp) ## 4-byte Spill | |
## Stack-protector canary saved at -48(%rbp); rechecked at LBB5_54.
movq ___stack_chk_guard@GOTPCREL(%rip), %rax | |
movq (%rax), %rax | |
movq %rax, -48(%rbp) | |
## Fast path: identical buffers and ordered-equal scalars => return 0.0.
cmpq %r8, %rcx | |
jne LBB5_2 | |
## BB#1: | |
xorps %xmm2, %xmm2 | |
ucomiss %xmm1, %xmm0 | |
jne LBB5_2 | |
jnp LBB5_54 | |
LBB5_2: | |
## Spill the scalar args and zero all nine accumulators (three of them
## live permanently in spill slots -144/-128/-112(%rbp)).
movss %xmm0, -224(%rbp) ## 4-byte Spill | |
movss %xmm1, -220(%rbp) ## 4-byte Spill | |
xorps %xmm0, %xmm0 | |
cmpl $0, -216(%rbp) ## 4-byte Folded Reload | |
xorps %xmm12, %xmm12 | |
movaps %xmm0, %xmm1 | |
xorps %xmm2, %xmm2 | |
movaps %xmm2, -144(%rbp) ## 16-byte Spill | |
xorps %xmm5, %xmm5 | |
movaps %xmm0, %xmm3 | |
xorps %xmm2, %xmm2 | |
movaps %xmm2, -128(%rbp) ## 16-byte Spill | |
xorps %xmm13, %xmm13 | |
movaps %xmm0, %xmm2 | |
xorps %xmm0, %xmm0 | |
movaps %xmm0, -112(%rbp) ## 16-byte Spill | |
jle LBB5_53 | |
## BB#3: ## %.lr.ph | |
## Loop setup: esi = count mod 4 (remainder lanes, sign-corrected divide),
## r10d = ceil(count / 4) (trip count), r11d = index of the last trip.
## Six base pointers are pre-biased by +12 bytes so the loop can address
## lanes as -12/-8/-4/0(base, rdi); -209(%rbp) caches (remainder == 0).
movl -216(%rbp), %edi ## 4-byte Reload | |
movl %edi, %eax | |
sarl $31, %eax | |
shrl $30, %eax | |
addl %edi, %eax | |
andl $-4, %eax | |
movl %edi, %esi | |
subl %eax, %esi | |
leal 3(%rdi), %eax | |
sarl $31, %eax | |
shrl $30, %eax | |
leal 3(%rdi,%rax), %r10d | |
sarl $2, %r10d | |
movslq %edx, %r9 | |
addl %edx, %edx | |
leal -1(%r10), %r11d | |
xorl %eax, %eax | |
xorl %edi, %edi | |
testl %esi, %esi | |
leaq 12(%rcx,%r9,4), %rbx | |
movslq %edx, %r14 | |
xorps %xmm0, %xmm0 | |
leaq 12(%r8,%r9,4), %r15 | |
leaq 12(%rcx,%r14,4), %rdx | |
leaq 12(%rcx), %r12 | |
leaq 12(%r8,%r14,4), %r13 | |
leaq 12(%r8), %r8 | |
sete -209(%rbp) ## 1-byte Folded Spill | |
movaps %xmm0, %xmm12 | |
movaps %xmm0, %xmm1 | |
movaps %xmm0, -144(%rbp) ## 16-byte Spill | |
xorps %xmm5, %xmm5 | |
movaps %xmm0, %xmm3 | |
movaps %xmm0, -128(%rbp) ## 16-byte Spill | |
movaps %xmm0, %xmm13 | |
movaps %xmm0, %xmm2 | |
xorps %xmm0, %xmm0 | |
movaps %xmm0, -112(%rbp) ## 16-byte Spill | |
.align 4, 0x90 | |
## Main loop: every trip except the last takes the full-movups fast path
## (LBB5_50 / LBB5_51); the last trip gathers only the valid remainder
## lanes with scalar movss loads guarded by the value of esi.
LBB5_4: ## =>This Inner Loop Header: Depth=1 | |
movaps %xmm3, -208(%rbp) ## 16-byte Spill | |
movaps %xmm2, -176(%rbp) ## 16-byte Spill | |
movaps %xmm1, -160(%rbp) ## 16-byte Spill | |
cmpl %eax, %r11d | |
jne LBB5_50 | |
## BB#5: ## in Loop: Header=BB5_4 Depth=1 | |
movss -12(%r12,%rdi), %xmm1 | |
cmpl $1, %esi | |
jne LBB5_7 | |
## BB#6: ## %.thread8.thread | |
## in Loop: Header=BB5_4 Depth=1 | |
movss -12(%rdx,%rdi), %xmm2 | |
movss -12(%rbx,%rdi), %xmm0 | |
xorps %xmm3, %xmm3 | |
movss %xmm1, %xmm3 | |
pshufd $21, %xmm3, %xmm6 ## xmm6 = xmm3[1,1,1,0] | |
pshufd $21, %xmm0, %xmm4 ## xmm4 = xmm0[1,1,1,0] | |
xorb %r14b, %r14b | |
pxor %xmm3, %xmm3 | |
movdqa %xmm3, %xmm8 | |
movdqa %xmm3, %xmm7 | |
jmp LBB5_27 | |
.align 4, 0x90 | |
LBB5_50: ## %.thread64 | |
## in Loop: Header=BB5_4 Depth=1 | |
## Fast path: unaligned 4-wide loads; accumulate the plane products.
movups -12(%rdx,%rdi), %xmm7 | |
movups -12(%rbx,%rdi), %xmm4 | |
movups -12(%r15,%rdi), %xmm2 | |
movups -12(%r12,%rdi), %xmm6 | |
movaps %xmm6, %xmm0 | |
mulps %xmm2, %xmm0 | |
movaps %xmm4, %xmm3 | |
mulps %xmm2, %xmm3 | |
mulps %xmm7, %xmm2 | |
movups -12(%r8,%rdi), %xmm1 | |
addps %xmm2, %xmm13 | |
movaps %xmm4, %xmm2 | |
mulps %xmm1, %xmm2 | |
addps %xmm3, %xmm5 | |
addps %xmm0, %xmm12 | |
movaps %xmm6, %xmm0 | |
mulps %xmm1, %xmm0 | |
mulps %xmm7, %xmm1 | |
movaps -128(%rbp), %xmm3 ## 16-byte Reload | |
addps %xmm1, %xmm3 | |
movaps %xmm3, -128(%rbp) ## 16-byte Spill | |
movaps -144(%rbp), %xmm1 ## 16-byte Reload | |
addps %xmm2, %xmm1 | |
movaps %xmm1, -144(%rbp) ## 16-byte Spill | |
movaps -112(%rbp), %xmm1 ## 16-byte Reload | |
addps %xmm0, %xmm1 | |
movaps %xmm1, -112(%rbp) ## 16-byte Spill | |
jmp LBB5_51 | |
.align 4, 0x90 | |
## Slow path (last trip): lane-by-lane gather.  The branches on esi
## select which of the up-to-3 remainder lanes exist; unpcklps sequences
## then re-pack the scalars into a 4-wide vector with zeroed tail lanes.
LBB5_7: ## in Loop: Header=BB5_4 Depth=1 | |
movss -8(%r12,%rdi), %xmm0 | |
testl %esi, %esi | |
je LBB5_10 | |
## BB#8: ## in Loop: Header=BB5_4 Depth=1 | |
cmpl $3, %esi | |
jne LBB5_9 | |
LBB5_10: ## in Loop: Header=BB5_4 Depth=1 | |
movss -4(%r12,%rdi), %xmm2 | |
xorps %xmm6, %xmm6 | |
testl %esi, %esi | |
jne LBB5_12 | |
## BB#11: ## in Loop: Header=BB5_4 Depth=1 | |
movss (%r12,%rdi), %xmm6 | |
LBB5_12: ## %.thread4 | |
## in Loop: Header=BB5_4 Depth=1 | |
unpcklps %xmm0, %xmm6 ## xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1] | |
unpcklps %xmm2, %xmm6 ## xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1] | |
movb -209(%rbp), %cl ## 1-byte Reload | |
jmp LBB5_13 | |
LBB5_9: ## %.thread6 | |
## in Loop: Header=BB5_4 Depth=1 | |
xorps %xmm2, %xmm2 | |
movss %xmm0, %xmm2 | |
pshufd $69, %xmm2, %xmm6 ## xmm6 = xmm2[1,1,0,1] | |
xorb %cl, %cl | |
LBB5_13: ## in Loop: Header=BB5_4 Depth=1 | |
movss -8(%rbx,%rdi), %xmm0 | |
movss -12(%rbx,%rdi), %xmm2 | |
movaps %xmm6, %xmm3 | |
movss %xmm1, %xmm3 | |
shufps $36, %xmm3, %xmm6 ## xmm6 = xmm6[0,1],xmm3[2,0] | |
testl %esi, %esi | |
je LBB5_15 | |
## BB#14: ## in Loop: Header=BB5_4 Depth=1 | |
cmpl $3, %esi | |
jne LBB5_17 | |
LBB5_15: ## in Loop: Header=BB5_4 Depth=1 | |
movss -4(%rbx,%rdi), %xmm1 | |
testb %cl, %cl | |
jne LBB5_18 | |
## BB#16: ## in Loop: Header=BB5_4 Depth=1 | |
xorb %r9b, %r9b | |
xorps %xmm4, %xmm4 | |
jmp LBB5_19 | |
LBB5_17: ## in Loop: Header=BB5_4 Depth=1 | |
xorb %r9b, %r9b | |
xorps %xmm1, %xmm1 | |
cmpb $1, %cl | |
movaps %xmm1, %xmm4 | |
jne LBB5_19 | |
LBB5_18: ## in Loop: Header=BB5_4 Depth=1 | |
movss (%rbx,%rdi), %xmm4 | |
movb $1, %r9b | |
LBB5_19: ## in Loop: Header=BB5_4 Depth=1 | |
unpcklps %xmm2, %xmm1 ## xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] | |
unpcklps %xmm0, %xmm4 ## xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] | |
unpcklps %xmm1, %xmm4 ## xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] | |
movss -12(%rdx,%rdi), %xmm2 | |
xorps %xmm3, %xmm3 | |
cmpl $1, %esi | |
pxor %xmm8, %xmm8 | |
je LBB5_24 | |
## BB#20: ## in Loop: Header=BB5_4 Depth=1 | |
movss -8(%rdx,%rdi), %xmm8 | |
testl %esi, %esi | |
je LBB5_22 | |
## BB#21: ## in Loop: Header=BB5_4 Depth=1 | |
cmpl $3, %esi | |
jne LBB5_24 | |
LBB5_22: ## in Loop: Header=BB5_4 Depth=1 | |
movss -4(%rdx,%rdi), %xmm3 | |
testb %r9b, %r9b | |
jne LBB5_26 | |
## BB#23: ## in Loop: Header=BB5_4 Depth=1 | |
xorb %r14b, %r14b | |
pxor %xmm7, %xmm7 | |
jmp LBB5_27 | |
LBB5_24: ## %.thread8 | |
## in Loop: Header=BB5_4 Depth=1 | |
xorb %r14b, %r14b | |
cmpb $1, %r9b | |
jne LBB5_25 | |
LBB5_26: ## in Loop: Header=BB5_4 Depth=1 | |
movss (%rdx,%rdi), %xmm7 | |
movb $1, %r14b | |
jmp LBB5_27 | |
LBB5_25: ## in Loop: Header=BB5_4 Depth=1 | |
movaps %xmm3, %xmm7 | |
.align 4, 0x90 | |
LBB5_27: ## in Loop: Header=BB5_4 Depth=1 | |
movss -12(%r8,%rdi), %xmm0 | |
xorps %xmm1, %xmm1 | |
cmpl $1, %esi | |
movaps %xmm1, %xmm10 | |
movaps %xmm1, %xmm11 | |
je LBB5_32 | |
## BB#28: ## in Loop: Header=BB5_4 Depth=1 | |
movss -8(%r8,%rdi), %xmm10 | |
testl %esi, %esi | |
je LBB5_31 | |
## BB#29: ## in Loop: Header=BB5_4 Depth=1 | |
cmpl $3, %esi | |
jne LBB5_30 | |
LBB5_31: ## in Loop: Header=BB5_4 Depth=1 | |
movss -4(%r8,%rdi), %xmm11 | |
jmp LBB5_32 | |
LBB5_30: ## in Loop: Header=BB5_4 Depth=1 | |
movaps %xmm1, %xmm11 | |
.align 4, 0x90 | |
LBB5_32: ## %.thread9 | |
## in Loop: Header=BB5_4 Depth=1 | |
cmpb $1, %r14b | |
jne LBB5_34 | |
## BB#33: ## in Loop: Header=BB5_4 Depth=1 | |
movss (%r8,%rdi), %xmm1 | |
LBB5_34: ## in Loop: Header=BB5_4 Depth=1 | |
movaps %xmm13, %xmm15 | |
movaps %xmm12, -192(%rbp) ## 16-byte Spill | |
movss -12(%r15,%rdi), %xmm12 | |
xorps %xmm9, %xmm9 | |
cmpl $1, %esi | |
jne LBB5_36 | |
## BB#35: ## in Loop: Header=BB5_4 Depth=1 | |
movaps %xmm9, %xmm14 | |
movaps %xmm9, %xmm13 | |
jmp LBB5_41 | |
.align 4, 0x90 | |
LBB5_36: ## in Loop: Header=BB5_4 Depth=1 | |
movss -8(%r15,%rdi), %xmm13 | |
testl %esi, %esi | |
je LBB5_39 | |
## BB#37: ## in Loop: Header=BB5_4 Depth=1 | |
cmpl $3, %esi | |
jne LBB5_38 | |
LBB5_39: ## in Loop: Header=BB5_4 Depth=1 | |
movss -4(%r15,%rdi), %xmm14 | |
testl %esi, %esi | |
jne LBB5_41 | |
## BB#40: ## in Loop: Header=BB5_4 Depth=1 | |
movss (%r15,%rdi), %xmm9 | |
jmp LBB5_41 | |
LBB5_38: ## in Loop: Header=BB5_4 Depth=1 | |
movaps %xmm9, %xmm14 | |
.align 4, 0x90 | |
LBB5_41: ## in Loop: Header=BB5_4 Depth=1 | |
## Re-pack the gathered scalars into 4-wide vectors, then accumulate the
## same nine plane products as the fast path.
unpcklps %xmm2, %xmm3 ## xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] | |
unpcklps %xmm8, %xmm7 ## xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1] | |
unpcklps %xmm3, %xmm7 ## xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1] | |
unpcklps %xmm0, %xmm11 ## xmm11 = xmm11[0],xmm0[0],xmm11[1],xmm0[1] | |
unpcklps %xmm10, %xmm1 ## xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1] | |
unpcklps %xmm11, %xmm1 ## xmm1 = xmm1[0],xmm11[0],xmm1[1],xmm11[1] | |
movaps %xmm6, %xmm0 | |
mulps %xmm1, %xmm0 | |
movaps %xmm4, %xmm2 | |
mulps %xmm1, %xmm2 | |
mulps %xmm7, %xmm1 | |
movaps -128(%rbp), %xmm3 ## 16-byte Reload | |
addps %xmm1, %xmm3 | |
movaps %xmm3, -128(%rbp) ## 16-byte Spill | |
movaps -144(%rbp), %xmm1 ## 16-byte Reload | |
addps %xmm2, %xmm1 | |
movaps %xmm1, -144(%rbp) ## 16-byte Spill | |
movaps -112(%rbp), %xmm1 ## 16-byte Reload | |
addps %xmm0, %xmm1 | |
movaps %xmm1, -112(%rbp) ## 16-byte Spill | |
unpcklps %xmm12, %xmm14 ## xmm14 = xmm14[0],xmm12[0],xmm14[1],xmm12[1] | |
unpcklps %xmm13, %xmm9 ## xmm9 = xmm9[0],xmm13[0],xmm9[1],xmm13[1] | |
unpcklps %xmm14, %xmm9 ## xmm9 = xmm9[0],xmm14[0],xmm9[1],xmm14[1] | |
movaps %xmm7, %xmm0 | |
mulps %xmm9, %xmm0 | |
movaps %xmm15, %xmm13 | |
addps %xmm0, %xmm13 | |
movaps %xmm4, %xmm0 | |
mulps %xmm9, %xmm0 | |
addps %xmm0, %xmm5 | |
mulps %xmm6, %xmm9 | |
movaps -192(%rbp), %xmm12 ## 16-byte Reload | |
addps %xmm9, %xmm12 | |
cmpl %eax, %r11d | |
jne LBB5_51 | |
## BB#42: ## in Loop: Header=BB5_4 Depth=1 | |
movss -12(%r13,%rdi), %xmm0 | |
xorps %xmm1, %xmm1 | |
cmpl $1, %esi | |
jne LBB5_44 | |
## BB#43: ## in Loop: Header=BB5_4 Depth=1 | |
movaps %xmm1, %xmm3 | |
movaps %xmm1, %xmm2 | |
jmp LBB5_49 | |
.align 4, 0x90 | |
LBB5_51: ## in Loop: Header=BB5_4 Depth=1 | |
movups -12(%r13,%rdi), %xmm1 | |
jmp LBB5_52 | |
LBB5_44: ## in Loop: Header=BB5_4 Depth=1 | |
movss -8(%r13,%rdi), %xmm2 | |
testl %esi, %esi | |
je LBB5_47 | |
## BB#45: ## in Loop: Header=BB5_4 Depth=1 | |
cmpl $3, %esi | |
jne LBB5_46 | |
LBB5_47: ## in Loop: Header=BB5_4 Depth=1 | |
movss -4(%r13,%rdi), %xmm3 | |
testl %esi, %esi | |
jne LBB5_49 | |
## BB#48: ## in Loop: Header=BB5_4 Depth=1 | |
movss (%r13,%rdi), %xmm1 | |
jmp LBB5_49 | |
LBB5_46: ## in Loop: Header=BB5_4 Depth=1 | |
movaps %xmm1, %xmm3 | |
.align 4, 0x90 | |
LBB5_49: ## %.thread22 | |
## in Loop: Header=BB5_4 Depth=1 | |
unpcklps %xmm0, %xmm3 ## xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] | |
unpcklps %xmm2, %xmm1 ## xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] | |
unpcklps %xmm3, %xmm1 ## xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] | |
LBB5_52: ## in Loop: Header=BB5_4 Depth=1 | |
## Tail of every trip: fold the last plane's products, advance the byte
## offset (rdi) by 16 and the trip counter (eax) toward r10d.
mulps %xmm1, %xmm7 | |
movaps -176(%rbp), %xmm0 ## 16-byte Reload | |
addps %xmm7, %xmm0 | |
movaps %xmm0, %xmm2 | |
mulps %xmm1, %xmm6 | |
mulps %xmm1, %xmm4 | |
movaps -208(%rbp), %xmm0 ## 16-byte Reload | |
addps %xmm4, %xmm0 | |
movaps %xmm0, %xmm3 | |
movaps -160(%rbp), %xmm1 ## 16-byte Reload | |
addps %xmm6, %xmm1 | |
addq $16, %rdi | |
incl %eax | |
cmpl %r10d, %eax | |
jl LBB5_4 | |
LBB5_53: ## %._crit_edge | |
## Horizontal reduction into three 16-byte vectors at -96/-80/-64(%rbp),
## whose address (rdi) is the buffer argument for _msdFromMandG.
haddps -144(%rbp), %xmm1 ## 16-byte Folded Reload | |
movaps -112(%rbp), %xmm0 ## 16-byte Reload | |
haddps %xmm12, %xmm0 | |
haddps %xmm1, %xmm0 | |
movaps %xmm0, -96(%rbp) | |
movaps -128(%rbp), %xmm0 ## 16-byte Reload | |
haddps %xmm13, %xmm0 | |
haddps %xmm3, %xmm5 | |
haddps %xmm0, %xmm5 | |
movaps %xmm5, -80(%rbp) | |
haddps %xmm13, %xmm2 | |
haddps %xmm1, %xmm2 | |
movaps %xmm2, -64(%rbp) | |
leaq -96(%rbp), %rdi | |
movss -224(%rbp), %xmm0 ## 4-byte Reload | |
movss -220(%rbp), %xmm1 ## 4-byte Reload | |
movl -216(%rbp), %esi ## 4-byte Reload | |
xorl %edx, %edx | |
xorl %ecx, %ecx | |
callq _msdFromMandG | |
movaps %xmm0, %xmm2 | |
LBB5_54: | |
## Stack-protector epilogue: reload and recheck the canary.
movq ___stack_chk_guard@GOTPCREL(%rip), %rax | |
movq (%rax), %rax | |
cmpq -48(%rbp), %rax | |
jne LBB5_56 | |
## BB#55: ## %SP_return | |
movaps %xmm2, %xmm0 | |
addq $184, %rsp | |
popq %rbx | |
popq %r12 | |
popq %r13 | |
popq %r14 | |
popq %r15 | |
popq %rbp | |
ret | |
LBB5_56: ## %CallStackCheckFailBlk | |
callq ___stack_chk_fail | |
.cfi_endproc |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## ---------------------------------------------------------------------------
## _msd_axis_major -- variant 3 (older compiler output: Leh_func_* labels,
## no stack protector).  Straight-line version of the same computation:
## requires the per-plane count (esi) to be a multiple of 4 (asserted),
## keeps all nine 4-wide accumulators in registers (no spilling), and uses
## aligned movaps loads, so both arrays must be 16-byte aligned.
## Apparent SysV AMD64 inputs (inferred -- TODO confirm against C source):
##   edi = count forwarded to _msdFromMandG, esi = per-plane float count,
##   edx = plane stride in floats, rcx / r8 = the two coordinate arrays,
##   xmm0/xmm1 = scalar floats (compared on the identical-input fast path).
## Returns: xmm0 (from _msdFromMandG, or 0.0 on the fast path).
## ---------------------------------------------------------------------------
.globl _msd_axis_major | |
.align 4, 0x90 | |
_msd_axis_major: | |
Leh_func_begin7: | |
pushq %rbp | |
Ltmp18: | |
movq %rsp, %rbp | |
Ltmp19: | |
subq $48, %rsp | |
Ltmp20: | |
movl %edi, %eax | |
## Fast path: identical buffers AND ordered-equal scalars (setnp catches
## the NaN/unordered case) => skip all work and return 0.0.
cmpq %r8, %rcx | |
jne LBB7_2 | |
ucomiss %xmm1, %xmm0 | |
setnp %dil | |
sete %r9b | |
testb %dil, %r9b | |
jne LBB7_9 | |
LBB7_2: | |
## edi = count / 4 (trip count); assert count % 4 == 0.
movl %esi, %edi | |
sarl $2, %edi | |
testb $3, %sil | |
jne LBB7_5 | |
testl %edi, %edi | |
jg LBB7_6 | |
## Zero-trip case: zero all nine accumulators and go straight to reduce.
pxor %xmm2, %xmm2 | |
movaps %xmm2, %xmm3 | |
movaps %xmm2, %xmm4 | |
movaps %xmm2, %xmm5 | |
movaps %xmm2, %xmm6 | |
movaps %xmm2, %xmm7 | |
movaps %xmm2, %xmm8 | |
movaps %xmm2, %xmm9 | |
movaps %xmm2, %xmm10 | |
jmp LBB7_8 | |
LBB7_5: | |
## Assertion failure (count not a multiple of 4): never returns.
leaq ___func__.10832(%rip), %rdi | |
leaq L_.str2(%rip), %rsi | |
movl $402, %edx | |
leaq L_.str3(%rip), %rcx | |
callq ___assert_rtn | |
LBB7_6: | |
## Loop setup: rdx = plane-1 offset, rdi = plane-2 offset (2*stride).
movslq %edx, %rdx | |
movl %edi, %esi | |
leal (%rdx,%rdx), %edi | |
movslq %edi, %rdi | |
xorps %xmm2, %xmm2 | |
movaps %xmm2, %xmm3 | |
movaps %xmm2, %xmm4 | |
movaps %xmm2, %xmm5 | |
movaps %xmm2, %xmm6 | |
movaps %xmm2, %xmm7 | |
movaps %xmm2, %xmm8 | |
movaps %xmm2, %xmm9 | |
movaps %xmm2, %xmm10 | |
.align 4, 0x90 | |
LBB7_7: | |
## Each trip: aligned-load 4 floats from all 3 planes of both arrays and
## accumulate the 9 pairwise products into xmm2..xmm10.
movaps (%r8,%rdi,4), %xmm11 | |
movaps (%rcx,%rdi,4), %xmm12 | |
movaps %xmm12, %xmm13 | |
mulps %xmm11, %xmm13 | |
addps %xmm13, %xmm2 | |
movaps (%rcx,%rdx,4), %xmm13 | |
movaps %xmm13, %xmm14 | |
mulps %xmm11, %xmm14 | |
addps %xmm14, %xmm10 | |
movaps (%r8,%rdx,4), %xmm14 | |
movaps %xmm12, %xmm15 | |
mulps %xmm14, %xmm15 | |
addps %xmm15, %xmm3 | |
movaps %xmm13, %xmm15 | |
mulps %xmm14, %xmm15 | |
addps %xmm15, %xmm9 | |
movaps (%rcx), %xmm15 | |
mulps %xmm15, %xmm11 | |
addps %xmm11, %xmm7 | |
mulps %xmm15, %xmm14 | |
addps %xmm14, %xmm6 | |
movaps (%r8), %xmm11 | |
mulps %xmm11, %xmm12 | |
addps %xmm12, %xmm4 | |
mulps %xmm11, %xmm13 | |
addps %xmm13, %xmm8 | |
mulps %xmm11, %xmm15 | |
addps %xmm15, %xmm5 | |
addq $16, %r8 | |
addq $16, %rcx | |
decq %rsi | |
jne LBB7_7 | |
LBB7_8: | |
## Horizontal reduction into three 16-byte vectors at -48/-32/-16(%rbp);
## pass their base address plus the saved count to _msdFromMandG.
haddps %xmm8, %xmm7 | |
haddps %xmm6, %xmm5 | |
haddps %xmm7, %xmm5 | |
movapd %xmm5, -48(%rbp) | |
haddps %xmm3, %xmm4 | |
haddps %xmm10, %xmm9 | |
haddps %xmm4, %xmm9 | |
movapd %xmm9, -32(%rbp) | |
haddps %xmm3, %xmm2 | |
haddps %xmm7, %xmm2 | |
movapd %xmm2, -16(%rbp) | |
leaq -48(%rbp), %rdi | |
xorl %edx, %edx | |
xorl %ecx, %ecx | |
movl %eax, %esi | |
callq _msdFromMandG | |
jmp LBB7_10 | |
LBB7_9: | |
pxor %xmm0, %xmm0 | |
LBB7_10: | |
addq $48, %rsp | |
popq %rbp | |
ret | |
Leh_func_end7: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## ---------------------------------------------------------------------------
## _msd_axis_major -- variant 4 (older compiler output: Leh_func_* labels,
## no stack protector).  Arbitrary-count version of variant 3: no assert;
## on the final loop trip each 4-wide vector is gathered lane by lane with
## scalar movss loads guarded by r9d = count mod 4 (missing lanes stay
## zero), everything else uses unaligned movups loads.
## Apparent SysV AMD64 inputs (inferred -- TODO confirm against C source):
##   edi = per-plane float count (kept in eax, also passed to the finisher),
##   edx = plane stride in floats, rcx / r8 = the two coordinate arrays,
##   xmm0/xmm1 = scalar floats (xmm1 is spilled to -84(%rbp) for the call).
## Returns: xmm0 (from _msdFromMandG, or 0.0 on the identical-input path).
## ---------------------------------------------------------------------------
.globl _msd_axis_major | |
.align 4, 0x90 | |
_msd_axis_major: | |
Leh_func_begin7: | |
pushq %rbp | |
Ltmp18: | |
movq %rsp, %rbp | |
Ltmp19: | |
pushq %r15 | |
pushq %r14 | |
pushq %rbx | |
subq $72, %rsp | |
Ltmp20: | |
movl %edi, %eax | |
cmpq %r8, %rcx | |
movss %xmm1, -84(%rbp) | |
jne LBB7_2 | |
## Fast path: identical buffers and ordered-equal scalars => return 0.0.
ucomiss %xmm1, %xmm0 | |
setnp %sil | |
sete %dil | |
testb %sil, %dil | |
jne LBB7_76 | |
LBB7_2: | |
## Sign-corrected divide prep; if count <= 0 (edi = count+3 <= 3) skip
## the loop with all nine accumulators zeroed.
movl %eax, %esi | |
sarl $31, %esi | |
shrl $30, %esi | |
leal 3(%rax), %edi | |
cmpl $3, %edi | |
jg LBB7_4 | |
pxor %xmm1, %xmm1 | |
movaps %xmm1, %xmm2 | |
movaps %xmm1, %xmm3 | |
movaps %xmm1, %xmm4 | |
movaps %xmm1, %xmm5 | |
movaps %xmm1, %xmm6 | |
movaps %xmm1, %xmm7 | |
movaps %xmm1, %xmm8 | |
movaps %xmm1, %xmm9 | |
jmp LBB7_75 | |
LBB7_4: | |
## r9d = count mod 4 (remainder lanes); esi = index of the last trip;
## rdi = trip count (max(1, ceil(count/4))).  Six base pointers are
## biased by +12 bytes so lanes address as -12/-8/-4/0(base, r14).
addl %eax, %esi | |
andl $-4, %esi | |
movl %eax, %r9d | |
subl %esi, %r9d | |
movl %edi, %esi | |
sarl $31, %esi | |
shrl $30, %esi | |
addl %edi, %esi | |
sarl $2, %esi | |
decl %esi | |
leaq 1(%rsi), %r10 | |
cmpl $7, %edi | |
movl $1, %edi | |
cmovgq %r10, %rdi | |
movslq %edx, %rdx | |
leaq 12(%rcx,%rdx,4), %r10 | |
leal (%rdx,%rdx), %r11d | |
movslq %r11d, %r11 | |
leaq 12(%rcx,%r11,4), %rbx | |
leaq 12(%r8,%rdx,4), %rdx | |
leaq 12(%r8,%r11,4), %r11 | |
addq $12, %rcx | |
addq $12, %r8 | |
xorps %xmm1, %xmm1 | |
xorl %r14d, %r14d | |
movq %r14, %r15 | |
movaps %xmm1, %xmm2 | |
movaps %xmm1, %xmm3 | |
movaps %xmm1, %xmm4 | |
movaps %xmm1, %xmm5 | |
movaps %xmm1, %xmm6 | |
movaps %xmm1, %xmm7 | |
movaps %xmm1, %xmm8 | |
movaps %xmm1, %xmm9 | |
.align 4, 0x90 | |
## Main loop (r14 = byte offset, r15 = trip index).  Every trip except
## the last (r15d == esi) takes the movups fast paths at LBB7_47 /
## LBB7_60 / LBB7_73; the last trip gathers only the lanes selected by
## r9d and re-packs them with unpcklps, leaving the tail lanes zero.
LBB7_5: | |
cmpl %esi, %r15d | |
jne LBB7_47 | |
testl %r9d, %r9d | |
je LBB7_8 | |
pxor %xmm10, %xmm10 | |
jmp LBB7_9 | |
LBB7_8: | |
movss (%rcx,%r14), %xmm10 | |
LBB7_9: | |
testl %r9d, %r9d | |
je LBB7_12 | |
cmpl $3, %r9d | |
je LBB7_12 | |
pxor %xmm11, %xmm11 | |
jmp LBB7_13 | |
LBB7_12: | |
movss -4(%rcx,%r14), %xmm11 | |
LBB7_13: | |
cmpl $1, %r9d | |
jne LBB7_15 | |
pxor %xmm12, %xmm12 | |
jmp LBB7_16 | |
LBB7_15: | |
movss -8(%rcx,%r14), %xmm12 | |
LBB7_16: | |
unpcklps %xmm12, %xmm10 | |
movss -12(%rcx,%r14), %xmm12 | |
unpcklps %xmm12, %xmm11 | |
unpcklps %xmm11, %xmm10 | |
testl %r9d, %r9d | |
je LBB7_18 | |
pxor %xmm11, %xmm11 | |
jmp LBB7_19 | |
LBB7_18: | |
movss (%r10,%r14), %xmm11 | |
LBB7_19: | |
testl %r9d, %r9d | |
je LBB7_22 | |
cmpl $3, %r9d | |
je LBB7_22 | |
pxor %xmm12, %xmm12 | |
jmp LBB7_23 | |
LBB7_22: | |
movss -4(%r10,%r14), %xmm12 | |
LBB7_23: | |
cmpl $1, %r9d | |
jne LBB7_25 | |
pxor %xmm13, %xmm13 | |
jmp LBB7_26 | |
LBB7_25: | |
movss -8(%r10,%r14), %xmm13 | |
LBB7_26: | |
unpcklps %xmm13, %xmm11 | |
movss -12(%r10,%r14), %xmm13 | |
unpcklps %xmm13, %xmm12 | |
unpcklps %xmm12, %xmm11 | |
testl %r9d, %r9d | |
je LBB7_28 | |
pxor %xmm12, %xmm12 | |
jmp LBB7_29 | |
LBB7_28: | |
movss (%rbx,%r14), %xmm12 | |
LBB7_29: | |
testl %r9d, %r9d | |
je LBB7_32 | |
cmpl $3, %r9d | |
je LBB7_32 | |
pxor %xmm13, %xmm13 | |
jmp LBB7_33 | |
LBB7_32: | |
movss -4(%rbx,%r14), %xmm13 | |
LBB7_33: | |
cmpl $1, %r9d | |
jne LBB7_35 | |
pxor %xmm14, %xmm14 | |
jmp LBB7_36 | |
LBB7_35: | |
movss -8(%rbx,%r14), %xmm14 | |
LBB7_36: | |
unpcklps %xmm14, %xmm12 | |
movss -12(%rbx,%r14), %xmm14 | |
unpcklps %xmm14, %xmm13 | |
unpcklps %xmm13, %xmm12 | |
testl %r9d, %r9d | |
je LBB7_38 | |
pxor %xmm13, %xmm13 | |
jmp LBB7_39 | |
LBB7_38: | |
movss (%r8,%r14), %xmm13 | |
LBB7_39: | |
testl %r9d, %r9d | |
je LBB7_42 | |
cmpl $3, %r9d | |
je LBB7_42 | |
pxor %xmm14, %xmm14 | |
jmp LBB7_43 | |
LBB7_42: | |
movss -4(%r8,%r14), %xmm14 | |
LBB7_43: | |
cmpl $1, %r9d | |
jne LBB7_45 | |
pxor %xmm15, %xmm15 | |
jmp LBB7_46 | |
LBB7_45: | |
movss -8(%r8,%r14), %xmm15 | |
LBB7_46: | |
unpcklps %xmm15, %xmm13 | |
movss -12(%r8,%r14), %xmm15 | |
unpcklps %xmm15, %xmm14 | |
unpcklps %xmm14, %xmm13 | |
jmp LBB7_48 | |
LBB7_47: | |
## Fast path: full 4-wide unaligned loads from four of the six planes.
movups -12(%r8,%r14), %xmm13 | |
movups -12(%rbx,%r14), %xmm12 | |
movups -12(%r10,%r14), %xmm11 | |
movups -12(%rcx,%r14), %xmm10 | |
LBB7_48: | |
## First accumulation group (against the xmm13 plane vector).
cmpl %esi, %r15d | |
movaps %xmm12, %xmm14 | |
mulps %xmm13, %xmm14 | |
addps %xmm14, %xmm7 | |
movaps %xmm11, %xmm14 | |
mulps %xmm13, %xmm14 | |
addps %xmm14, %xmm4 | |
mulps %xmm10, %xmm13 | |
addps %xmm13, %xmm1 | |
jne LBB7_60 | |
testl %r9d, %r9d | |
je LBB7_51 | |
pxor %xmm13, %xmm13 | |
jmp LBB7_52 | |
LBB7_51: | |
movss (%rdx,%r14), %xmm13 | |
LBB7_52: | |
testl %r9d, %r9d | |
je LBB7_55 | |
cmpl $3, %r9d | |
je LBB7_55 | |
pxor %xmm14, %xmm14 | |
jmp LBB7_56 | |
LBB7_55: | |
movss -4(%rdx,%r14), %xmm14 | |
LBB7_56: | |
cmpl $1, %r9d | |
jne LBB7_58 | |
pxor %xmm15, %xmm15 | |
jmp LBB7_59 | |
LBB7_58: | |
movss -8(%rdx,%r14), %xmm15 | |
LBB7_59: | |
unpcklps %xmm15, %xmm13 | |
movss -12(%rdx,%r14), %xmm15 | |
unpcklps %xmm15, %xmm14 | |
unpcklps %xmm14, %xmm13 | |
jmp LBB7_61 | |
LBB7_60: | |
movups -12(%rdx,%r14), %xmm13 | |
LBB7_61: | |
## Second accumulation group (against the rdx-plane vector).
cmpl %esi, %r15d | |
movaps %xmm12, %xmm14 | |
mulps %xmm13, %xmm14 | |
addps %xmm14, %xmm8 | |
movaps %xmm11, %xmm14 | |
mulps %xmm13, %xmm14 | |
addps %xmm14, %xmm5 | |
mulps %xmm10, %xmm13 | |
addps %xmm13, %xmm2 | |
jne LBB7_73 | |
testl %r9d, %r9d | |
je LBB7_64 | |
pxor %xmm13, %xmm13 | |
jmp LBB7_65 | |
LBB7_64: | |
movss (%r11,%r14), %xmm13 | |
LBB7_65: | |
testl %r9d, %r9d | |
je LBB7_68 | |
cmpl $3, %r9d | |
je LBB7_68 | |
pxor %xmm14, %xmm14 | |
jmp LBB7_69 | |
LBB7_68: | |
movss -4(%r11,%r14), %xmm14 | |
LBB7_69: | |
cmpl $1, %r9d | |
jne LBB7_71 | |
pxor %xmm15, %xmm15 | |
jmp LBB7_72 | |
LBB7_71: | |
movss -8(%r11,%r14), %xmm15 | |
LBB7_72: | |
unpcklps %xmm15, %xmm13 | |
movss -12(%r11,%r14), %xmm15 | |
unpcklps %xmm15, %xmm14 | |
unpcklps %xmm14, %xmm13 | |
jmp LBB7_74 | |
LBB7_73: | |
movups -12(%r11,%r14), %xmm13 | |
LBB7_74: | |
## Third accumulation group, then advance offset/trip and loop.
addq $16, %r14 | |
incq %r15 | |
cmpq %r15, %rdi | |
mulps %xmm13, %xmm12 | |
addps %xmm12, %xmm9 | |
mulps %xmm13, %xmm11 | |
addps %xmm11, %xmm6 | |
mulps %xmm13, %xmm10 | |
addps %xmm10, %xmm3 | |
jne LBB7_5 | |
LBB7_75: | |
## Horizontal reduction into three 16-byte vectors at -80/-64/-48(%rbp);
## reload the spilled xmm1 and call the shared finisher.
haddps %xmm4, %xmm3 | |
haddps %xmm2, %xmm1 | |
haddps %xmm3, %xmm1 | |
movapd %xmm1, -80(%rbp) | |
haddps %xmm8, %xmm7 | |
haddps %xmm6, %xmm5 | |
haddps %xmm7, %xmm5 | |
movapd %xmm5, -64(%rbp) | |
haddps %xmm8, %xmm9 | |
haddps %xmm3, %xmm9 | |
movapd %xmm9, -48(%rbp) | |
leaq -80(%rbp), %rdi | |
xorl %edx, %edx | |
xorl %ecx, %ecx | |
movss -84(%rbp), %xmm1 | |
movl %eax, %esi | |
callq _msdFromMandG | |
jmp LBB7_77 | |
LBB7_76: | |
pxor %xmm0, %xmm0 | |
LBB7_77: | |
addq $72, %rsp | |
popq %rbx | |
popq %r14 | |
popq %r15 | |
popq %rbp | |
ret | |
Leh_func_end7: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment