Created
May 4, 2019 04:41
-
-
Save ychen306/1accc511e0401d3a5777e219c5f02ee9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Unrolled step 1 of 8. Intel operand order (destination first).
# NOTE(review): '#' comments assume a GAS/LLVM-style assembler -- confirm the toolchain.
# Register use within this step:
#   rdi         base for two 32-byte packed-float loads; advanced by 64 at the end
#   rsi         base for six dword broadcasts; advanced by 24 (6 * 4 bytes) at the end
#   rax         decremented once; presumably a trip counter -- nothing in this step reads it
#   ymm0, ymm1  the two vectors loaded through rdi
#   ymm2, ymm3  broadcast scalars, reloaded for each of the three groups below
#   ymm4-ymm15  twelve accumulators, each updated exactly once
# Accumulator initialisation and write-back are not part of this listing.
vmovups      ymm0, ymmword ptr [rdi - 64]   # unaligned load of 8 floats
vmovups      ymm1, ymmword ptr [rdi - 32]   # the next 8 floats
vbroadcastss ymm2, dword ptr [rsi - 32]     # one float copied to all 8 lanes
vbroadcastss ymm3, dword ptr [rsi - 28]
prefetcht0   byte ptr [rdi + 512]           # cache hint for the line at rdi + 512
# Group 1: accumulators ymm8, ymm12 (added) and ymm9, ymm13 (subtracted).
vfmadd231ps  ymm8,  ymm0, ymm2              # ymm8  += ymm0 * ymm2
vfmadd231ps  ymm12, ymm1, ymm2              # ymm12 += ymm1 * ymm2
vfnmadd231ps ymm9,  ymm0, ymm3              # ymm9  -= ymm0 * ymm3
vfnmadd231ps ymm13, ymm1, ymm3              # ymm13 -= ymm1 * ymm3
# Group 2: same pattern with the next two scalars.
vbroadcastss ymm2, dword ptr [rsi - 24]
vbroadcastss ymm3, dword ptr [rsi - 20]
vfmadd231ps  ymm10, ymm0, ymm2              # ymm10 += ymm0 * ymm2
vfmadd231ps  ymm14, ymm1, ymm2              # ymm14 += ymm1 * ymm2
vfnmadd231ps ymm11, ymm0, ymm3              # ymm11 -= ymm0 * ymm3
vfnmadd231ps ymm15, ymm1, ymm3              # ymm15 -= ymm1 * ymm3
# Group 3: same pattern with the last two scalars.
vbroadcastss ymm2, dword ptr [rsi - 16]
vbroadcastss ymm3, dword ptr [rsi - 12]
vfmadd231ps  ymm4,  ymm0, ymm2              # ymm4  += ymm0 * ymm2
vfmadd231ps  ymm6,  ymm1, ymm2              # ymm6  += ymm1 * ymm2
vfnmadd231ps ymm5,  ymm0, ymm3              # ymm5  -= ymm0 * ymm3
vfnmadd231ps ymm7,  ymm1, ymm3              # ymm7  -= ymm1 * ymm3
# Advance both pointers past the data consumed above.
add          rsi, 24                        # six dwords were broadcast
add          rdi, 64                        # two 32-byte vectors were loaded
dec          rax                            # rax -= 1; the flags are not read in this step
# Unrolled step 2 of 8. Intel operand order (destination first).
# NOTE(review): '#' comments assume a GAS/LLVM-style assembler -- confirm the toolchain.
# Register use within this step:
#   rdi         base for two 32-byte packed-float loads; advanced by 64 at the end
#   rsi         base for six dword broadcasts; advanced by 24 (6 * 4 bytes) at the end
#   rax         decremented once; presumably a trip counter -- nothing in this step reads it
#   ymm0, ymm1  the two vectors loaded through rdi
#   ymm2, ymm3  broadcast scalars, reloaded for each of the three groups below
#   ymm4-ymm15  twelve accumulators, each updated exactly once
# Accumulator initialisation and write-back are not part of this listing.
vmovups      ymm0, ymmword ptr [rdi - 64]   # unaligned load of 8 floats
vmovups      ymm1, ymmword ptr [rdi - 32]   # the next 8 floats
vbroadcastss ymm2, dword ptr [rsi - 32]     # one float copied to all 8 lanes
vbroadcastss ymm3, dword ptr [rsi - 28]
prefetcht0   byte ptr [rdi + 512]           # cache hint for the line at rdi + 512
# Group 1: accumulators ymm8, ymm12 (added) and ymm9, ymm13 (subtracted).
vfmadd231ps  ymm8,  ymm0, ymm2              # ymm8  += ymm0 * ymm2
vfmadd231ps  ymm12, ymm1, ymm2              # ymm12 += ymm1 * ymm2
vfnmadd231ps ymm9,  ymm0, ymm3              # ymm9  -= ymm0 * ymm3
vfnmadd231ps ymm13, ymm1, ymm3              # ymm13 -= ymm1 * ymm3
# Group 2: same pattern with the next two scalars.
vbroadcastss ymm2, dword ptr [rsi - 24]
vbroadcastss ymm3, dword ptr [rsi - 20]
vfmadd231ps  ymm10, ymm0, ymm2              # ymm10 += ymm0 * ymm2
vfmadd231ps  ymm14, ymm1, ymm2              # ymm14 += ymm1 * ymm2
vfnmadd231ps ymm11, ymm0, ymm3              # ymm11 -= ymm0 * ymm3
vfnmadd231ps ymm15, ymm1, ymm3              # ymm15 -= ymm1 * ymm3
# Group 3: same pattern with the last two scalars.
vbroadcastss ymm2, dword ptr [rsi - 16]
vbroadcastss ymm3, dword ptr [rsi - 12]
vfmadd231ps  ymm4,  ymm0, ymm2              # ymm4  += ymm0 * ymm2
vfmadd231ps  ymm6,  ymm1, ymm2              # ymm6  += ymm1 * ymm2
vfnmadd231ps ymm5,  ymm0, ymm3              # ymm5  -= ymm0 * ymm3
vfnmadd231ps ymm7,  ymm1, ymm3              # ymm7  -= ymm1 * ymm3
# Advance both pointers past the data consumed above.
add          rsi, 24                        # six dwords were broadcast
add          rdi, 64                        # two 32-byte vectors were loaded
dec          rax                            # rax -= 1; the flags are not read in this step
# Unrolled step 3 of 8. Intel operand order (destination first).
# NOTE(review): '#' comments assume a GAS/LLVM-style assembler -- confirm the toolchain.
# Register use within this step:
#   rdi         base for two 32-byte packed-float loads; advanced by 64 at the end
#   rsi         base for six dword broadcasts; advanced by 24 (6 * 4 bytes) at the end
#   rax         decremented once; presumably a trip counter -- nothing in this step reads it
#   ymm0, ymm1  the two vectors loaded through rdi
#   ymm2, ymm3  broadcast scalars, reloaded for each of the three groups below
#   ymm4-ymm15  twelve accumulators, each updated exactly once
# Accumulator initialisation and write-back are not part of this listing.
vmovups      ymm0, ymmword ptr [rdi - 64]   # unaligned load of 8 floats
vmovups      ymm1, ymmword ptr [rdi - 32]   # the next 8 floats
vbroadcastss ymm2, dword ptr [rsi - 32]     # one float copied to all 8 lanes
vbroadcastss ymm3, dword ptr [rsi - 28]
prefetcht0   byte ptr [rdi + 512]           # cache hint for the line at rdi + 512
# Group 1: accumulators ymm8, ymm12 (added) and ymm9, ymm13 (subtracted).
vfmadd231ps  ymm8,  ymm0, ymm2              # ymm8  += ymm0 * ymm2
vfmadd231ps  ymm12, ymm1, ymm2              # ymm12 += ymm1 * ymm2
vfnmadd231ps ymm9,  ymm0, ymm3              # ymm9  -= ymm0 * ymm3
vfnmadd231ps ymm13, ymm1, ymm3              # ymm13 -= ymm1 * ymm3
# Group 2: same pattern with the next two scalars.
vbroadcastss ymm2, dword ptr [rsi - 24]
vbroadcastss ymm3, dword ptr [rsi - 20]
vfmadd231ps  ymm10, ymm0, ymm2              # ymm10 += ymm0 * ymm2
vfmadd231ps  ymm14, ymm1, ymm2              # ymm14 += ymm1 * ymm2
vfnmadd231ps ymm11, ymm0, ymm3              # ymm11 -= ymm0 * ymm3
vfnmadd231ps ymm15, ymm1, ymm3              # ymm15 -= ymm1 * ymm3
# Group 3: same pattern with the last two scalars.
vbroadcastss ymm2, dword ptr [rsi - 16]
vbroadcastss ymm3, dword ptr [rsi - 12]
vfmadd231ps  ymm4,  ymm0, ymm2              # ymm4  += ymm0 * ymm2
vfmadd231ps  ymm6,  ymm1, ymm2              # ymm6  += ymm1 * ymm2
vfnmadd231ps ymm5,  ymm0, ymm3              # ymm5  -= ymm0 * ymm3
vfnmadd231ps ymm7,  ymm1, ymm3              # ymm7  -= ymm1 * ymm3
# Advance both pointers past the data consumed above.
add          rsi, 24                        # six dwords were broadcast
add          rdi, 64                        # two 32-byte vectors were loaded
dec          rax                            # rax -= 1; the flags are not read in this step
# Unrolled step 4 of 8. Intel operand order (destination first).
# NOTE(review): '#' comments assume a GAS/LLVM-style assembler -- confirm the toolchain.
# Register use within this step:
#   rdi         base for two 32-byte packed-float loads; advanced by 64 at the end
#   rsi         base for six dword broadcasts; advanced by 24 (6 * 4 bytes) at the end
#   rax         decremented once; presumably a trip counter -- nothing in this step reads it
#   ymm0, ymm1  the two vectors loaded through rdi
#   ymm2, ymm3  broadcast scalars, reloaded for each of the three groups below
#   ymm4-ymm15  twelve accumulators, each updated exactly once
# Accumulator initialisation and write-back are not part of this listing.
vmovups      ymm0, ymmword ptr [rdi - 64]   # unaligned load of 8 floats
vmovups      ymm1, ymmword ptr [rdi - 32]   # the next 8 floats
vbroadcastss ymm2, dword ptr [rsi - 32]     # one float copied to all 8 lanes
vbroadcastss ymm3, dword ptr [rsi - 28]
prefetcht0   byte ptr [rdi + 512]           # cache hint for the line at rdi + 512
# Group 1: accumulators ymm8, ymm12 (added) and ymm9, ymm13 (subtracted).
vfmadd231ps  ymm8,  ymm0, ymm2              # ymm8  += ymm0 * ymm2
vfmadd231ps  ymm12, ymm1, ymm2              # ymm12 += ymm1 * ymm2
vfnmadd231ps ymm9,  ymm0, ymm3              # ymm9  -= ymm0 * ymm3
vfnmadd231ps ymm13, ymm1, ymm3              # ymm13 -= ymm1 * ymm3
# Group 2: same pattern with the next two scalars.
vbroadcastss ymm2, dword ptr [rsi - 24]
vbroadcastss ymm3, dword ptr [rsi - 20]
vfmadd231ps  ymm10, ymm0, ymm2              # ymm10 += ymm0 * ymm2
vfmadd231ps  ymm14, ymm1, ymm2              # ymm14 += ymm1 * ymm2
vfnmadd231ps ymm11, ymm0, ymm3              # ymm11 -= ymm0 * ymm3
vfnmadd231ps ymm15, ymm1, ymm3              # ymm15 -= ymm1 * ymm3
# Group 3: same pattern with the last two scalars.
vbroadcastss ymm2, dword ptr [rsi - 16]
vbroadcastss ymm3, dword ptr [rsi - 12]
vfmadd231ps  ymm4,  ymm0, ymm2              # ymm4  += ymm0 * ymm2
vfmadd231ps  ymm6,  ymm1, ymm2              # ymm6  += ymm1 * ymm2
vfnmadd231ps ymm5,  ymm0, ymm3              # ymm5  -= ymm0 * ymm3
vfnmadd231ps ymm7,  ymm1, ymm3              # ymm7  -= ymm1 * ymm3
# Advance both pointers past the data consumed above.
add          rsi, 24                        # six dwords were broadcast
add          rdi, 64                        # two 32-byte vectors were loaded
dec          rax                            # rax -= 1; the flags are not read in this step
# Unrolled step 5 of 8. Intel operand order (destination first).
# NOTE(review): '#' comments assume a GAS/LLVM-style assembler -- confirm the toolchain.
# Register use within this step:
#   rdi         base for two 32-byte packed-float loads; advanced by 64 at the end
#   rsi         base for six dword broadcasts; advanced by 24 (6 * 4 bytes) at the end
#   rax         decremented once; presumably a trip counter -- nothing in this step reads it
#   ymm0, ymm1  the two vectors loaded through rdi
#   ymm2, ymm3  broadcast scalars, reloaded for each of the three groups below
#   ymm4-ymm15  twelve accumulators, each updated exactly once
# Accumulator initialisation and write-back are not part of this listing.
vmovups      ymm0, ymmword ptr [rdi - 64]   # unaligned load of 8 floats
vmovups      ymm1, ymmword ptr [rdi - 32]   # the next 8 floats
vbroadcastss ymm2, dword ptr [rsi - 32]     # one float copied to all 8 lanes
vbroadcastss ymm3, dword ptr [rsi - 28]
prefetcht0   byte ptr [rdi + 512]           # cache hint for the line at rdi + 512
# Group 1: accumulators ymm8, ymm12 (added) and ymm9, ymm13 (subtracted).
vfmadd231ps  ymm8,  ymm0, ymm2              # ymm8  += ymm0 * ymm2
vfmadd231ps  ymm12, ymm1, ymm2              # ymm12 += ymm1 * ymm2
vfnmadd231ps ymm9,  ymm0, ymm3              # ymm9  -= ymm0 * ymm3
vfnmadd231ps ymm13, ymm1, ymm3              # ymm13 -= ymm1 * ymm3
# Group 2: same pattern with the next two scalars.
vbroadcastss ymm2, dword ptr [rsi - 24]
vbroadcastss ymm3, dword ptr [rsi - 20]
vfmadd231ps  ymm10, ymm0, ymm2              # ymm10 += ymm0 * ymm2
vfmadd231ps  ymm14, ymm1, ymm2              # ymm14 += ymm1 * ymm2
vfnmadd231ps ymm11, ymm0, ymm3              # ymm11 -= ymm0 * ymm3
vfnmadd231ps ymm15, ymm1, ymm3              # ymm15 -= ymm1 * ymm3
# Group 3: same pattern with the last two scalars.
vbroadcastss ymm2, dword ptr [rsi - 16]
vbroadcastss ymm3, dword ptr [rsi - 12]
vfmadd231ps  ymm4,  ymm0, ymm2              # ymm4  += ymm0 * ymm2
vfmadd231ps  ymm6,  ymm1, ymm2              # ymm6  += ymm1 * ymm2
vfnmadd231ps ymm5,  ymm0, ymm3              # ymm5  -= ymm0 * ymm3
vfnmadd231ps ymm7,  ymm1, ymm3              # ymm7  -= ymm1 * ymm3
# Advance both pointers past the data consumed above.
add          rsi, 24                        # six dwords were broadcast
add          rdi, 64                        # two 32-byte vectors were loaded
dec          rax                            # rax -= 1; the flags are not read in this step
# Unrolled step 6 of 8. Intel operand order (destination first).
# NOTE(review): '#' comments assume a GAS/LLVM-style assembler -- confirm the toolchain.
# Register use within this step:
#   rdi         base for two 32-byte packed-float loads; advanced by 64 at the end
#   rsi         base for six dword broadcasts; advanced by 24 (6 * 4 bytes) at the end
#   rax         decremented once; presumably a trip counter -- nothing in this step reads it
#   ymm0, ymm1  the two vectors loaded through rdi
#   ymm2, ymm3  broadcast scalars, reloaded for each of the three groups below
#   ymm4-ymm15  twelve accumulators, each updated exactly once
# Accumulator initialisation and write-back are not part of this listing.
vmovups      ymm0, ymmword ptr [rdi - 64]   # unaligned load of 8 floats
vmovups      ymm1, ymmword ptr [rdi - 32]   # the next 8 floats
vbroadcastss ymm2, dword ptr [rsi - 32]     # one float copied to all 8 lanes
vbroadcastss ymm3, dword ptr [rsi - 28]
prefetcht0   byte ptr [rdi + 512]           # cache hint for the line at rdi + 512
# Group 1: accumulators ymm8, ymm12 (added) and ymm9, ymm13 (subtracted).
vfmadd231ps  ymm8,  ymm0, ymm2              # ymm8  += ymm0 * ymm2
vfmadd231ps  ymm12, ymm1, ymm2              # ymm12 += ymm1 * ymm2
vfnmadd231ps ymm9,  ymm0, ymm3              # ymm9  -= ymm0 * ymm3
vfnmadd231ps ymm13, ymm1, ymm3              # ymm13 -= ymm1 * ymm3
# Group 2: same pattern with the next two scalars.
vbroadcastss ymm2, dword ptr [rsi - 24]
vbroadcastss ymm3, dword ptr [rsi - 20]
vfmadd231ps  ymm10, ymm0, ymm2              # ymm10 += ymm0 * ymm2
vfmadd231ps  ymm14, ymm1, ymm2              # ymm14 += ymm1 * ymm2
vfnmadd231ps ymm11, ymm0, ymm3              # ymm11 -= ymm0 * ymm3
vfnmadd231ps ymm15, ymm1, ymm3              # ymm15 -= ymm1 * ymm3
# Group 3: same pattern with the last two scalars.
vbroadcastss ymm2, dword ptr [rsi - 16]
vbroadcastss ymm3, dword ptr [rsi - 12]
vfmadd231ps  ymm4,  ymm0, ymm2              # ymm4  += ymm0 * ymm2
vfmadd231ps  ymm6,  ymm1, ymm2              # ymm6  += ymm1 * ymm2
vfnmadd231ps ymm5,  ymm0, ymm3              # ymm5  -= ymm0 * ymm3
vfnmadd231ps ymm7,  ymm1, ymm3              # ymm7  -= ymm1 * ymm3
# Advance both pointers past the data consumed above.
add          rsi, 24                        # six dwords were broadcast
add          rdi, 64                        # two 32-byte vectors were loaded
dec          rax                            # rax -= 1; the flags are not read in this step
# Unrolled step 7 of 8. Intel operand order (destination first).
# NOTE(review): '#' comments assume a GAS/LLVM-style assembler -- confirm the toolchain.
# Register use within this step:
#   rdi         base for two 32-byte packed-float loads; advanced by 64 at the end
#   rsi         base for six dword broadcasts; advanced by 24 (6 * 4 bytes) at the end
#   rax         decremented once; presumably a trip counter -- nothing in this step reads it
#   ymm0, ymm1  the two vectors loaded through rdi
#   ymm2, ymm3  broadcast scalars, reloaded for each of the three groups below
#   ymm4-ymm15  twelve accumulators, each updated exactly once
# Accumulator initialisation and write-back are not part of this listing.
vmovups      ymm0, ymmword ptr [rdi - 64]   # unaligned load of 8 floats
vmovups      ymm1, ymmword ptr [rdi - 32]   # the next 8 floats
vbroadcastss ymm2, dword ptr [rsi - 32]     # one float copied to all 8 lanes
vbroadcastss ymm3, dword ptr [rsi - 28]
prefetcht0   byte ptr [rdi + 512]           # cache hint for the line at rdi + 512
# Group 1: accumulators ymm8, ymm12 (added) and ymm9, ymm13 (subtracted).
vfmadd231ps  ymm8,  ymm0, ymm2              # ymm8  += ymm0 * ymm2
vfmadd231ps  ymm12, ymm1, ymm2              # ymm12 += ymm1 * ymm2
vfnmadd231ps ymm9,  ymm0, ymm3              # ymm9  -= ymm0 * ymm3
vfnmadd231ps ymm13, ymm1, ymm3              # ymm13 -= ymm1 * ymm3
# Group 2: same pattern with the next two scalars.
vbroadcastss ymm2, dword ptr [rsi - 24]
vbroadcastss ymm3, dword ptr [rsi - 20]
vfmadd231ps  ymm10, ymm0, ymm2              # ymm10 += ymm0 * ymm2
vfmadd231ps  ymm14, ymm1, ymm2              # ymm14 += ymm1 * ymm2
vfnmadd231ps ymm11, ymm0, ymm3              # ymm11 -= ymm0 * ymm3
vfnmadd231ps ymm15, ymm1, ymm3              # ymm15 -= ymm1 * ymm3
# Group 3: same pattern with the last two scalars.
vbroadcastss ymm2, dword ptr [rsi - 16]
vbroadcastss ymm3, dword ptr [rsi - 12]
vfmadd231ps  ymm4,  ymm0, ymm2              # ymm4  += ymm0 * ymm2
vfmadd231ps  ymm6,  ymm1, ymm2              # ymm6  += ymm1 * ymm2
vfnmadd231ps ymm5,  ymm0, ymm3              # ymm5  -= ymm0 * ymm3
vfnmadd231ps ymm7,  ymm1, ymm3              # ymm7  -= ymm1 * ymm3
# Advance both pointers past the data consumed above.
add          rsi, 24                        # six dwords were broadcast
add          rdi, 64                        # two 32-byte vectors were loaded
dec          rax                            # rax -= 1; the flags are not read in this step
# Unrolled step 8 of 8. Intel operand order (destination first).
# NOTE(review): '#' comments assume a GAS/LLVM-style assembler -- confirm the toolchain.
# Register use within this step:
#   rdi         base for two 32-byte packed-float loads; advanced by 64 at the end
#   rsi         base for six dword broadcasts; advanced by 24 (6 * 4 bytes) at the end
#   rax         decremented once; presumably a trip counter -- nothing in this step reads it
#   ymm0, ymm1  the two vectors loaded through rdi
#   ymm2, ymm3  broadcast scalars, reloaded for each of the three groups below
#   ymm4-ymm15  twelve accumulators, each updated exactly once
# Accumulator initialisation and write-back are not part of this listing.
vmovups      ymm0, ymmword ptr [rdi - 64]   # unaligned load of 8 floats
vmovups      ymm1, ymmword ptr [rdi - 32]   # the next 8 floats
vbroadcastss ymm2, dword ptr [rsi - 32]     # one float copied to all 8 lanes
vbroadcastss ymm3, dword ptr [rsi - 28]
prefetcht0   byte ptr [rdi + 512]           # cache hint for the line at rdi + 512
# Group 1: accumulators ymm8, ymm12 (added) and ymm9, ymm13 (subtracted).
vfmadd231ps  ymm8,  ymm0, ymm2              # ymm8  += ymm0 * ymm2
vfmadd231ps  ymm12, ymm1, ymm2              # ymm12 += ymm1 * ymm2
vfnmadd231ps ymm9,  ymm0, ymm3              # ymm9  -= ymm0 * ymm3
vfnmadd231ps ymm13, ymm1, ymm3              # ymm13 -= ymm1 * ymm3
# Group 2: same pattern with the next two scalars.
vbroadcastss ymm2, dword ptr [rsi - 24]
vbroadcastss ymm3, dword ptr [rsi - 20]
vfmadd231ps  ymm10, ymm0, ymm2              # ymm10 += ymm0 * ymm2
vfmadd231ps  ymm14, ymm1, ymm2              # ymm14 += ymm1 * ymm2
vfnmadd231ps ymm11, ymm0, ymm3              # ymm11 -= ymm0 * ymm3
vfnmadd231ps ymm15, ymm1, ymm3              # ymm15 -= ymm1 * ymm3
# Group 3: same pattern with the last two scalars.
vbroadcastss ymm2, dword ptr [rsi - 16]
vbroadcastss ymm3, dword ptr [rsi - 12]
vfmadd231ps  ymm4,  ymm0, ymm2              # ymm4  += ymm0 * ymm2
vfmadd231ps  ymm6,  ymm1, ymm2              # ymm6  += ymm1 * ymm2
vfnmadd231ps ymm5,  ymm0, ymm3              # ymm5  -= ymm0 * ymm3
vfnmadd231ps ymm7,  ymm1, ymm3              # ymm7  -= ymm1 * ymm3
# Advance both pointers past the data consumed above.
add          rsi, 24                        # six dwords were broadcast
add          rdi, 64                        # two 32-byte vectors were loaded
dec          rax                            # rax -= 1; the flags are not read in this step
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment