Created
December 7, 2022 13:34
-
-
Save udoprog/ec85576b1d0baa120b068feb92536ab5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- a.txt 2022-12-07 14:31:31.870270000 +0100 | |
+++ b.txt 2022-12-07 14:31:58.517728400 +0100 | |
@@ -16,34 +16,40 @@ | |
.zero 1 | |
.zero 1 | |
.LCPI0_1: | |
- .long 2147483648 | |
- .long 2147483648 | |
- .long 2147483648 | |
- .long 2147483648 | |
-example::diff: | |
- xor eax, eax | |
+ .long 1065353216 | |
+ .long 1065353216 | |
+ .long 1065353216 | |
+ .long 1065353216 | |
+.LCPI0_2: | |
+ .long 1 | |
+ .long 1 | |
+ .long 1 | |
+ .long 1 | |
+example::diff2: | |
+ xor ecx, ecx | |
test rsi, rsi | |
je .LBB0_12 | |
- mov r10, rdi | |
+ mov rax, rdi | |
cmp rsi, 8 | |
jb .LBB0_10 | |
- mov r10, rsi | |
- and r10, -8 | |
- lea rax, [r10 - 8] | |
- mov r8, rax | |
+ mov rax, rsi | |
+ and rax, -8 | |
+ lea rcx, [rax - 8] | |
+ mov r8, rcx | |
shr r8, 3 | |
inc r8 | |
- test rax, rax | |
+ test rcx, rcx | |
je .LBB0_3 | |
mov r9, r8 | |
and r9, -2 | |
- pxor xmm10, xmm10 | |
+ pxor xmm2, xmm2 | |
xor ecx, ecx | |
pcmpeqd xmm8, xmm8 | |
movdqa xmm9, xmmword ptr [rip + .LCPI0_0] | |
- movdqa xmm4, xmmword ptr [rip + .LCPI0_1] | |
- xorps xmm0, xmm0 | |
- xorps xmm1, xmm1 | |
+ movdqa xmm5, xmmword ptr [rip + .LCPI0_1] | |
+ movdqa xmm6, xmmword ptr [rip + .LCPI0_2] | |
+ pxor xmm0, xmm0 | |
+ pxor xmm1, xmm1 | |
.LBB0_5: | |
movd xmm7, dword ptr [rdi + rcx] | |
movd xmm3, dword ptr [rdi + rcx + 4] | |
@@ -51,185 +57,143 @@ | |
paddb xmm3, xmm8 | |
pand xmm7, xmm9 | |
pand xmm3, xmm9 | |
- punpcklbw xmm7, xmm10 | |
- punpcklwd xmm7, xmm10 | |
- punpcklbw xmm3, xmm10 | |
- punpcklwd xmm3, xmm10 | |
- pshuflw xmm6, xmm7, 254 | |
- movdqa xmm5, xmm4 | |
- psrld xmm5, xmm6 | |
- pshuflw xmm2, xmm7, 84 | |
- movdqa xmm6, xmm4 | |
- psrld xmm6, xmm2 | |
- punpcklqdq xmm6, xmm5 | |
- pshufd xmm2, xmm7, 238 | |
- pshuflw xmm5, xmm2, 254 | |
- movdqa xmm7, xmm4 | |
- psrld xmm7, xmm5 | |
- pshuflw xmm2, xmm2, 84 | |
- movdqa xmm5, xmm4 | |
- psrld xmm5, xmm2 | |
- punpckhqdq xmm5, xmm7 | |
- shufps xmm6, xmm5, 204 | |
- pshuflw xmm2, xmm3, 254 | |
- movdqa xmm5, xmm4 | |
- psrld xmm5, xmm2 | |
- pshuflw xmm2, xmm3, 84 | |
- movdqa xmm7, xmm4 | |
- psrld xmm7, xmm2 | |
- orps xmm6, xmm0 | |
- punpcklqdq xmm7, xmm5 | |
- pshufd xmm0, xmm3, 238 | |
- pshuflw xmm2, xmm0, 254 | |
- movdqa xmm3, xmm4 | |
- psrld xmm3, xmm2 | |
- pshuflw xmm0, xmm0, 84 | |
- movdqa xmm2, xmm4 | |
- psrld xmm2, xmm0 | |
- punpckhqdq xmm2, xmm3 | |
- shufps xmm7, xmm2, 204 | |
- orps xmm7, xmm1 | |
- movd xmm1, dword ptr [rdi + rcx + 8] | |
- movd xmm3, dword ptr [rdi + rcx + 12] | |
+ punpcklbw xmm7, xmm2 | |
+ punpcklwd xmm7, xmm2 | |
+ punpcklbw xmm3, xmm2 | |
+ punpcklwd xmm3, xmm2 | |
+ pslld xmm7, 23 | |
+ paddd xmm7, xmm5 | |
+ cvttps2dq xmm7, xmm7 | |
+ pshufd xmm4, xmm7, 245 | |
+ pmuludq xmm7, xmm6 | |
+ pshufd xmm7, xmm7, 232 | |
+ pmuludq xmm4, xmm6 | |
+ pshufd xmm4, xmm4, 232 | |
+ punpckldq xmm7, xmm4 | |
+ por xmm7, xmm0 | |
+ pslld xmm3, 23 | |
+ paddd xmm3, xmm5 | |
+ cvttps2dq xmm0, xmm3 | |
+ pshufd xmm4, xmm0, 245 | |
+ pmuludq xmm0, xmm6 | |
+ pshufd xmm3, xmm0, 232 | |
+ pmuludq xmm4, xmm6 | |
+ pshufd xmm0, xmm4, 232 | |
+ punpckldq xmm3, xmm0 | |
+ por xmm3, xmm1 | |
+ movd xmm0, dword ptr [rdi + rcx + 8] | |
+ movd xmm1, dword ptr [rdi + rcx + 12] | |
+ paddb xmm0, xmm8 | |
paddb xmm1, xmm8 | |
- paddb xmm3, xmm8 | |
+ pand xmm0, xmm9 | |
pand xmm1, xmm9 | |
- pand xmm3, xmm9 | |
- punpcklbw xmm1, xmm10 | |
- punpcklwd xmm1, xmm10 | |
- punpcklbw xmm3, xmm10 | |
- punpcklwd xmm3, xmm10 | |
- pshuflw xmm0, xmm1, 254 | |
- movdqa xmm2, xmm4 | |
- psrld xmm2, xmm0 | |
- pshuflw xmm5, xmm1, 84 | |
- movdqa xmm0, xmm4 | |
- psrld xmm0, xmm5 | |
- punpcklqdq xmm0, xmm2 | |
- pshufd xmm1, xmm1, 238 | |
- pshuflw xmm2, xmm1, 254 | |
- movdqa xmm5, xmm4 | |
- psrld xmm5, xmm2 | |
- pshuflw xmm1, xmm1, 84 | |
- movdqa xmm2, xmm4 | |
- psrld xmm2, xmm1 | |
- punpckhqdq xmm2, xmm5 | |
- shufps xmm0, xmm2, 204 | |
- pshuflw xmm1, xmm3, 254 | |
- movdqa xmm2, xmm4 | |
- psrld xmm2, xmm1 | |
- pshuflw xmm5, xmm3, 84 | |
- movdqa xmm1, xmm4 | |
- psrld xmm1, xmm5 | |
- orps xmm0, xmm6 | |
- punpcklqdq xmm1, xmm2 | |
- pshufd xmm2, xmm3, 238 | |
- pshuflw xmm3, xmm2, 254 | |
- movdqa xmm5, xmm4 | |
- psrld xmm5, xmm3 | |
- pshuflw xmm2, xmm2, 84 | |
- movdqa xmm3, xmm4 | |
- psrld xmm3, xmm2 | |
- punpckhqdq xmm3, xmm5 | |
- shufps xmm1, xmm3, 204 | |
- orps xmm1, xmm7 | |
+ punpcklbw xmm0, xmm2 | |
+ punpcklwd xmm0, xmm2 | |
+ punpcklbw xmm1, xmm2 | |
+ punpcklwd xmm1, xmm2 | |
+ pslld xmm0, 23 | |
+ paddd xmm0, xmm5 | |
+ cvttps2dq xmm0, xmm0 | |
+ pshufd xmm4, xmm0, 245 | |
+ pmuludq xmm0, xmm6 | |
+ pshufd xmm0, xmm0, 232 | |
+ pmuludq xmm4, xmm6 | |
+ pshufd xmm4, xmm4, 232 | |
+ punpckldq xmm0, xmm4 | |
+ por xmm0, xmm7 | |
+ pslld xmm1, 23 | |
+ paddd xmm1, xmm5 | |
+ cvttps2dq xmm1, xmm1 | |
+ pshufd xmm4, xmm1, 245 | |
+ pmuludq xmm1, xmm6 | |
+ pshufd xmm1, xmm1, 232 | |
+ pmuludq xmm4, xmm6 | |
+ pshufd xmm4, xmm4, 232 | |
+ punpckldq xmm1, xmm4 | |
+ por xmm1, xmm3 | |
add rcx, 16 | |
add r9, -2 | |
jne .LBB0_5 | |
test r8b, 1 | |
je .LBB0_8 | |
.LBB0_7: | |
- movd xmm4, dword ptr [rdi + rcx] | |
+ movd xmm3, dword ptr [rdi + rcx] | |
movd xmm2, dword ptr [rdi + rcx + 4] | |
- pcmpeqd xmm3, xmm3 | |
- paddb xmm4, xmm3 | |
+ pcmpeqd xmm4, xmm4 | |
+ paddb xmm3, xmm4 | |
movdqa xmm5, xmmword ptr [rip + .LCPI0_0] | |
- paddb xmm2, xmm3 | |
- pand xmm4, xmm5 | |
+ paddb xmm2, xmm4 | |
+ pand xmm3, xmm5 | |
pand xmm2, xmm5 | |
- pxor xmm3, xmm3 | |
- punpcklbw xmm4, xmm3 | |
- punpcklwd xmm4, xmm3 | |
- punpcklbw xmm2, xmm3 | |
- punpcklwd xmm2, xmm3 | |
- pshuflw xmm5, xmm4, 254 | |
- movdqa xmm3, xmmword ptr [rip + .LCPI0_1] | |
- movdqa xmm6, xmm3 | |
- psrld xmm6, xmm5 | |
- pshuflw xmm5, xmm4, 84 | |
- movdqa xmm7, xmm3 | |
- psrld xmm7, xmm5 | |
- pshufd xmm4, xmm4, 238 | |
- pshuflw xmm8, xmm4, 254 | |
- movdqa xmm5, xmm3 | |
- psrld xmm5, xmm8 | |
- pshuflw xmm8, xmm4, 84 | |
- movdqa xmm4, xmm3 | |
- psrld xmm4, xmm8 | |
- punpcklqdq xmm7, xmm6 | |
- punpckhqdq xmm4, xmm5 | |
- shufps xmm7, xmm4, 204 | |
- pshuflw xmm4, xmm2, 254 | |
- movdqa xmm5, xmm3 | |
- psrld xmm5, xmm4 | |
- pshuflw xmm4, xmm2, 84 | |
- movdqa xmm6, xmm3 | |
- psrld xmm6, xmm4 | |
- pshufd xmm2, xmm2, 238 | |
- pshuflw xmm8, xmm2, 254 | |
- movdqa xmm4, xmm3 | |
- psrld xmm4, xmm8 | |
- orps xmm0, xmm7 | |
- pshuflw xmm2, xmm2, 84 | |
- psrld xmm3, xmm2 | |
- punpcklqdq xmm6, xmm5 | |
- punpckhqdq xmm3, xmm4 | |
- shufps xmm6, xmm3, 204 | |
- orps xmm1, xmm6 | |
+ pxor xmm4, xmm4 | |
+ punpcklbw xmm3, xmm4 | |
+ punpcklwd xmm3, xmm4 | |
+ punpcklbw xmm2, xmm4 | |
+ punpcklwd xmm2, xmm4 | |
+ pslld xmm3, 23 | |
+ movdqa xmm4, xmmword ptr [rip + .LCPI0_1] | |
+ paddd xmm3, xmm4 | |
+ cvttps2dq xmm3, xmm3 | |
+ movdqa xmm5, xmmword ptr [rip + .LCPI0_2] | |
+ pshufd xmm6, xmm3, 245 | |
+ pmuludq xmm3, xmm5 | |
+ pshufd xmm3, xmm3, 232 | |
+ pmuludq xmm6, xmm5 | |
+ pshufd xmm6, xmm6, 232 | |
+ punpckldq xmm3, xmm6 | |
+ por xmm0, xmm3 | |
+ pslld xmm2, 23 | |
+ paddd xmm2, xmm4 | |
+ cvttps2dq xmm2, xmm2 | |
+ pshufd xmm3, xmm2, 245 | |
+ pmuludq xmm2, xmm5 | |
+ pshufd xmm2, xmm2, 232 | |
+ pmuludq xmm3, xmm5 | |
+ pshufd xmm3, xmm3, 232 | |
+ punpckldq xmm2, xmm3 | |
+ por xmm1, xmm2 | |
.LBB0_8: | |
- orps xmm0, xmm1 | |
+ por xmm0, xmm1 | |
pshufd xmm1, xmm0, 238 | |
por xmm1, xmm0 | |
pshufd xmm0, xmm1, 85 | |
por xmm0, xmm1 | |
- movd eax, xmm0 | |
- cmp r10, rsi | |
+ movd ecx, xmm0 | |
+ cmp rax, rsi | |
je .LBB0_12 | |
- add r10, rdi | |
+ add rax, rdi | |
.LBB0_10: | |
add rdi, rsi | |
.LBB0_11: | |
- movzx ecx, byte ptr [r10] | |
- dec cl | |
- mov esi, -2147483648 | |
- shr esi, cl | |
- inc r10 | |
- or eax, esi | |
- cmp r10, rdi | |
+ movzx esi, byte ptr [rax] | |
+ inc rax | |
+ dec sil | |
+ bts ecx, esi | |
+ cmp rax, rdi | |
jne .LBB0_11 | |
.LBB0_12: | |
- mov ecx, eax | |
- shr ecx | |
- and ecx, 1431655765 | |
- sub eax, ecx | |
- mov ecx, eax | |
- and ecx, 858993459 | |
- shr eax, 2 | |
+ mov eax, ecx | |
+ shr eax | |
+ and eax, 1431655765 | |
+ sub ecx, eax | |
+ mov eax, ecx | |
and eax, 858993459 | |
- add eax, ecx | |
- mov ecx, eax | |
- shr ecx, 4 | |
+ shr ecx, 2 | |
+ and ecx, 858993459 | |
add ecx, eax | |
- and ecx, 252645135 | |
- imul eax, ecx, 16843009 | |
+ mov eax, ecx | |
+ shr eax, 4 | |
+ add eax, ecx | |
+ and eax, 252645135 | |
+ imul eax, eax, 16843009 | |
shr eax, 24 | |
cmp eax, edx | |
sete al | |
ret | |
.LBB0_3: | |
- xorps xmm0, xmm0 | |
+ pxor xmm0, xmm0 | |
xor ecx, ecx | |
- xorps xmm1, xmm1 | |
+ pxor xmm1, xmm1 | |
test r8b, 1 | |
jne .LBB0_7 | |
jmp .LBB0_8 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment