Created
July 5, 2022 15:15
-
-
Save SwapnilGaikwad/f9d5c3aea191562dd44600806e0e1e05 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Assembly listing for method System.Text.Tests.AsciiUtilityTests:GetIndexOfFirstNonAsciiChar_Sse2(long,long):long | |
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix | |
; optimized code | |
; rbp based frame | |
; fully interruptible | |
; No PGO data | |
; 2 inlinees with PGO data; 9 single block inlinees; 0 inlinees without PGO data | |
; Final local variable assignments | |
; | |
; V00 arg0 [V00,T00] ( 34, 35.50) long -> rbx | |
; V01 arg1 [V01,T01] ( 19, 11 ) long -> rsi | |
;* V02 loc0 [V02,T08] ( 0, 0 ) int -> zero-ref single-def | |
;* V03 loc1 [V03,T09] ( 0, 0 ) int -> zero-ref single-def | |
; V04 loc2 [V04,T10] ( 11, 12.50) simd16 -> mm1 | |
; V05 loc3 [V05,T11] ( 3, 8.50) simd16 -> mm2 | |
; V06 loc4 [V06,T04] ( 6, 3 ) int -> rdi | |
; V07 loc5 [V07,T03] ( 8, 4 ) long -> r14 single-def | |
; V08 loc6 [V08,T12] ( 2, 4.50) simd16 -> mm0 single-def | |
;* V09 loc7 [V09,T13] ( 0, 0 ) simd16 -> zero-ref single-def | |
; V10 loc8 [V10,T06] ( 3, 1.50) int -> rdi | |
; V11 loc9 [V11,T02] ( 2, 4.50) long -> rax single-def | |
;* V12 loc10 [V12 ] ( 0, 0 ) simd16 -> zero-ref | |
; V13 loc11 [V13,T05] ( 4, 2 ) long -> rdi | |
;* V14 loc12 [V14 ] ( 0, 0 ) int -> zero-ref | |
;# V15 OutArgs [V15 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace" | |
;* V16 tmp1 [V16 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp" | |
;* V17 tmp2 [V17 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp" | |
;* V18 tmp3 [V18 ] ( 0, 0 ) long -> zero-ref "Inlining Arg" | |
;* V19 tmp4 [V19 ] ( 0, 0 ) int -> zero-ref "Inlining Arg" | |
; V20 cse0 [V20,T07] ( 3, 1.50) long -> rdi "CSE - moderate" | |
; | |
; Lcl frame size = 0 | |
; -----------------------------------------------------------------------------
; Scans memory starting at arg0 and returns, in rax, the index (in 2-byte
; elements) at which the scan stopped: either the first element whose value
; is greater than 0x7F, or the end of the scanned range.
;
; In:    rdi = arg0 (used as the start address; copied to rbx and r14)
;        rsi = arg1 (compared against 8 and later doubled into a byte count;
;              presumably the number of 16-bit chars - confirm against caller)
; Out:   rax = (stop address - start address) >> 1, or 0 when arg1 == 0
;
; Register roles inside the body:
;        r14  = original start address (never modified after IG05)
;        rbx  = current scan address
;        xmm0 = RWD00 mask while the vector loop runs
;        xmm1 / xmm2 = vectors most recently loaded from memory
; -----------------------------------------------------------------------------
G_M26287_IG01: | |
; Prolog: save the callee-saved registers used below.
push rbp | |
push r14 | |
push rbx | |
vzeroupper | |
; After three pushes the saved rbp lives at [rsp+10H]; rbp is pointed at it.
lea rbp, [rsp+10H] | |
mov rbx, rdi | |
;; size=15 bbWeight=1 PerfScore 4.75 | |
G_M26287_IG02: | |
; arg1 == 0: nothing to scan, fall through and return 0.
test rsi, rsi | |
jne SHORT G_M26287_IG05 | |
;; size=5 bbWeight=1 PerfScore 1.25 | |
G_M26287_IG03: | |
xor eax, eax | |
;; size=2 bbWeight=0.50 PerfScore 0.12 | |
G_M26287_IG04: | |
pop rbx | |
pop r14 | |
pop rbp | |
ret | |
;; size=5 bbWeight=0.50 PerfScore 1.25 | |
G_M26287_IG05: | |
; Remember the start address so the final index can be computed in IG10.
mov r14, rbx | |
; Fewer than 8 elements (16 bytes): a full vector read does not fit, so take
; the scalar path at IG16.
cmp rsi, 8 | |
jb G_M26287_IG16 | |
vmovupd xmm0, xmmword ptr [reloc @RWD00] | |
; First 16 bytes are read unaligned. Adding 0x7F80 with unsigned saturation
; sets bit 15 of a word exactly when that word is >= 0x80; vpmovmskb then
; collects the top bit of every byte, so bits 1,3,5,... (mask 0xAAAA) flag
; the offending words.
vmovdqu xmm1, xmmword ptr [r14] | |
vpaddusw xmm1, xmm1, xmmword ptr [reloc @RWD16] | |
vpmovmskb edi, xmm1 | |
test edi, 0xAAAA | |
jne G_M26287_IG14 | |
; rsi *= 2: from here on rsi is a byte count.
add rsi, rsi | |
cmp rsi, 32 | |
jb SHORT G_M26287_IG08 | |
; rbx = (start + 16) rounded down to a 16-byte boundary, i.e. the first
; aligned address after the start. Bytes between it and start+16 get scanned
; twice, which is harmless because the first vector already passed.
lea rbx, [r14+16] | |
and rbx, -16 | |
; rsi = bytes remaining from the aligned rbx to the end of the range.
add rsi, r14 | |
sub rsi, rbx | |
cmp rsi, 32 | |
jb SHORT G_M26287_IG07 | |
; rax = last address at which a full 32-byte read still fits.
lea rax, [rbx+rsi] | |
sub rax, 32 | |
align [11 bytes for IG06] | |
;; size=101 bbWeight=0.50 PerfScore 9.38 | |
G_M26287_IG06: | |
; Main loop: 32 bytes per iteration using aligned loads. The two vectors are
; OR-ed together and tested against the FF80 mask; vptest clears ZF when any
; masked bit is set, so jne leaves the loop on the first hit.
vmovdqa xmm1, xmmword ptr [rbx] | |
vmovdqa xmm2, xmmword ptr [rbx+16] | |
vpor xmm3, xmm1, xmm2 | |
vptest xmm3, xmm0 | |
jne SHORT G_M26287_IG12 | |
add rbx, 32 | |
cmp rbx, rax | |
jbe SHORT G_M26287_IG06 | |
;; size=29 bbWeight=4 PerfScore 55.33 | |
G_M26287_IG07: | |
; rsi is not decremented by the loop, but rbx advanced in multiples of 32, so
; the low five bits of rsi still describe the leftover bytes. Bit 4 set means
; one more aligned 16-byte vector remains.
test sil, 16 | |
je SHORT G_M26287_IG09 | |
vmovdqa xmm1, xmmword ptr [rbx] | |
vptest xmm1, xmmword ptr [reloc @RWD00] | |
jne SHORT G_M26287_IG13 | |
;; size=21 bbWeight=0.50 PerfScore 5.62 | |
G_M26287_IG08: | |
; Step over a vector that was already checked. Also reached from IG05 when the
; byte count is below 32; rbx is then still the start address and the vector
; being skipped is the first, unaligned one.
add rbx, 16 | |
;; size=4 bbWeight=0.50 PerfScore 0.12 | |
G_M26287_IG09: | |
; Low four bits of rsi = leftover bytes that do not fill a vector.
movzx rax, sil | |
test al, 15 | |
je SHORT G_M26287_IG10 | |
; rbx = rbx + (rsi & 15) - 16: back up so that one final unaligned 16-byte
; read ends exactly where the leftover bytes end, overlapping data that was
; already checked.
mov rax, rsi | |
and rax, 15 | |
add rax, rbx | |
mov rbx, rax | |
sub rbx, 16 | |
vmovdqu xmm1, xmmword ptr [rbx] | |
vptest xmm1, xmmword ptr [reloc @RWD00] | |
jne SHORT G_M26287_IG13 | |
add rbx, 16 | |
;; size=44 bbWeight=0.50 PerfScore 6.50 | |
G_M26287_IG10: | |
; Common exit: byte distance from the start, halved to an element index.
mov rax, rbx | |
sub rax, r14 | |
shr rax, 1 | |
;; size=9 bbWeight=0.50 PerfScore 0.50 | |
G_M26287_IG11: | |
pop rbx | |
pop r14 | |
pop rbp | |
ret | |
;; size=5 bbWeight=0.50 PerfScore 1.25 | |
G_M26287_IG12: | |
; The 32-byte loop found a hit in xmm1 | xmm2. Decide which half holds it:
; if the first vector is clean, advance 16 bytes and make the second vector
; the one examined by IG13.
vptest xmm1, xmmword ptr [reloc @RWD00] | |
jne SHORT G_M26287_IG13 | |
add rbx, 16 | |
vmovaps xmm1, xmm2 | |
;; size=19 bbWeight=0.50 PerfScore 3.25 | |
G_M26287_IG13: | |
; Build the per-byte bitmask for the vector in xmm1 (same saturating-add trick
; as in IG05). xmm0 is overwritten here; the loop mask is no longer needed.
vpaddusw xmm0, xmm1, xmmword ptr [reloc @RWD16] | |
vpmovmskb edi, xmm0 | |
;; size=12 bbWeight=0.50 PerfScore 2.00 | |
G_M26287_IG14: | |
; Keep only the high-byte bit of every word, then locate the lowest one.
; That bit index is 2k+1 for word k, so subtracting 1 yields the byte offset
; of the offending word relative to rbx.
and edi, 0xAAAA | |
; Destination is zeroed before tzcnt - presumably to break the instruction's
; false dependency on its output register; confirm against JIT sources.
xor esi, esi | |
tzcnt esi, edi | |
mov edi, esi | |
lea rbx, [rbx+rdi-1] | |
jmp SHORT G_M26287_IG10 | |
;; size=21 bbWeight=0.50 PerfScore 2.88 | |
G_M26287_IG15: | |
; Reached from IG19 with the dword just loaded still in edi (first integer
; argument). A non-zero result advances rbx by one element, so the second
; element is reported; a zero result reports the first.
call [System.Text.Tests.AsciiUtilityTests:FirstCharInUInt32IsAscii(int):bool] | |
test eax, eax | |
je SHORT G_M26287_IG10 | |
add rbx, 2 | |
jmp SHORT G_M26287_IG10 | |
;; size=16 bbWeight=0.50 PerfScore 3.25 | |
G_M26287_IG16: | |
; Scalar path for fewer than 8 elements. rsi is still an element count here
; (the doubling in IG05 was skipped), and its bits 2, 1 and 0 select an
; 8-byte, 4-byte and 2-byte step respectively.
test sil, 4 | |
je SHORT G_M26287_IG19 | |
mov rdi, qword ptr [r14] | |
; NOTE(review): 0xD1FFAB1E looks like a placeholder substituted by the
; disassembly printer rather than the real immediate; the actual value is
; presumably the 64-bit FF80 mask - confirm with a non-diffable dump.
mov rax, 0xD1FFAB1E | |
and rdi, rax | |
je SHORT G_M26287_IG18 | |
;; size=24 bbWeight=0.50 PerfScore 2.38 | |
G_M26287_IG17: | |
; rdi holds the masked bits of the qword. Lowest set bit index / 8 gives the
; byte number; clearing bit 0 rounds it down to the start of its 2-byte
; element; adding r14 turns it into an address.
xor esi, esi | |
tzcnt rsi, rdi | |
mov edi, esi | |
sar edi, 3 | |
movsxd rbx, edi | |
and rbx, -2 | |
add rbx, r14 | |
jmp G_M26287_IG10 | |
;; size=27 bbWeight=0.50 PerfScore 2.88 | |
G_M26287_IG18: | |
; All four elements of the qword passed: continue 8 bytes past the start.
lea rbx, [r14+8] | |
;; size=4 bbWeight=0.50 PerfScore 0.25 | |
G_M26287_IG19: | |
; Two-element (4-byte) step.
test sil, 2 | |
je SHORT G_M26287_IG20 | |
mov edi, dword ptr [rbx] | |
; NOTE(review): same placeholder immediate as in IG16 - confirm real value.
test edi, 0xD1FFAB1E | |
jne SHORT G_M26287_IG15 | |
add rbx, 4 | |
;; size=20 bbWeight=0.50 PerfScore 2.38 | |
G_M26287_IG20: | |
; Final single element: advance past it only when its value is <= 127.
test sil, 1 | |
je G_M26287_IG10 | |
cmp word ptr [rbx], 127 | |
ja G_M26287_IG10 | |
add rbx, 2 | |
jmp G_M26287_IG10 | |
;; size=29 bbWeight=0.50 PerfScore 3.75 | |
; RWD00: per-word mask 0xFF80 - selects every bit that is set only in 16-bit
; values above 0x7F; used as the vptest operand.
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h | |
; RWD16: per-word addend 0x7F80 for vpaddusw - pushes any word >= 0x80 to
; 0x8000 or above (bit 15 set) while words <= 0x7F stay below 0x8000.
RWD16 dq 7F807F807F807F80h, 7F807F807F807F80h | |
; Total bytes of code 412, prolog size 15, PerfScore 151.08, instruction count 111, allocated bytes for code 420 (MethodHash=335d9950) for method System.Text.Tests.AsciiUtilityTests:GetIndexOfFirstNonAsciiChar_Sse2(long,long):long | |
; ============================================================ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment