Skip to content

Instantly share code, notes, and snippets.

@SwapnilGaikwad
Created July 5, 2022 15:16
Show Gist options
  • Save SwapnilGaikwad/af43ea665230b9d44d79a1b3ab4d8b19 to your computer and use it in GitHub Desktop.
Save SwapnilGaikwad/af43ea665230b9d44d79a1b3ab4d8b19 to your computer and use it in GitHub Desktop.
; Assembly listing for method System.Text.Tests.AsciiUtilityTests:GetIndexOfFirstNonAsciiChar_Intrinsifed(long,long):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
; 2 inlinees with PGO data; 9 single block inlinees; 5 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 34, 35.50) long -> rbx
; V01 arg1 [V01,T01] ( 17, 10 ) long -> rsi
;* V02 loc0 [V02,T13] ( 0, 0 ) int -> zero-ref single-def
;* V03 loc1 [V03,T14] ( 0, 0 ) int -> zero-ref single-def
; V04 loc2 [V04,T16] ( 11, 12.50) simd16 -> mm0
; V05 loc3 [V05,T17] ( 3, 8.50) simd16 -> mm2
; V06 loc4 [V06,T05] ( 4, 2 ) int -> rsi
; V07 loc5 [V07,T04] ( 8, 4 ) long -> r14 single-def
;* V08 loc6 [V08 ] ( 0, 0 ) long -> zero-ref
; V09 loc7 [V09,T07] ( 3, 1.50) int -> rdi
; V10 loc8 [V10,T03] ( 2, 4.50) long -> rax single-def
;* V11 loc9 [V11 ] ( 0, 0 ) simd16 -> zero-ref
;* V12 loc10 [V12 ] ( 0, 0 ) simd16 -> zero-ref
;* V13 loc11 [V13 ] ( 0, 0 ) long -> zero-ref
; V14 loc12 [V14,T06] ( 4, 2 ) long -> rdi
;* V15 loc13 [V15 ] ( 0, 0 ) int -> zero-ref
;# V16 OutArgs [V16 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
;* V17 tmp1 [V17,T09] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V18 tmp2 [V18 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V19 tmp3 [V19 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V20 tmp4 [V20,T02] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V21 tmp5 [V21,T19] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
; V22 tmp6 [V22,T15] ( 2, 16 ) simd16 -> mm3 "Inlining Arg"
;* V23 tmp7 [V23,T10] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V24 tmp8 [V24 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V25 tmp9 [V25 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V26 tmp10 [V26,T11] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V27 tmp11 [V27 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V28 tmp12 [V28 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V29 tmp13 [V29,T12] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V30 tmp14 [V30,T20] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V31 tmp15 [V31 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V32 tmp16 [V32 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
;* V33 tmp17 [V33 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
;* V34 tmp18 [V34 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
;* V35 tmp19 [V35 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
; V36 cse0 [V36,T08] ( 3, 1.50) long -> rdi "CSE - moderate"
; V37 cse1 [V37,T18] ( 4, 2 ) simd16 -> mm1 "CSE - aggressive"
;
; Lcl frame size = 0
; -----------------------------------------------------------------------
; Scans a buffer of 16-bit elements and returns, in rax, the index of the
; first element whose value is above 0x7F (or the number of elements
; examined when none is found).
;
; In:   rdi = arg0 (buffer address), rsi = arg1 (length; it is doubled
;       below before being used as a byte count).
; Out:  rax = (stop address - start address) >> 1.
; Register roles inside the body:
;   r14  = original buffer address (never modified after IG05)
;   rbx  = current scan address; on exit paths, the stop address
;   rsi  = length (elements on the short path, bytes on the vector path)
;   xmm1 = copy of RWD00 (0xFF80 in every 16-bit lane)
; -----------------------------------------------------------------------
G_M21315_IG01:
; Prologue: save the callee-saved registers used below.
push rbp
push r14
push rbx
vzeroupper
; Three 8-byte pushes were made, so rsp+10H is the slot holding the saved rbp.
lea rbp, [rsp+10H]
; rbx = arg0 (buffer address).
mov rbx, rdi
;; size=15 bbWeight=1 PerfScore 4.75
G_M21315_IG02:
; A non-zero length goes to the real work in IG05.
test rsi, rsi
jne SHORT G_M21315_IG05
;; size=5 bbWeight=1 PerfScore 1.25
G_M21315_IG03:
; Zero length: return 0.
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M21315_IG04:
; Epilogue for the zero-length return.
pop rbx
pop r14
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M21315_IG05:
; Remember the buffer start; the result is computed relative to r14 in IG10.
mov r14, rbx
; Fewer than 8 elements (unsigned compare): use the scalar path at IG15.
cmp rsi, 8
jb G_M21315_IG15
; Test the first 16 bytes. vptest sets ZF only when (xmm0 AND xmm1) is all
; zero, so "jne" is taken when some lane has a bit of 0xFF80 set.
vmovdqu xmm0, xmmword ptr [r14]
vmovupd xmm1, xmmword ptr [reloc @RWD00]
vptest xmm0, xmm1
jne G_M21315_IG13
; rsi = 2 * arg1: from here on rsi is a byte count.
add rsi, rsi
; Fewer than 32 bytes in total: skip the loop (rbx still equals r14 here, and
; IG08 advances it past the 16 bytes that were just tested).
cmp rsi, 32
jb SHORT G_M21315_IG08
; rbx = (start + 16) rounded down to a 16-byte boundary.
lea rbx, [r14+16]
and rbx, -16
; rsi = bytes remaining measured from the new rbx.
mov rax, rbx
sub rax, r14
sub rsi, rax
; Not enough left for one 32-byte iteration: go straight to the tail.
cmp rsi, 32
jb SHORT G_M21315_IG07
; rax = highest address at which a 32-byte read still ends inside the data.
lea rax, [rbx+rsi]
sub rax, 32
align [0 bytes for IG06]
;; size=77 bbWeight=0.50 PerfScore 8.75
G_M21315_IG06:
; Main loop: 32 bytes per iteration. The two vectors are OR-ed so a single
; vptest covers both; IG12 works out which half contained the hit.
vmovdqu xmm0, xmmword ptr [rbx]
vmovdqu xmm2, xmmword ptr [rbx+16]
vpor xmm3, xmm0, xmm2
vptest xmm3, xmmword ptr [reloc @RWD00]
jne SHORT G_M21315_IG12
add rbx, 32
; Unsigned compare: keep looping while rbx <= rax.
cmp rbx, rax
jbe SHORT G_M21315_IG06
;; size=33 bbWeight=4 PerfScore 63.33
G_M21315_IG07:
; rsi is not decremented by the loop, which consumes multiples of 32, so its
; low five bits still describe the tail. Bit 4 set: one more full 16-byte
; vector to test.
test sil, 16
je SHORT G_M21315_IG09
vmovdqu xmm0, xmmword ptr [rbx]
vptest xmm0, xmm1
jne SHORT G_M21315_IG13
;; size=17 bbWeight=0.50 PerfScore 4.62
G_M21315_IG08:
; Step past the 16 bytes that were just found clean.
add rbx, 16
;; size=4 bbWeight=0.50 PerfScore 0.12
G_M21315_IG09:
; Low four bits of rsi = leftover bytes; none left means we are done.
movzx rax, sil
test al, 15
je SHORT G_M21315_IG10
; rbx = rbx + (rsi & 15) - 16: back up so the final 16-byte read ends exactly
; at the end of the data. It overlaps bytes that were already tested.
mov rax, rsi
and rax, 15
add rax, rbx
mov rbx, rax
sub rbx, 16
vmovdqu xmm0, xmmword ptr [rbx]
vptest xmm0, xmm1
jne SHORT G_M21315_IG13
add rbx, 16
;; size=40 bbWeight=0.50 PerfScore 5.50
G_M21315_IG10:
; Common exit: rax = (rbx - r14) >> 1, i.e. byte offset converted to an
; element index.
mov rax, rbx
sub rax, r14
shr rax, 1
;; size=9 bbWeight=0.50 PerfScore 0.50
G_M21315_IG11:
; Epilogue for every non-zero-length path.
pop rbx
pop r14
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M21315_IG12:
; The loop's combined test fired. If the first vector (xmm0) has the hit,
; fall to IG13 with rbx unchanged; otherwise the hit is in the second vector,
; so advance rbx by 16 and make xmm0 that vector.
vptest xmm0, xmmword ptr [reloc @RWD00]
jne SHORT G_M21315_IG13
add rbx, 16
vmovaps xmm0, xmm2
;; size=19 bbWeight=0.50 PerfScore 3.25
G_M21315_IG13:
; Locate the first offending lane in xmm0 (rbx = address xmm0 was read from).
; Unsigned saturating add of 0x7F80 sets bit 15 of a lane exactly when the
; lane value is >= 0x0080.
vpaddusw xmm0, xmm0, xmmword ptr [reloc @RWD16]
; One mask bit per byte; 0xAAAA keeps only the odd bits, i.e. the high byte
; of each 16-bit lane.
vpmovmskb esi, xmm0
and esi, 0xAAAA
; edi is cleared and then overwritten by tzcnt (presumably a
; dependency-breaking idiom emitted by the JIT).
xor edi, edi
tzcnt edi, esi
; The set bit indexes the lane's high byte, so "- 1" gives the byte offset of
; the lane itself.
lea rbx, [rbx+rdi-1]
jmp SHORT G_M21315_IG10
;; size=31 bbWeight=0.50 PerfScore 4.75
G_M21315_IG14:
; Reached from IG18 with edi = the 32-bit value read from [rbx]. If the
; helper returns true the first element is counted (rbx += 2); otherwise rbx
; is left pointing at it.
call [System.Text.Tests.AsciiUtilityTests:FirstCharInUInt32IsAscii(int):bool]
test eax, eax
je SHORT G_M21315_IG10
add rbx, 2
jmp SHORT G_M21315_IG10
;; size=16 bbWeight=0.50 PerfScore 3.25
G_M21315_IG15:
; Short path (fewer than 8 elements; rsi is still the element count and rbx
; still equals r14). Bit 2 of the count set: test four elements with one
; 8-byte read.
test sil, 4
je SHORT G_M21315_IG18
mov rdi, qword ptr [r14]
; NOTE(review): 0xD1FFAB1E looks like a listing placeholder rather than the
; real immediate; given RWD00 and the 127 compare in IG19 the mask is
; presumably 0xFF80 in every 16-bit lane -- confirm against an unscrubbed dump.
mov rax, 0xD1FFAB1E
and rdi, rax
je SHORT G_M21315_IG17
;; size=24 bbWeight=0.50 PerfScore 2.38
G_M21315_IG16:
; rdi holds the masked bits. Lowest set bit -> bit index -> byte index
; (>> 3) -> rounded down to an even byte offset (& -2) -> address.
xor esi, esi
tzcnt rsi, rdi
mov edi, esi
sar edi, 3
movsxd rbx, edi
and rbx, -2
add rbx, r14
jmp SHORT G_M21315_IG10
;; size=24 bbWeight=0.50 PerfScore 2.88
G_M21315_IG17:
; The 8 bytes were clean: continue after them.
lea rbx, [r14+8]
;; size=4 bbWeight=0.50 PerfScore 0.25
G_M21315_IG18:
; Bit 1 of the count set: test two elements with one 4-byte read.
test sil, 2
je SHORT G_M21315_IG19
mov edi, dword ptr [rbx]
; NOTE(review): same apparent placeholder immediate as in IG15; presumably the
; 32-bit form of the 0xFF80-per-lane mask -- confirm.
test edi, 0xD1FFAB1E
jne SHORT G_M21315_IG14
add rbx, 4
;; size=20 bbWeight=0.50 PerfScore 2.38
G_M21315_IG19:
; Bit 0 of the count set: test the last single element. Values above 127 stop
; the scan at rbx; otherwise the element is counted.
test sil, 1
je G_M21315_IG10
cmp word ptr [rbx], 127
ja G_M21315_IG10
add rbx, 2
jmp G_M21315_IG10
;; size=29 bbWeight=0.50 PerfScore 3.75
; Read-only constants used above.
; RWD00: 0xFF80 in each 16-bit lane -- the vptest mask for "value above 0x7F".
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h
; RWD16: 0x7F80 in each 16-bit lane -- the vpaddusw addend used in IG13.
RWD16 dq 7F807F807F807F80h, 7F807F807F807F80h
; Total bytes of code 379, prolog size 15, PerfScore 153.43, instruction count 109, allocated bytes for code 391 (MethodHash=6043acbc) for method System.Text.Tests.AsciiUtilityTests:GetIndexOfFirstNonAsciiChar_Intrinsifed(long,long):long
; ============================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment