Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save SwapnilGaikwad/5e40f3c7fe9cdd34425f0bfa67be37fc to your computer and use it in GitHub Desktop.

Select an option

Save SwapnilGaikwad/5e40f3c7fe9cdd34425f0bfa67be37fc to your computer and use it in GitHub Desktop.
; Assembly listing for method System.Text.ASCIIUtility:GetIndexOfFirstNonAsciiChar_Sse2(long,long):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; optimized using profile data
; rbp based frame
; fully interruptible
; with PGO: edge weights are invalid, and fgCalledCount is 157179
; 0 inlinees with PGO data; 4 single block inlinees; 7 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T01] ( 34, 3 ) long -> rbx
; V01 arg1 [V01,T00] ( 19, 5.81) long -> rsi
;* V02 loc0 [V02,T03] ( 0, 0 ) int -> zero-ref
;* V03 loc1 [V03,T04] ( 0, 0 ) int -> zero-ref
; V04 loc2 [V04,T06] ( 11, 1.21) simd16 -> mm0
; V05 loc3 [V05,T10] ( 3, 0 ) simd16 -> mm1
; V06 loc4 [V06,T09] ( 4, 0 ) int -> rdi
; V07 loc5 [V07,T02] ( 8, 1.60) long -> r14
;* V08 loc6 [V08,T07] ( 0, 0 ) simd16 -> zero-ref
;* V09 loc7 [V09 ] ( 0, 0 ) long -> zero-ref
; V10 loc8 [V10,T11] ( 3, 0 ) int -> rdi
; V11 loc9 [V11,T16] ( 2, 0 ) long -> rax
;* V12 loc10 [V12 ] ( 0, 0 ) simd16 -> zero-ref
; V13 loc11 [V13,T08] ( 5, 0 ) long -> rdi
;# V14 OutArgs [V14 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
;* V15 tmp1 [V15,T05] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V16 tmp2 [V16 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V17 tmp3 [V17,T12] ( 3, 0 ) bool -> rdi "Inline return value spill temp"
;* V18 tmp4 [V18 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V19 tmp5 [V19,T13] ( 3, 0 ) bool -> rax "Inline return value spill temp"
;* V20 tmp6 [V20 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V21 tmp7 [V21,T14] ( 3, 0 ) bool -> rax "Inline return value spill temp"
;* V22 tmp8 [V22 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V23 tmp9 [V23,T15] ( 3, 0 ) bool -> rsi "Inline return value spill temp"
;* V24 tmp10 [V24 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V25 tmp11 [V25 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
;* V26 tmp12 [V26 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
;
; Lcl frame size = 0
;-----------------------------------------------------------------------
; JIT-emitted body of GetIndexOfFirstNonAsciiChar_Sse2(long, long): long.
; Scans a buffer of 2-byte elements and returns, in rax, the distance from
; the start of the buffer to the first element whose value is > 0x7F,
; measured in elements (byte distance >> 1, see IG08). If no such element
; is found the result equals the number of elements consumed.
;
; Register roles, as visible in the code below:
;   rdi  in : arg0, buffer pointer (copied to rbx in the prolog)
;   rsi  in : arg1, element count; doubled to a byte count in IG03 and
;             rebased to "bytes remaining from rbx" in IG21
;   rbx     : current scan pointer
;   r14     : original buffer pointer, used to compute the result
;   xmm0    : vector under test; IG13 expects it to hold the 16 bytes at [rbx]
;   rax out : result
; rsi, rdi and rax are also reused as scratch flags on paths where the
; length is no longer needed.
;
; NOTE(review): every 0xD1FFAB1E immediate below looks like the
; disassembler's placeholder for a masked constant rather than the real
; value; from context the real constants are presumably the scalar
; equivalents of the 0xFF80-per-word mask in RWD00 - confirm.
;-----------------------------------------------------------------------
G_M65217_IG01:
; Prolog: save the callee-saved registers this method overwrites and set
; rbp to the slot holding the saved rbp (rsp+10H after three pushes).
push rbp
push r14
push rbx
vzeroupper
lea rbp, [rsp+10H]
; rbx = arg0 (buffer pointer).
mov rbx, rdi
;; size=15 bbWeight=1 PerfScore 4.75
G_M65217_IG02:
; Empty input: return 0 via IG19.
test rsi, rsi
je G_M65217_IG19
; Remember the start of the buffer for the final index computation.
mov r14, rbx
; Fewer than 8 elements (16 bytes): a full vector load is not possible,
; so take the scalar path in IG15.
cmp rsi, 8
jb G_M65217_IG15
;; size=22 bbWeight=1 PerfScore 2.75
G_M65217_IG03:
; Test the first 16 bytes with an unaligned load. vptest sets ZF only when
; (xmm0 AND mask) is all zero, so jne means some element has a bit above
; the low 7 set; IG13 then locates it (rbx still equals the buffer start).
vmovdqu xmm0, xmmword ptr [r14]
vptest xmm0, xmmword ptr [reloc @RWD00]
jne SHORT G_M65217_IG13
; Convert the element count to a byte count (2 bytes per element).
add rsi, rsi
; 32 bytes or more: switch to the aligned, 2-vectors-per-iteration loop.
cmp rsi, 32
jae G_M65217_IG21
;; size=29 bbWeight=0.60 PerfScore 6.95
G_M65217_IG04:
; Step over the 16 bytes that were just verified.
add rbx, 16
;; size=4 bbWeight=0 PerfScore 0.00
G_M65217_IG05:
; Tail handling. If the byte count is a multiple of 16 nothing is left and
; rbx already points at the end of the buffer.
movzx rax, sil
test al, 15
je SHORT G_M65217_IG08
; Otherwise move rbx to (rbx + (rsi & 15) - 16): the last 16-byte window of
; the buffer, which overlaps bytes that were already checked.
mov rax, rsi
and rax, 15
add rax, rbx
mov rbx, rax
sub rbx, 16
vmovdqu xmm0, xmmword ptr [rbx]
vptest xmm0, xmmword ptr [reloc @RWD00]
; Materialise the test result as a 0/1 flag in eax (IG06/IG07).
je SHORT G_M65217_IG06
mov eax, 1
jmp SHORT G_M65217_IG07
;; size=47 bbWeight=0 PerfScore 0.00
G_M65217_IG06:
xor eax, eax
;; size=2 bbWeight=0 PerfScore 0.00
G_M65217_IG07:
; Flag set: the final window holds the first offending element -> IG13.
test eax, eax
jne SHORT G_M65217_IG13
; Flag clear: everything passed; rbx = end of buffer.
add rbx, 16
;; size=8 bbWeight=0 PerfScore 0.00
G_M65217_IG08:
; Common exit: result = (rbx - start) / 2, i.e. byte offset -> element index.
mov rax, rbx
sub rax, r14
shr rax, 1
;; size=9 bbWeight=0 PerfScore 0.00
G_M65217_IG09:
; Epilog: restore saved registers in reverse push order.
pop rbx
pop r14
pop rbp
ret
;; size=5 bbWeight=0 PerfScore 0.00
G_M65217_IG10:
; Reached from the main loop (IG24) when the OR of two vectors failed the
; mask test. Decide which one is responsible: test the first vector (xmm0,
; located at [rbx]) on its own and keep the outcome as a 0/1 flag in esi.
vptest xmm0, xmmword ptr [reloc @RWD00]
je SHORT G_M65217_IG11
mov esi, 1
jmp SHORT G_M65217_IG12
;; size=18 bbWeight=0 PerfScore 0.00
G_M65217_IG11:
xor esi, esi
;; size=2 bbWeight=0 PerfScore 0.00
G_M65217_IG12:
; First vector is the culprit: go locate the element inside it.
test esi, esi
jne SHORT G_M65217_IG13
; Otherwise it is the second vector: advance rbx to it and make it the
; vector under test, then fall through into IG13.
add rbx, 16
vmovaps xmm0, xmm1
;; size=12 bbWeight=0 PerfScore 0.00
G_M65217_IG13:
; Locate the first element > 0x7F inside xmm0 (which holds the 16 bytes at
; [rbx]). Adding 0x7F80 to each 16-bit lane with unsigned saturation sets
; bit 15 of a lane exactly when the lane value is >= 0x80.
vpaddusw xmm0, xmm0, xmmword ptr [reloc @RWD16]
; Gather the top bit of every byte, then keep only the odd bits, i.e. the
; top bit of each lane's high byte.
vpmovmskb edi, xmm0
and edi, 0xAAAA
; Lowest set bit is at position 2*k+1 for lane k, so rbx + pos - 1 is the
; byte address of that lane.
tzcnt edi, edi
lea rbx, [rbx+rdi-1]
jmp SHORT G_M65217_IG08
;; size=29 bbWeight=0 PerfScore 0.00
G_M65217_IG14:
; Only reached from IG17: edi holds a 2-element dword that failed the mask
; test. The helper is asked whether the first of the two elements is ASCII.
call [System.Text.ASCIIUtility:FirstCharInUInt32IsAscii(int):bool]
; False: rbx already points at the offending element.
test eax, eax
je SHORT G_M65217_IG08
; True: the offending element is the second one; skip one element.
add rbx, 2
jmp SHORT G_M65217_IG08
;; size=16 bbWeight=0 PerfScore 0.00
G_M65217_IG15:
; Scalar path for fewer than 8 elements; rsi is still an element count here.
; Bit 2 set -> at least 4 elements, so an 8-byte load is in bounds.
test sil, 4
je SHORT G_M65217_IG17
mov rdi, qword ptr [r14]
; NOTE(review): placeholder immediate - presumably the 64-bit non-ASCII mask.
mov rax, 0xD1FFAB1E
test rdi, rax
je SHORT G_M65217_IG16
; Some masked bit is set: isolate the masked bits and find the lowest one.
mov rsi, 0xD1FFAB1E
and rdi, rsi
tzcnt rdi, rdi
; Bit index -> byte index (>> 3), rounded down to an even offset (& -2) so
; it lands on the start of a 2-byte element; then make it an address.
sar edi, 3
movsxd rbx, edi
and rbx, -2
add rbx, r14
jmp G_M65217_IG08
;; size=60 bbWeight=0 PerfScore 0.00
G_M65217_IG16:
; The first 4 elements passed; continue after them.
lea rbx, [r14+8]
;; size=4 bbWeight=0 PerfScore 0.00
G_M65217_IG17:
; Bit 1 of the count set -> 2 more elements: test them as one dword.
test sil, 2
je SHORT G_M65217_IG18
mov edi, dword ptr [rbx]
; NOTE(review): placeholder immediate - presumably the 32-bit non-ASCII mask.
test edi, 0xD1FFAB1E
jne SHORT G_M65217_IG14
add rbx, 4
;; size=20 bbWeight=0 PerfScore 0.00
G_M65217_IG18:
; Bit 0 of the count set -> one last element: compare it against 0x7F.
test sil, 1
je G_M65217_IG08
cmp word ptr [rbx], 127
; Above 127: rbx already points at the offending element.
ja G_M65217_IG08
add rbx, 2
jmp G_M65217_IG08
;; size=29 bbWeight=0 PerfScore 0.00
G_M65217_IG19:
; Zero-length input: result is 0.
xor eax, eax
;; size=2 bbWeight=0 PerfScore 0.00
G_M65217_IG20:
; Second copy of the epilog, used by the zero-length return.
pop rbx
pop r14
pop rbp
ret
;; size=5 bbWeight=0 PerfScore 0.00
G_M65217_IG21:
; Bulk path (byte count >= 32). rbx = first 16-byte-aligned address above
; the start; it is at most start+16, so it stays within the 16 bytes that
; IG03 already verified.
lea rbx, [r14+16]
and rbx, -16
; rsi = bytes remaining from the aligned rbx to the end of the buffer.
add rsi, r14
sub rsi, rbx
cmp rsi, 32
jb SHORT G_M65217_IG25
; rax = end - 32: the highest rbx at which a 32-byte read is still in bounds.
lea rax, [rbx+rsi]
sub rax, 32
;; size=28 bbWeight=0 PerfScore 0.00
G_M65217_IG22:
; Main loop: load two aligned vectors, OR them, and test the combination
; once. The result is kept as a 0/1 flag in edi (IG23/IG24).
vmovdqa xmm0, xmmword ptr [rbx]
vmovdqa xmm1, xmmword ptr [rbx+16]
vpor xmm2, xmm0, xmm1
vptest xmm2, xmmword ptr [reloc @RWD00]
je SHORT G_M65217_IG23
mov edi, 1
jmp SHORT G_M65217_IG24
;; size=31 bbWeight=0 PerfScore 0.00
G_M65217_IG23:
xor edi, edi
;; size=2 bbWeight=0 PerfScore 0.00
G_M65217_IG24:
; A hit in either vector: IG10 works out which one.
test edi, edi
jne G_M65217_IG10
; Both passed: advance 32 bytes and loop while another 32-byte read fits.
add rbx, 32
cmp rbx, rax
jbe SHORT G_M65217_IG22
;; size=17 bbWeight=0 PerfScore 0.00
G_M65217_IG25:
; Fewer than 32 bytes remain. The loop consumes 32 bytes at a time, so bit 4
; of rsi still tells whether a whole 16-byte vector is left.
test sil, 16
je G_M65217_IG05
vmovdqa xmm0, xmmword ptr [rbx]
vptest xmm0, xmmword ptr [reloc @RWD00]
; Result kept as a 0/1 flag in eax (IG26/IG27).
je SHORT G_M65217_IG26
mov eax, 1
jmp SHORT G_M65217_IG27
;; size=32 bbWeight=0 PerfScore 0.00
G_M65217_IG26:
xor eax, eax
;; size=2 bbWeight=0 PerfScore 0.00
G_M65217_IG27:
; Hit: locate it in IG13. Otherwise IG04 skips this vector and falls into
; the tail handling in IG05.
test eax, eax
jne G_M65217_IG13
jmp G_M65217_IG04
;; size=13 bbWeight=0 PerfScore 0.00
; RWD00: 16-byte mask with 0xFF80 in every 16-bit lane. The code above uses
; it as the vptest operand: a non-zero AND means some 2-byte element has a
; bit set above its low 7 bits, i.e. its value is greater than 0x7F.
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h
; RWD16: 16-byte constant with 0x7F80 in every 16-bit lane. IG13 adds it to
; the data with unsigned saturation (vpaddusw), which sets bit 15 of a lane
; exactly when that lane's value is 0x80 or greater.
RWD16 dq 7F807F807F807F80h, 7F807F807F807F80h
; Total bytes of code 463, prolog size 15, PerfScore 61.55, instruction count 126, allocated bytes for code 471 (MethodHash=aedd013e) for method System.Text.ASCIIUtility:GetIndexOfFirstNonAsciiChar_Sse2(long,long):long
; ============================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment