Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save SwapnilGaikwad/421283f96f1783a38728c6b438fa46c1 to your computer and use it in GitHub Desktop.

Select an option

Save SwapnilGaikwad/421283f96f1783a38728c6b438fa46c1 to your computer and use it in GitHub Desktop.
; Assembly listing for method System.Text.ASCIIUtility:GetIndexOfFirstNonAsciiChar_Sse2(long,long):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; optimized using profile data
; rbp based frame
; fully interruptible
; with PGO: edge weights are invalid, and fgCalledCount is 157179
; 0 inlinees with PGO data; 4 single block inlinees; 7 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T01] ( 34, 3 ) long -> rbx
; V01 arg1 [V01,T00] ( 19, 5.81) long -> rsi
;* V02 loc0 [V02,T03] ( 0, 0 ) int -> zero-ref
;* V03 loc1 [V03,T04] ( 0, 0 ) int -> zero-ref
; V04 loc2 [V04,T07] ( 11, 1.21) simd16 -> mm1
; V05 loc3 [V05,T11] ( 3, 0 ) simd16 -> mm2
; V06 loc4 [V06,T10] ( 4, 0 ) int -> rdi
; V07 loc5 [V07,T02] ( 8, 1.60) long -> r14
;* V08 loc6 [V08,T08] ( 0, 0 ) simd16 -> zero-ref
;* V09 loc7 [V09 ] ( 0, 0 ) long -> zero-ref
; V10 loc8 [V10,T12] ( 3, 0 ) int -> rdi
; V11 loc9 [V11,T17] ( 2, 0 ) long -> rax
;* V12 loc10 [V12 ] ( 0, 0 ) simd16 -> zero-ref
; V13 loc11 [V13,T09] ( 5, 0 ) long -> rdi
;# V14 OutArgs [V14 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
;* V15 tmp1 [V15,T05] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V16 tmp2 [V16 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V17 tmp3 [V17 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V18 tmp4 [V18,T13] ( 3, 0 ) bool -> rdi "Inline return value spill temp"
;* V19 tmp5 [V19 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V20 tmp6 [V20 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V21 tmp7 [V21,T14] ( 3, 0 ) bool -> rax "Inline return value spill temp"
;* V22 tmp8 [V22 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V23 tmp9 [V23 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V24 tmp10 [V24,T15] ( 3, 0 ) bool -> rax "Inline return value spill temp"
;* V25 tmp11 [V25 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V26 tmp12 [V26 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V27 tmp13 [V27,T16] ( 3, 0 ) bool -> rdi "Inline return value spill temp"
;* V28 tmp14 [V28 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V29 tmp15 [V29 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V30 tmp16 [V30 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
;* V31 tmp17 [V31 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
; V32 cse0 [V32,T06] ( 6, 1.21) simd16 -> mm0 "CSE - moderate"
;
; Lcl frame size = 0
; ----------------------------------------------------------------------
; JIT output for GetIndexOfFirstNonAsciiChar_Sse2(long,long):long
; (x64, Unix argument registers, Intel operand order).
;
; In:    rdi = arg0, start address of the buffer
;        rsi = arg1, element count; elements are 2 bytes wide (the count
;              is doubled into a byte count in IG03 and the final byte
;              offset is halved again in IG08)
; Out:   rax = (stop position - buffer start) / 2, i.e. the element index
;              at which scanning stopped; 0 for an empty buffer
; Roles: r14  = buffer start (never modified after IG02)
;        rbx  = current position inside the buffer
;        xmm0 = copy of RWD00 (0x7F80 in every 16-bit word) until a path
;               overwrites it; IG13 therefore re-reads RWD00 from memory
;        xmm1 = the 16-byte vector under test whenever IG13 is entered
; ----------------------------------------------------------------------
G_M65217_IG01:
; Prologue: save rbp/r14/rbx, set up the rbp frame, keep arg0 in rbx.
push rbp
push r14
push rbx
vzeroupper
lea rbp, [rsp+10H]
mov rbx, rdi
;; size=15 bbWeight=1 PerfScore 4.75
G_M65217_IG02:
; Empty buffer -> IG19 (returns 0).
test rsi, rsi
je G_M65217_IG19
; Remember the buffer start so IG08 can turn rbx back into an index.
mov r14, rbx
; Fewer than 8 elements (16 bytes) cannot fill one vector -> scalar path.
cmp rsi, 8
jb G_M65217_IG15
;; size=22 bbWeight=1 PerfScore 2.75
G_M65217_IG03:
; Test the first 16 bytes with an unaligned load.
; Unsigned saturating add of 0x7F80 sets bit 15 of a word exactly when the
; word is >= 0x80. vpmovmskb gathers the top bit of every byte, and the
; 0xAAAA mask keeps only the bits that come from each word's high byte.
vmovupd xmm0, xmmword ptr [reloc @RWD00]
vmovdqu xmm1, xmmword ptr [r14]
vpaddusw xmm2, xmm1, xmm0
vpmovmskb eax, xmm2
test eax, 0xAAAA
; Hit in the first vector: rbx still equals r14 and xmm1 holds the data.
jne G_M65217_IG13
; From here on rsi is a byte count (elements * 2).
add rsi, rsi
; 32 bytes or more -> aligned two-vector loop at IG21.
cmp rsi, 32
jae G_M65217_IG21
;; size=45 bbWeight=0.60 PerfScore 7.30
G_M65217_IG04:
; Step past a 16-byte vector that was just found clean
; (reached by fall-through from IG03 and by jump from IG27).
add rbx, 16
;; size=4 bbWeight=0 PerfScore 0.00
G_M65217_IG05:
; Tail handling: the low 4 bits of rsi are the bytes left over after whole
; 16-byte vectors. The IG22 loop advances rbx in steps of 32 without
; touching rsi, so those low bits are still valid when arriving from IG25.
movzx rax, sil
test al, 15
; No leftover bytes -> compute the result.
je SHORT G_M65217_IG08
; Back rbx up so that one last unaligned 16-byte load ends exactly at
; rbx + (rsi & 15); it overlaps bytes that were already checked.
mov rax, rsi
and rax, 15
add rax, rbx
mov rbx, rax
sub rbx, 16
vmovdqu xmm1, xmmword ptr [rbx]
; NOTE: xmm0 (the constant) is overwritten by this add.
vpaddusw xmm0, xmm1, xmm0
vpmovmskb eax, xmm0
test eax, 0xAAAA
; Materialise the test result as 0/1 in eax (IG06/IG07 complete this).
je SHORT G_M65217_IG06
mov eax, 1
jmp SHORT G_M65217_IG07
;; size=51 bbWeight=0 PerfScore 0.00
G_M65217_IG06:
xor eax, eax
;; size=2 bbWeight=0 PerfScore 0.00
G_M65217_IG07:
; Non-zero -> locate the offending word inside xmm1 (IG13).
test eax, eax
jne SHORT G_M65217_IG13
; Tail vector was clean: rbx now points just past the bytes it covered.
add rbx, 16
;; size=8 bbWeight=0 PerfScore 0.00
G_M65217_IG08:
; Common exit: result = (rbx - r14) / 2, byte offset -> element index.
mov rax, rbx
sub rax, r14
shr rax, 1
;; size=9 bbWeight=0 PerfScore 0.00
G_M65217_IG09:
; Epilogue (restores the registers pushed in IG01).
pop rbx
pop r14
pop rbp
ret
;; size=5 bbWeight=0 PerfScore 0.00
G_M65217_IG10:
; Entered from IG24 when the OR of two loop vectors (xmm1 = [rbx],
; xmm2 = [rbx+16]) flagged a hit. Decide which of the two holds it by
; testing xmm1 alone. xmm0 and esi are overwritten here; neither is read
; again on the paths that follow (IG13 reloads the constant, IG08 uses
; only rbx and r14).
vpaddusw xmm0, xmm1, xmm0
vpmovmskb esi, xmm0
test esi, 0xAAAA
je SHORT G_M65217_IG11
mov edi, 1
jmp SHORT G_M65217_IG12
;; size=23 bbWeight=0 PerfScore 0.00
G_M65217_IG11:
xor edi, edi
;; size=2 bbWeight=0 PerfScore 0.00
G_M65217_IG12:
; Hit is in the first vector -> IG13 with rbx/xmm1 unchanged.
test edi, edi
jne SHORT G_M65217_IG13
; Otherwise it is in the second vector: advance rbx by 16 and make that
; vector the one IG13 inspects.
add rbx, 16
vmovaps xmm1, xmm2
;; size=12 bbWeight=0 PerfScore 0.00
G_M65217_IG13:
; Locate the first flagged word in xmm1 (the vector located at rbx).
; The constant is read from memory because xmm0 may have been clobbered.
vpaddusw xmm0, xmm1, xmmword ptr [reloc @RWD00]
vpmovmskb edi, xmm0
and edi, 0xAAAA
; Lowest set bit = index of the high byte of the first flagged word;
; subtracting 1 yields the byte offset at which that word starts.
tzcnt edi, edi
lea rbx, [rbx+rdi-1]
jmp SHORT G_M65217_IG08
;; size=29 bbWeight=0 PerfScore 0.00
G_M65217_IG14:
; Entered from IG17 with edi = the dword loaded from [rbx], passed as the
; int argument. If the helper returns true, advance rbx by one 2-byte
; element; otherwise rbx already marks the stop position.
call [System.Text.ASCIIUtility:FirstCharInUInt32IsAscii(int):bool]
test eax, eax
je SHORT G_M65217_IG08
add rbx, 2
jmp SHORT G_M65217_IG08
;; size=16 bbWeight=0 PerfScore 0.00
G_M65217_IG15:
; Scalar path for counts below 8 (rsi is still an element count and
; rbx == r14 here). Bits 2, 1 and 0 of the count select a 4-element,
; 2-element and 1-element step in turn.
test sil, 4
je SHORT G_M65217_IG17
; Four elements at once (8 bytes).
mov rdi, qword ptr [r14]
; NOTE(review): 0xD1FFAB1E looks like a placeholder printed by the JIT
; disassembler rather than the real immediate; presumably the actual value
; is a mask of the bits that are set only in words above 0x7F - confirm
; against a non-diffable dump.
mov rax, 0xD1FFAB1E
test rdi, rax
; No masked bit set -> skip these four elements (IG16).
je SHORT G_M65217_IG16
; Otherwise isolate the masked bits and convert the lowest set bit into a
; byte offset: bit index >> 3 gives the byte, and -2 rounds down to the
; start of the 2-byte element.
mov rsi, 0xD1FFAB1E
and rdi, rsi
tzcnt rdi, rdi
sar edi, 3
movsxd rbx, edi
and rbx, -2
add rbx, r14
jmp G_M65217_IG08
;; size=60 bbWeight=0 PerfScore 0.00
G_M65217_IG16:
; The four elements passed the test: continue 8 bytes past the start.
lea rbx, [r14+8]
;; size=4 bbWeight=0 PerfScore 0.00
G_M65217_IG17:
; Two elements at once (4 bytes) when bit 1 of the count is set.
test sil, 2
je SHORT G_M65217_IG18
mov edi, dword ptr [rbx]
; Same placeholder caveat as in IG15 applies to this immediate.
test edi, 0xD1FFAB1E
; A masked bit is set -> IG14 decides whether the first element is clean.
jne SHORT G_M65217_IG14
add rbx, 4
;; size=20 bbWeight=0 PerfScore 0.00
G_M65217_IG18:
; Final single element when bit 0 of the count is set: step past it only
; if its value is <= 127.
test sil, 1
je G_M65217_IG08
cmp word ptr [rbx], 127
ja G_M65217_IG08
add rbx, 2
jmp G_M65217_IG08
;; size=29 bbWeight=0 PerfScore 0.00
G_M65217_IG19:
; Empty-buffer exit: return 0.
xor eax, eax
;; size=2 bbWeight=0 PerfScore 0.00
G_M65217_IG20:
; Epilogue for the empty-buffer exit.
pop rbx
pop r14
pop rbp
ret
;; size=5 bbWeight=0 PerfScore 0.00
G_M65217_IG21:
; Buffer holds at least 32 bytes and its first 16 bytes are clean.
; rbx = first 16-byte-aligned address above r14; it is at most r14+16, so
; it lies within or right after the bytes already checked.
lea rbx, [r14+16]
and rbx, -16
; rsi = bytes remaining from rbx to the end of the buffer.
add rsi, r14
sub rsi, rbx
cmp rsi, 32
jb SHORT G_M65217_IG25
; rax = highest address at which a full 32-byte read still fits.
lea rax, [rbx+rsi]
sub rax, 32
;; size=28 bbWeight=0 PerfScore 0.00
G_M65217_IG22:
; Main loop: two aligned 16-byte loads per iteration; OR them so a single
; saturating-add/movemask test covers all 32 bytes.
vmovdqa xmm1, xmmword ptr [rbx]
vmovdqa xmm2, xmmword ptr [rbx+16]
vpor xmm3, xmm1, xmm2
vpaddusw xmm3, xmm3, xmm0
vpmovmskb edi, xmm3
test edi, 0xAAAA
; Materialise the test result as 0/1 in edi (IG23/IG24 complete this).
je SHORT G_M65217_IG23
mov edi, 1
jmp SHORT G_M65217_IG24
;; size=36 bbWeight=0 PerfScore 0.00
G_M65217_IG23:
xor edi, edi
;; size=2 bbWeight=0 PerfScore 0.00
G_M65217_IG24:
; Hit somewhere in these 32 bytes -> IG10 works out which half.
test edi, edi
jne G_M65217_IG10
; Clean: advance and repeat while another 32-byte read fits.
add rbx, 32
cmp rbx, rax
jbe SHORT G_M65217_IG22
;; size=17 bbWeight=0 PerfScore 0.00
G_M65217_IG25:
; Fewer than 32 bytes remain. Bit 4 of rsi tells whether one more whole
; 16-byte vector is left (rsi was not updated by the loop, but the loop
; consumed multiples of 32, so this bit is unaffected).
test sil, 16
je G_M65217_IG05
vmovdqa xmm1, xmmword ptr [rbx]
vpaddusw xmm2, xmm1, xmm0
vpmovmskb eax, xmm2
test eax, 0xAAAA
; Materialise the test result as 0/1 in eax (IG26/IG27 complete this).
je SHORT G_M65217_IG26
mov eax, 1
jmp SHORT G_M65217_IG27
;; size=36 bbWeight=0 PerfScore 0.00
G_M65217_IG26:
xor eax, eax
;; size=2 bbWeight=0 PerfScore 0.00
G_M65217_IG27:
; Hit -> IG13 (xmm1/rbx describe this vector); clean -> IG04 steps past it
; and falls into the tail handling at IG05.
test eax, eax
jne G_M65217_IG13
jmp G_M65217_IG04
;; size=13 bbWeight=0 PerfScore 0.00
; 16-byte constant: 0x7F80 replicated into all eight 16-bit words; added
; with unsigned saturation to flag words whose value is >= 0x80.
RWD00 dq 7F807F807F807F80h, 7F807F807F807F80h
; Total bytes of code 497, prolog size 15, PerfScore 66.40, instruction count 137, allocated bytes for code 516 (MethodHash=aedd013e) for method System.Text.ASCIIUtility:GetIndexOfFirstNonAsciiChar_Sse2(long,long):long
; ============================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment