Skip to content

Instantly share code, notes, and snippets.

@SwapnilGaikwad
Created June 21, 2022 10:59
Show Gist options
  • Save SwapnilGaikwad/1d294bdcac31b91d2a83266f09aef061 to your computer and use it in GitHub Desktop.
Save SwapnilGaikwad/1d294bdcac31b91d2a83266f09aef061 to your computer and use it in GitHub Desktop.
; Assembly listing for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Sse2(long,long,long):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; optimized using profile data
; rbp based frame
; fully interruptible
; with PGO: edge weights are invalid, and fgCalledCount is 43814
; 0 inlinees with PGO data; 8 single block inlinees; 8 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 6, 3 ) long -> rbx single-def
; V01 arg1 [V01,T02] ( 10, 2 ) long -> r15 single-def
; V02 arg2 [V02,T01] ( 6, 3 ) long -> r14 single-def
;* V03 loc0 [V03,T04] ( 0, 0 ) int -> zero-ref
;* V04 loc1 [V04,T05] ( 0, 0 ) long -> zero-ref
;* V05 loc2 [V05,T07] ( 0, 0 ) simd16 -> zero-ref
;* V06 loc3 [V06 ] ( 0, 0 ) simd16 -> zero-ref
; V07 loc4 [V07,T06] ( 14, 2 ) simd16 -> mm0
; V08 loc5 [V08,T09] ( 8, 0 ) simd16 -> [rbp-30H]
; V09 loc6 [V09,T08] ( 18, 0 ) long -> r12
; V10 loc7 [V10,T12] ( 2, 0 ) long -> r14
; V11 loc8 [V11,T10] ( 3, 0 ) simd16 -> mm1
;* V12 loc9 [V12 ] ( 0, 0 ) simd16 -> zero-ref
;# V13 OutArgs [V13 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
; V14 tmp1 [V14,T11] ( 3, 0 ) int -> rdi
;* V15 tmp2 [V15 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V16 tmp3 [V16 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V17 tmp4 [V17 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V18 tmp5 [V18 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V19 tmp6 [V19 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V20 tmp7 [V20 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V21 tmp8 [V21 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V22 tmp9 [V22 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V23 tmp10 [V23 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V24 tmp11 [V24 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V25 tmp12 [V25 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
; V26 cse0 [V26,T03] ( 13, 3 ) ref -> rsi "CSE - aggressive"
;
; Lcl frame size = 16
; -----------------------------------------------------------------------------
; Reader's guide to the code below (comments added during review; the
; instructions and their encodings are unchanged JIT output).
;
; Purpose: narrows 16-bit source elements to bytes, 8 or 16 at a time, stopping
; at the first chunk that has any bit set outside the low 7 bits of a lane.
;
; Calling convention: arguments arrive in rdi, rsi, rdx (System V x64; the
; listing banner says "Unix") and are moved to callee-saved registers:
;   rbx = arg0 : base address of the 16-bit source elements
;   r15 = arg1 : base address of the 8-bit destination
;   r14 = arg2 : element count (reused from IG10 on as "count - 16")
;   r12        : running element offset; this is what the function returns
;   [rbp-30H]  : 16-byte stack slot used to park a packed vector across a
;                conditional Debug.Fail call
;
; Returns (rax): 0 if the first 8 source elements fail the mask test (IG17);
; otherwise the value of r12 when processing stopped (IG13).
;
; NOTE(review): the inline Debug.Fail calls suggest this was compiled from a
; checked/debug build of the library - confirm.
; NOTE(review): every Debug.Fail call takes its second argument through handle
; 0x7FA5F8002028; presumably that is the empty string - confirm.
; -----------------------------------------------------------------------------
G_M2855_IG01: ;; offset=0000H
; Prolog: save rbp and the callee-saved registers this method uses, reserve the
; 16-byte local frame, and point rbp at the saved-rbp slot (5 pushes + 16 bytes
; = 56 bytes below the entry rsp; rsp+30H is 8 below it).
55 push rbp
4157 push r15
4156 push r14
4154 push r12
53 push rbx
4883EC10 sub rsp, 16
C5F877 vzeroupper
488D6C2430 lea rbp, [rsp+30H]
; Move the three incoming arguments out of the volatile argument registers.
488BDF mov rbx, rdi
4C8BFE mov r15, rsi
4C8BF2 mov r14, rdx
;; size=29 bbWeight=1 PerfScore 7.50
G_M2855_IG02: ;; offset=001DH
; Assertion: the element count must be at least 32 (unsigned compare).
4983FE20 cmp r14, 32
7316 jae SHORT G_M2855_IG03
; Failure path: both Debug.Fail arguments are the same string object, loaded
; once through the handle and copied from rsi into rdi.
48BF282000F8A57F0000 mov rdi, 0x7FA5F8002028 ; string handle
488B37 mov rsi, gword ptr [rdi]
488BFE mov rdi, rsi
FF151FD70D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
; No branch follows the call, so if Debug.Fail returns, execution continues at
; IG03.
;; size=28 bbWeight=1 PerfScore 6.75
G_M2855_IG03: ;; offset=0039H
; Load source elements 0..7 (16 bytes, unaligned load) and test them against
; RWD00 (0xFF80 in every 16-bit lane). vptest sets ZF only when the AND of its
; operands is all zero, so "jne" is taken when some lane has a bit at or above
; bit 7, i.e. some element is >= 0x80. In that case return 0 via IG17.
C5FA6F03 vmovdqu xmm0, xmmword ptr [rbx]
C4E2791705BA010000 vptest xmm0, xmmword ptr [reloc @RWD00]
0F859C010000 jne G_M2855_IG17
;; size=19 bbWeight=1 PerfScore 10.00
G_M2855_IG04: ;; offset=004CH
; Narrow the 8 words to bytes and store the low 8 bytes to the destination.
; Every lane is below 0x80 here, so the saturating pack simply drops the high
; byte of each lane.
C5F967C0 vpackuswb xmm0, xmm0, xmm0
C4C179D607 vmovq qword ptr [r15], xmm0
; r12 = 8 elements done so far.
41BC08000000 mov r12d, 8
; If bit 3 of the destination address is set, skip the second 8-element chunk:
; the next 16-byte boundary is then at most 8 bytes away and is already covered
; by the store above.
41F6C708 test r15b, 8
7520 jne SHORT G_M2855_IG05
; Otherwise handle source elements 8..15 the same way. If they fail the mask
; test, return with r12 = 8 (IG13).
C5FA6F4310 vmovdqu xmm0, xmmword ptr [rbx+16]
C4E279170591010000 vptest xmm0, xmmword ptr [reloc @RWD00]
0F850F010000 jne G_M2855_IG13
C5F967C0 vpackuswb xmm0, xmm0, xmm0
C4C179D64708 vmovq qword ptr [r15+8], xmm0
;; size=51 bbWeight=0 PerfScore 0.00
G_M2855_IG05: ;; offset=007FH
; r12 = 16 - (r15 & 15): the element offset at which r15 + r12 is a multiple of
; 16 (range 1..16). Bytes before that offset were already written above; some
; of them may be written again by the first loop iteration.
498BFF mov rdi, r15
4883E70F and rdi, 15
41BC10000000 mov r12d, 16
4C2BE7 sub r12, rdi
; Assertion: 0 < r12 <= 16. "test" clears CF, so the jbe is taken only when
; r12 == 0; dil ends up holding the boolean result of the whole condition.
4D85E4 test r12, r12
760C jbe SHORT G_M2855_IG06
33FF xor edi, edi
4983FC10 cmp r12, 16
400F96C7 setbe dil
EB02 jmp SHORT G_M2855_IG07
;; size=33 bbWeight=0 PerfScore 0.00
G_M2855_IG06: ;; offset=00A0H
; r12 == 0: the condition is false.
33FF xor edi, edi
;; size=2 bbWeight=0 PerfScore 0.00
G_M2855_IG07: ;; offset=00A2H
; Skip the failure call when the condition held.
4084FF test dil, dil
7520 jne SHORT G_M2855_IG08
48BF408800F8A57F0000 mov rdi, 0x7FA5F8008840 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE282000F8A57F0000 mov rsi, 0x7FA5F8002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF1591D60D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
;; size=37 bbWeight=0 PerfScore 0.00
G_M2855_IG08: ;; offset=00C7H
; Assertion: r12 <= element count (unsigned).
4D3BE6 cmp r12, r14
7620 jbe SHORT G_M2855_IG09
48BF488800F8A57F0000 mov rdi, 0x7FA5F8008848 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE282000F8A57F0000 mov rsi, 0x7FA5F8002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF156CD60D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
;; size=37 bbWeight=0 PerfScore 0.00
G_M2855_IG09: ;; offset=00ECH
; Assertion: at least 16 elements remain after offset r12
; (count - r12 >= 16, unsigned).
498BFE mov rdi, r14
492BFC sub rdi, r12
4883FF10 cmp rdi, 16
7320 jae SHORT G_M2855_IG10
48BF508800F8A57F0000 mov rdi, 0x7FA5F8008850 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE282000F8A57F0000 mov rsi, 0x7FA5F8002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF1540D60D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
;; size=44 bbWeight=0 PerfScore 0.00
G_M2855_IG10: ;; offset=0118H
; r14 = count - 16: the loop below continues while r12 <= r14.
4983EE10 sub r14, 16
;; size=4 bbWeight=0 PerfScore 0.00
G_M2855_IG11: ;; offset=011CH
; Main loop, 16 elements per iteration. Load two 8-element vectors starting at
; source element r12 (byte offset 2*r12), OR them together and run the mask
; test once on the combined value. If any lane fails, go to IG15 to see whether
; the first half alone can still be stored.
C4A17A6F0463 vmovdqu xmm0, xmmword ptr [rbx+2*r12]
C4A17A6F4C6310 vmovdqu xmm1, xmmword ptr [rbx+2*r12+16]
C5F9EBD1 vpor xmm2, xmm0, xmm1
C4E2791715CA000000 vptest xmm2, xmmword ptr [reloc @RWD00]
755A jne SHORT G_M2855_IG15
; Pack both vectors into 16 bytes and park the result in the stack slot; xmm
; registers are caller-saved under this ABI, so the value could not be assumed
; to survive the Debug.Fail call below.
C5F967C1 vpackuswb xmm0, xmm0, xmm1
C5F92945D0 vmovapd xmmword ptr [rbp-30H], xmm0
; Assertion: the destination address r15 + r12 is 16-byte aligned.
4B8D3C27 lea rdi, [r15+r12]
40F6C70F test dil, 15
7420 je SHORT G_M2855_IG12
48BF588800F8A57F0000 mov rdi, 0x7FA5F8008858 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE282000F8A57F0000 mov rsi, 0x7FA5F8002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF15EDD50D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
;; size=79 bbWeight=0 PerfScore 0.00
G_M2855_IG12: ;; offset=016BH
; Reload the packed bytes and store all 16 with an aligned store, then advance
; the offset by 16 and loop while r12 <= count - 16 (unsigned).
C5F92845D0 vmovapd xmm0, xmmword ptr [rbp-30H]
C481797F0427 vmovdqa xmmword ptr [r15+r12], xmm0
4983C410 add r12, 16
4D3BE6 cmp r12, r14
769D jbe SHORT G_M2855_IG11
;; size=20 bbWeight=0 PerfScore 0.00
G_M2855_IG13: ;; offset=017FH
; Common exit: return the current element offset. Elements at or beyond this
; offset are not processed by this method.
; NOTE(review): presumably the caller handles the remainder - confirm.
498BC4 mov rax, r12
;; size=3 bbWeight=0 PerfScore 0.00
G_M2855_IG14: ;; offset=0182H
; Epilog: release the local frame and restore registers in reverse push order.
4883C410 add rsp, 16
5B pop rbx
415C pop r12
415E pop r14
415F pop r15
5D pop rbp
C3 ret
;; size=13 bbWeight=0 PerfScore 0.00
G_M2855_IG15: ;; offset=018FH
; Reached when the combined 16-element test failed. xmm0 still holds the first
; 8-element vector (the vpor wrote xmm2 and the pack was skipped). If that
; vector fails the mask test on its own, return r12 unchanged.
C4E279170568000000 vptest xmm0, xmmword ptr [reloc @RWD00]
75E5 jne SHORT G_M2855_IG13
; The first 8 elements pass: pack them and park the result across the possible
; Debug.Fail call, as in the main loop.
C5F967C0 vpackuswb xmm0, xmm0, xmm0
C5F92945D0 vmovapd xmmword ptr [rbp-30H], xmm0
; Assertion: the destination address r15 + r12 is 8-byte aligned.
4B8D3C27 lea rdi, [r15+r12]
40F6C707 test dil, 7
7420 je SHORT G_M2855_IG16
48BF608800F8A57F0000 mov rdi, 0x7FA5F8008860 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE282000F8A57F0000 mov rsi, 0x7FA5F8002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF158BD50D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
;; size=62 bbWeight=0 PerfScore 0.00
G_M2855_IG16: ;; offset=01CDH
; Store the 8 packed bytes, count them, and return through the common exit.
C5F92845D0 vmovapd xmm0, xmmword ptr [rbp-30H]
C48179D60427 vmovq qword ptr [r15+r12], xmm0
4983C408 add r12, 8
EBA1 jmp SHORT G_M2855_IG13
;; size=17 bbWeight=0 PerfScore 0.00
G_M2855_IG17: ;; offset=01DEH
; Early exit taken from IG03: the first 8 elements failed the mask test, so
; return 0. Nothing has been stored to the destination on this path.
33C0 xor eax, eax
;; size=2 bbWeight=0 PerfScore 0.00
G_M2855_IG18: ;; offset=01E0H
; Second copy of the epilog, used only by the return-0 path.
4883C410 add rsp, 16
5B pop rbx
415C pop r12
415E pop r14
415F pop r15
5D pop rbp
C3 ret
;; size=13 bbWeight=0 PerfScore 0.00
; Read-only mask used by every vptest above: 0xFF80 in each of the eight 16-bit
; lanes, i.e. the bits that must all be zero for a lane's value to be < 0x80.
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h
; Total bytes of code 493, prolog size 29, PerfScore 74.65, instruction count 117, allocated bytes for code 504 (MethodHash=d994f4d8) for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Sse2(long,long,long):long
; ============================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment