Skip to content

Instantly share code, notes, and snippets.

@SwapnilGaikwad
Created June 21, 2022 13:53
Show Gist options
  • Save SwapnilGaikwad/e4e65798b924567c57cd0588a5c4740a to your computer and use it in GitHub Desktop.
Save SwapnilGaikwad/e4e65798b924567c57cd0588a5c4740a to your computer and use it in GitHub Desktop.
; Assembly listing for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Intrinsified(long,long,long):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 9 single block inlinees; 20 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T06] ( 3, 3 ) long -> rdi single-def
; V01 arg1 [V01,T05] ( 5, 3.50) long -> rsi single-def
; V02 arg2 [V02,T07] ( 3, 2.50) long -> rdx single-def
;* V03 loc0 [V03,T20] ( 0, 0 ) int -> zero-ref
;* V04 loc1 [V04,T21] ( 0, 0 ) long -> zero-ref
; V05 loc2 [V05,T01] ( 5, 10.50) byref -> rdi single-def
; V06 loc3 [V06,T22] ( 14, 18.50) simd16 -> mm0
; V07 loc4 [V07,T03] ( 5, 6 ) byref -> rcx single-def
; V08 loc5 [V08,T24] ( 8, 11 ) simd16 -> mm2
; V09 loc6 [V09,T00] ( 11, 26.50) long -> rax
; V10 loc7 [V10,T08] ( 2, 4.50) long -> rdx
; V11 loc8 [V11,T23] ( 3, 12 ) simd16 -> mm2
;* V12 loc9 [V12 ] ( 0, 0 ) simd16 -> zero-ref
;# V13 OutArgs [V13 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
; V14 tmp1 [V14 ] ( 2, 2 ) struct ( 8) [rbp-08H] do-not-enreg[SB] "struct address for call/obj"
; V15 tmp2 [V15 ] ( 2, 2 ) struct ( 8) [rbp-10H] do-not-enreg[SB] "struct address for call/obj"
; V16 tmp3 [V16 ] ( 2, 2 ) struct ( 8) [rbp-18H] do-not-enreg[SB] "struct address for call/obj"
;* V17 tmp4 [V17,T14] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V18 tmp5 [V18 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V19 tmp6 [V19 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V20 tmp7 [V20,T26] ( 2, 2 ) simd16 -> [rbp-30H] do-not-enreg[S] ld-addr-op "Inlining Arg"
;* V21 tmp8 [V21 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V22 tmp9 [V22 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
;* V23 tmp10 [V23 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V24 tmp11 [V24 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
;* V25 tmp12 [V25,T15] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V26 tmp13 [V26 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V27 tmp14 [V27 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V28 tmp15 [V28,T27] ( 2, 2 ) simd16 -> [rbp-40H] do-not-enreg[S] ld-addr-op "Inlining Arg"
;* V29 tmp16 [V29 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
; V30 tmp17 [V30,T10] ( 2, 2 ) byref -> r8 "Inlining Arg"
;* V31 tmp18 [V31 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
;* V32 tmp19 [V32 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V33 tmp20 [V33 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
;* V34 tmp21 [V34,T04] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V35 tmp22 [V35 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V36 tmp23 [V36 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V37 tmp24 [V37 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V38 tmp25 [V38,T16] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V39 tmp26 [V39 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V40 tmp27 [V40 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V41 tmp28 [V41,T28] ( 2, 2 ) simd16 -> [rbp-50H] do-not-enreg[S] ld-addr-op "Inlining Arg"
;* V42 tmp29 [V42 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
; V43 tmp30 [V43,T09] ( 4, 4 ) byref -> rcx "Inlining Arg"
;* V44 tmp31 [V44 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
;* V45 tmp32 [V45 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V46 tmp33 [V46 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
; V47 tmp34 [V47,T11] ( 2, 1.50) long -> [rbp-08H] do-not-enreg[] V14._00(offs=0x00) P-DEP "field V14._00 (fldOffset=0x0)"
; V48 tmp35 [V48,T12] ( 2, 1.50) long -> [rbp-10H] do-not-enreg[] V15._00(offs=0x00) P-DEP "field V15._00 (fldOffset=0x0)"
; V49 tmp36 [V49,T13] ( 2, 1.50) long -> [rbp-18H] do-not-enreg[] V16._00(offs=0x00) P-DEP "field V16._00 (fldOffset=0x0)"
; V50 tmp37 [V50,T17] ( 2, 1 ) long -> rax V22._00(offs=0x00) P-INDEP "field V22._00 (fldOffset=0x0)"
;* V51 tmp38 [V51 ] ( 0, 0 ) long -> zero-ref V24._00(offs=0x00) P-INDEP "field V24._00 (fldOffset=0x0)"
; V52 tmp39 [V52,T18] ( 2, 1 ) long -> rax V31._00(offs=0x00) P-INDEP "field V31._00 (fldOffset=0x0)"
;* V53 tmp40 [V53 ] ( 0, 0 ) long -> zero-ref V33._00(offs=0x00) P-INDEP "field V33._00 (fldOffset=0x0)"
; V54 tmp41 [V54,T19] ( 2, 1 ) long -> rdi V44._00(offs=0x00) P-INDEP "field V44._00 (fldOffset=0x0)"
;* V55 tmp42 [V55 ] ( 0, 0 ) long -> zero-ref V46._00(offs=0x00) P-INDEP "field V46._00 (fldOffset=0x0)"
; V56 cse0 [V56,T02] ( 3, 8.50) long -> rsi "CSE - aggressive"
; V57 cse1 [V57,T25] ( 5, 7 ) simd16 -> mm1 "CSE - aggressive"
;
; Lcl frame size = 80
; ----------------------------------------------------------------------
; JIT-emitted body of NarrowUtf16ToAscii_Intrinsified (x64, AVX, Unix).
;
; Register roles as they appear in this listing:
;   rdi  = arg0; read 16 bytes at a time as vectors of eight 16-bit lanes
;   rsi  = arg1; base address the packed bytes are stored to (copied to rcx)
;   rdx  = arg2; only used as the loop bound (arg2 - 16)
;   rax  = running element index, and the value returned to the caller
;   xmm1 = RWD00 mask (0xFF80 in every 16-bit lane)
;
; NOTE(review): presumably arg0/arg1/arg2 are the UTF-16 source pointer,
; the ASCII destination pointer and the element count -- confirm against
; the managed source.
; NOTE(review): nothing here checks arg2 before the first loads or before
; the first loop iteration; presumably the caller guarantees a minimum
; element count -- confirm.
; ----------------------------------------------------------------------
G_M37634_IG01: ;; offset=0000H
; Prolog: reserve the 80-byte local frame; rbp ends up pointing at the
; saved-rbp slot (rsp + 0x50), so locals are addressed as [rbp-xxH].
55 push rbp
4883EC50 sub rsp, 80
C5F877 vzeroupper
488D6C2450 lea rbp, [rsp+50H]
;; size=13 bbWeight=1 PerfScore 2.75
G_M37634_IG02: ;; offset=000DH
; Load the first 8 lanes from arg0 and the 0xFF80 mask. vptest sets ZF
; only when (xmm0 AND xmm1) is all zero, so the jne is taken when some
; lane has a bit in positions 7..15 set (value > 0x7F).
C5FA6F07 vmovdqu xmm0, xmmword ptr [rdi]
C5F9100DD7000000 vmovupd xmm1, xmmword ptr [reloc @RWD00]
C4E27917C1 vptest xmm0, xmm1
7555 jne SHORT G_M37634_IG04
;; size=19 bbWeight=1 PerfScore 11.00
G_M37634_IG03: ;; offset=0020H
; Keep the original arg1 in rcx; rsi is reused later (alignment math in
; IG06, scratch index in IG07).
488BCE mov rcx, rsi
; Pack the 8 words down to bytes (unsigned saturation). The result is
; spilled to the frame and its low qword is copied through the struct
; temp at [rbp-08H] before being stored: 8 bytes written at [rcx].
C5F967D0 vpackuswb xmm2, xmm0, xmm0
C5F92955D0 vmovapd xmmword ptr [rbp-30H], xmm2
488B45D0 mov rax, qword ptr [rbp-30H]
488945F8 mov qword ptr [rbp-08H], rax
488B45F8 mov rax, qword ptr [rbp-08H]
488901 mov qword ptr [rcx], rax
; rax = 8 elements handled so far (this is the return value if the next
; vector fails the mask test below).
B808000000 mov eax, 8
; If bit 3 of the destination address is set, 16 - (rsi & 15) is <= 8,
; so the 8 bytes already stored cover everything before the loop start
; index computed in IG06; skip the second 8-lane step.
40F6C608 test sil, 8
7535 jne SHORT G_M37634_IG06
; Otherwise handle lanes 8..15 the same way: load, mask-test (returning
; rax = 8 via IG08 on failure), pack, and store 8 bytes at [rcx+8].
C5FA6F4710 vmovdqu xmm0, xmmword ptr [rdi+16]
C4E27917C1 vptest xmm0, xmm1
7568 jne SHORT G_M37634_IG08
C5F967D0 vpackuswb xmm2, xmm0, xmm0
C5F92955C0 vmovapd xmmword ptr [rbp-40H], xmm2
488B45C0 mov rax, qword ptr [rbp-40H]
488945E8 mov qword ptr [rbp-18H], rax
488B45E8 mov rax, qword ptr [rbp-18H]
4C8D4108 lea r8, bword ptr [rcx+8]
498900 mov qword ptr [r8], rax
EB08 jmp SHORT G_M37634_IG06
align [0 bytes for IG07]
;; size=80 bbWeight=0.50 PerfScore 12.12
G_M37634_IG04: ;; offset=0070H
; The very first vector failed the mask test: return 0.
33C0 xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M37634_IG05: ;; offset=0072H
; Epilog for the early-out path; rax holds the result.
4883C450 add rsp, 80
5D pop rbp
C3 ret
;; size=6 bbWeight=0.50 PerfScore 0.88
G_M37634_IG06: ;; offset=0078H
; rax = 16 - (arg1 & 15): index at which rcx + rax is 16-byte aligned
; (16 when arg1 is already aligned). Elements before this index were
; written by IG03; some may be written again by the loop.
; rdx = arg2 - 16: last index at which a full 16-element step may start.
4883E60F and rsi, 15
B810000000 mov eax, 16
482BC6 sub rax, rsi
4883EA10 sub rdx, 16
;; size=16 bbWeight=0.50 PerfScore 0.50
G_M37634_IG07: ;; offset=0088H
; Main loop, 16 elements per iteration: load lanes [rax, rax+8) into
; xmm0 and [rax+8, rax+16) into xmm2 (rsi = rax + 8 is kept for IG09).
C5FA6F0447 vmovdqu xmm0, xmmword ptr [rdi+2*rax]
488D7008 lea rsi, [rax+8]
C5FA6F1477 vmovdqu xmm2, xmmword ptr [rdi+2*rsi]
; OR both vectors so a single vptest covers all 16 lanes; any lane with
; a masked bit set leaves the loop through IG09.
C5F9EBDA vpor xmm3, xmm0, xmm2
C4E27917D9 vptest xmm3, xmm1
751A jne SHORT G_M37634_IG09
; Pack both vectors into 16 bytes and store them at rcx + rax.
C5F967D2 vpackuswb xmm2, xmm0, xmm2
C5FA7F1401 vmovdqu xmmword ptr [rcx+rax], xmm2
; Advance and continue while rax <= arg2 - 16 (unsigned compare).
4883C010 add rax, 16
483BC2 cmp rax, rdx
76D5 jbe SHORT G_M37634_IG07
;; size=43 bbWeight=4 PerfScore 69.33
G_M37634_IG08: ;; offset=00B3H
; Common exit: return the element index currently in rax.
4883C450 add rsp, 80
5D pop rbp
C3 ret
;; size=6 bbWeight=0.50 PerfScore 0.88
G_M37634_IG09: ;; offset=00B9H
; A masked bit was found somewhere in the 16 lanes. If it is already in
; the first 8 lanes (xmm0), return rax unchanged.
C4E27917C1 vptest xmm0, xmm1
75F3 jne SHORT G_M37634_IG08
; Otherwise the first 8 lanes pass: pack them, copy the low qword via the
; frame (through the struct temp at [rbp-10H]) and store 8 bytes at
; rcx + rax. rdi is free to use as scratch here because the source is
; not read again on this path.
C5F967D0 vpackuswb xmm2, xmm0, xmm0
C5F92955B0 vmovapd xmmword ptr [rbp-50H], xmm2
488B7DB0 mov rdi, qword ptr [rbp-50H]
48897DF0 mov qword ptr [rbp-10H], rdi
488B7DF0 mov rdi, qword ptr [rbp-10H]
4803C8 add rcx, rax
488939 mov qword ptr [rcx], rdi
; Return rax + 8 (computed into rsi by the lea in IG07).
488BC6 mov rax, rsi
EBD3 jmp SHORT G_M37634_IG08
;; size=39 bbWeight=0.50 PerfScore 6.25
; Read-only mask: bits 7..15 set in each of the eight 16-bit lanes. A lane
; ANDs to non-zero with it exactly when its value is greater than 0x7F.
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h
; Total bytes of code 224, prolog size 13, PerfScore 127.63, instruction count 63, allocated bytes for code 238 (MethodHash=fbf96cfd) for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Intrinsified(long,long,long):long
; ============================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment