Created
June 21, 2022 13:53
-
-
Save SwapnilGaikwad/e4e65798b924567c57cd0588a5c4740a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Assembly listing for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Intrinsified(long,long,long):long | |
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix | |
; Tier-1 compilation | |
; optimized code | |
; rbp based frame | |
; fully interruptible | |
; No PGO data | |
; 0 inlinees with PGO data; 9 single block inlinees; 20 inlinees without PGO data | |
; Final local variable assignments | |
; | |
; V00 arg0 [V00,T06] ( 3, 3 ) long -> rdi single-def | |
; V01 arg1 [V01,T05] ( 5, 3.50) long -> rsi single-def | |
; V02 arg2 [V02,T07] ( 3, 2.50) long -> rdx single-def | |
;* V03 loc0 [V03,T20] ( 0, 0 ) int -> zero-ref | |
;* V04 loc1 [V04,T21] ( 0, 0 ) long -> zero-ref | |
; V05 loc2 [V05,T01] ( 5, 10.50) byref -> rdi single-def | |
; V06 loc3 [V06,T22] ( 14, 18.50) simd16 -> mm0 | |
; V07 loc4 [V07,T03] ( 5, 6 ) byref -> rcx single-def | |
; V08 loc5 [V08,T24] ( 8, 11 ) simd16 -> mm2 | |
; V09 loc6 [V09,T00] ( 11, 26.50) long -> rax | |
; V10 loc7 [V10,T08] ( 2, 4.50) long -> rdx | |
; V11 loc8 [V11,T23] ( 3, 12 ) simd16 -> mm2 | |
;* V12 loc9 [V12 ] ( 0, 0 ) simd16 -> zero-ref | |
;# V13 OutArgs [V13 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace" | |
; V14 tmp1 [V14 ] ( 2, 2 ) struct ( 8) [rbp-08H] do-not-enreg[SB] "struct address for call/obj" | |
; V15 tmp2 [V15 ] ( 2, 2 ) struct ( 8) [rbp-10H] do-not-enreg[SB] "struct address for call/obj" | |
; V16 tmp3 [V16 ] ( 2, 2 ) struct ( 8) [rbp-18H] do-not-enreg[SB] "struct address for call/obj" | |
;* V17 tmp4 [V17,T14] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp" | |
;* V18 tmp5 [V18 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" | |
;* V19 tmp6 [V19 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" | |
; V20 tmp7 [V20,T26] ( 2, 2 ) simd16 -> [rbp-30H] do-not-enreg[S] ld-addr-op "Inlining Arg" | |
;* V21 tmp8 [V21 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp" | |
;* V22 tmp9 [V22 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" | |
;* V23 tmp10 [V23 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp" | |
;* V24 tmp11 [V24 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" | |
;* V25 tmp12 [V25,T15] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp" | |
;* V26 tmp13 [V26 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" | |
;* V27 tmp14 [V27 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" | |
; V28 tmp15 [V28,T27] ( 2, 2 ) simd16 -> [rbp-40H] do-not-enreg[S] ld-addr-op "Inlining Arg" | |
;* V29 tmp16 [V29 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp" | |
; V30 tmp17 [V30,T10] ( 2, 2 ) byref -> r8 "Inlining Arg" | |
;* V31 tmp18 [V31 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" | |
;* V32 tmp19 [V32 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp" | |
;* V33 tmp20 [V33 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" | |
;* V34 tmp21 [V34,T04] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp" | |
;* V35 tmp22 [V35 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" | |
;* V36 tmp23 [V36 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" | |
;* V37 tmp24 [V37 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" | |
;* V38 tmp25 [V38,T16] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp" | |
;* V39 tmp26 [V39 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" | |
;* V40 tmp27 [V40 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" | |
; V41 tmp28 [V41,T28] ( 2, 2 ) simd16 -> [rbp-50H] do-not-enreg[S] ld-addr-op "Inlining Arg" | |
;* V42 tmp29 [V42 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp" | |
; V43 tmp30 [V43,T09] ( 4, 4 ) byref -> rcx "Inlining Arg" | |
;* V44 tmp31 [V44 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" | |
;* V45 tmp32 [V45 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp" | |
;* V46 tmp33 [V46 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" | |
; V47 tmp34 [V47,T11] ( 2, 1.50) long -> [rbp-08H] do-not-enreg[] V14._00(offs=0x00) P-DEP "field V14._00 (fldOffset=0x0)" | |
; V48 tmp35 [V48,T12] ( 2, 1.50) long -> [rbp-10H] do-not-enreg[] V15._00(offs=0x00) P-DEP "field V15._00 (fldOffset=0x0)" | |
; V49 tmp36 [V49,T13] ( 2, 1.50) long -> [rbp-18H] do-not-enreg[] V16._00(offs=0x00) P-DEP "field V16._00 (fldOffset=0x0)" | |
; V50 tmp37 [V50,T17] ( 2, 1 ) long -> rax V22._00(offs=0x00) P-INDEP "field V22._00 (fldOffset=0x0)" | |
;* V51 tmp38 [V51 ] ( 0, 0 ) long -> zero-ref V24._00(offs=0x00) P-INDEP "field V24._00 (fldOffset=0x0)" | |
; V52 tmp39 [V52,T18] ( 2, 1 ) long -> rax V31._00(offs=0x00) P-INDEP "field V31._00 (fldOffset=0x0)" | |
;* V53 tmp40 [V53 ] ( 0, 0 ) long -> zero-ref V33._00(offs=0x00) P-INDEP "field V33._00 (fldOffset=0x0)" | |
; V54 tmp41 [V54,T19] ( 2, 1 ) long -> rdi V44._00(offs=0x00) P-INDEP "field V44._00 (fldOffset=0x0)" | |
;* V55 tmp42 [V55 ] ( 0, 0 ) long -> zero-ref V46._00(offs=0x00) P-INDEP "field V46._00 (fldOffset=0x0)" | |
; V56 cse0 [V56,T02] ( 3, 8.50) long -> rsi "CSE - aggressive" | |
; V57 cse1 [V57,T25] ( 5, 7 ) simd16 -> mm1 "CSE - aggressive" | |
; | |
; Lcl frame size = 80 | |
; ----------------------------------------------------------------------
; Reader notes for the JIT-emitted body of NarrowUtf16ToAscii_Intrinsified.
; Register roles as used by the instructions below:
;   rdi  = arg0, base of the 16-bit source elements (indexed as rdi+2*idx)
;   rsi  = arg1, destination address on entry; copied to rcx, then reused
;   rcx  = destination base (byte indexed as rcx+idx)
;   rdx  = arg2 on entry; becomes arg2-16, the loop bound
;   rax  = running element index and the value left in rax at every ret
;   xmm1 = RWD00 mask, 0xFF80 in every 16-bit lane
; NOTE(review): the source/destination/count meaning of the three long
; arguments is inferred from how they are addressed - confirm against the
; managed source.
; ----------------------------------------------------------------------
; Prolog: reserve the 80-byte local frame and set rbp = rsp+50H.
G_M37634_IG01: ;; offset=0000H | |
55 push rbp | |
4883EC50 sub rsp, 80 | |
C5F877 vzeroupper | |
488D6C2450 lea rbp, [rsp+50H] | |
;; size=13 bbWeight=1 PerfScore 2.75 | |
; Load the first 8 source elements and test them against the 0xFF80 mask.
; vptest sets ZF only when (xmm0 AND xmm1) is all zero, so the jne is taken
; when any element has a bit set above 0x7F; that path returns 0 via IG04.
G_M37634_IG02: ;; offset=000DH | |
C5FA6F07 vmovdqu xmm0, xmmword ptr [rdi] | |
C5F9100DD7000000 vmovupd xmm1, xmmword ptr [reloc @RWD00] | |
C4E27917C1 vptest xmm0, xmm1 | |
7555 jne SHORT G_M37634_IG04 | |
;; size=19 bbWeight=1 PerfScore 11.00 | |
; First 8 elements passed the mask test: pack the words to bytes and store
; the low 8 bytes at [rcx]. The packed value is spilled to [rbp-30H], reloaded,
; copied through [rbp-08H] and reloaded again before the store - a stack
; round-trip through the "Inlining Arg" / "struct address for call/obj" temps.
G_M37634_IG03: ;; offset=0020H | |
488BCE mov rcx, rsi | |
C5F967D0 vpackuswb xmm2, xmm0, xmm0 | |
C5F92955D0 vmovapd xmmword ptr [rbp-30H], xmm2 | |
488B45D0 mov rax, qword ptr [rbp-30H] | |
488945F8 mov qword ptr [rbp-08H], rax | |
488B45F8 mov rax, qword ptr [rbp-08H] | |
488901 mov qword ptr [rcx], rax | |
; rax = 8 elements written so far. If bit 3 of the destination address is
; set, skip the second 8-element step and go straight to the loop setup.
B808000000 mov eax, 8 | |
40F6C608 test sil, 8 | |
7535 jne SHORT G_M37634_IG06 | |
; Otherwise test source elements 8..15; on a mask hit return with rax = 8.
C5FA6F4710 vmovdqu xmm0, xmmword ptr [rdi+16] | |
C4E27917C1 vptest xmm0, xmm1 | |
7568 jne SHORT G_M37634_IG08 | |
; Pack and store 8 more bytes at [rcx+8], again via two stack slots
; ([rbp-40H] then [rbp-18H]).
C5F967D0 vpackuswb xmm2, xmm0, xmm0 | |
C5F92955C0 vmovapd xmmword ptr [rbp-40H], xmm2 | |
488B45C0 mov rax, qword ptr [rbp-40H] | |
488945E8 mov qword ptr [rbp-18H], rax | |
488B45E8 mov rax, qword ptr [rbp-18H] | |
4C8D4108 lea r8, bword ptr [rcx+8] | |
498900 mov qword ptr [r8], rax | |
EB08 jmp SHORT G_M37634_IG06 | |
align [0 bytes for IG07] | |
;; size=80 bbWeight=0.50 PerfScore 12.12 | |
; Failure on the very first vector: return 0.
G_M37634_IG04: ;; offset=0070H | |
33C0 xor eax, eax | |
;; size=2 bbWeight=0.50 PerfScore 0.12 | |
; Epilog for the return-0 path.
G_M37634_IG05: ;; offset=0072H | |
4883C450 add rsp, 80 | |
5D pop rbp | |
C3 ret | |
;; size=6 bbWeight=0.50 PerfScore 0.88 | |
; Loop setup: rax = 16 - (destination & 15), which makes rcx+rax a multiple
; of 16 (this replaces the earlier rax = 8). rdx = arg2 - 16 is the last
; index at which the loop may still start an iteration.
G_M37634_IG06: ;; offset=0078H | |
4883E60F and rsi, 15 | |
B810000000 mov eax, 16 | |
482BC6 sub rax, rsi | |
4883EA10 sub rdx, 16 | |
;; size=16 bbWeight=0.50 PerfScore 0.50 | |
; Main loop, 16 elements per iteration: load two 8-element vectors at
; element indices rax and rax+8 (rsi keeps rax+8 for IG09), OR them and
; test the result against the mask once. On a hit, leave for IG09.
G_M37634_IG07: ;; offset=0088H | |
C5FA6F0447 vmovdqu xmm0, xmmword ptr [rdi+2*rax] | |
488D7008 lea rsi, [rax+8] | |
C5FA6F1477 vmovdqu xmm2, xmmword ptr [rdi+2*rsi] | |
C5F9EBDA vpor xmm3, xmm0, xmm2 | |
C4E27917D9 vptest xmm3, xmm1 | |
751A jne SHORT G_M37634_IG09 | |
; Both vectors passed: pack the 16 words to 16 bytes, store at [rcx+rax],
; advance by 16 and continue while rax <= rdx (unsigned compare).
C5F967D2 vpackuswb xmm2, xmm0, xmm2 | |
C5FA7F1401 vmovdqu xmmword ptr [rcx+rax], xmm2 | |
4883C010 add rax, 16 | |
483BC2 cmp rax, rdx | |
76D5 jbe SHORT G_M37634_IG07 | |
;; size=43 bbWeight=4 PerfScore 69.33 | |
; Common epilog: return whatever index is currently in rax.
G_M37634_IG08: ;; offset=00B3H | |
4883C450 add rsp, 80 | |
5D pop rbp | |
C3 ret | |
;; size=6 bbWeight=0.50 PerfScore 0.88 | |
; Loop exit on a mask hit in the combined vectors: retest only the first
; vector. If it hits the mask, return rax unchanged. Otherwise pack it,
; store its 8 bytes at [rcx+rax] (via stack slots [rbp-50H] and [rbp-10H],
; using rdi as scratch) and return rsi = rax+8.
G_M37634_IG09: ;; offset=00B9H | |
C4E27917C1 vptest xmm0, xmm1 | |
75F3 jne SHORT G_M37634_IG08 | |
C5F967D0 vpackuswb xmm2, xmm0, xmm0 | |
C5F92955B0 vmovapd xmmword ptr [rbp-50H], xmm2 | |
488B7DB0 mov rdi, qword ptr [rbp-50H] | |
48897DF0 mov qword ptr [rbp-10H], rdi | |
488B7DF0 mov rdi, qword ptr [rbp-10H] | |
4803C8 add rcx, rax | |
488939 mov qword ptr [rcx], rdi | |
488BC6 mov rax, rsi | |
EBD3 jmp SHORT G_M37634_IG08 | |
;; size=39 bbWeight=0.50 PerfScore 6.25 | |
; Read-only mask loaded into xmm1: 0xFF80 in each of the eight 16-bit lanes,
; i.e. every bit that must be clear for an element to be <= 0x7F.
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h | |
; Total bytes of code 224, prolog size 13, PerfScore 127.63, instruction count 63, allocated bytes for code 238 (MethodHash=fbf96cfd) for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Intrinsified(long,long,long):long | |
; ============================================================ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment