Created
June 21, 2022 13:53
-
-
Save SwapnilGaikwad/6bd218c5113f622e5aafce6bb1feca7b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Assembly listing for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Sse2(long,long,long):long | |
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix | |
; Tier-1 compilation | |
; optimized code | |
; optimized using profile data | |
; rbp based frame | |
; fully interruptible | |
; with PGO: edge weights are valid, and fgCalledCount is 43814 | |
; Final local variable assignments | |
; | |
; V00 arg0 [V00,T01] ( 6,106.31) long -> rdi single-def | |
; V01 arg1 [V01,T02] ( 8, 56.87) long -> rsi single-def | |
; V02 arg2 [V02,T04] ( 3, 3.00) long -> rdx single-def | |
;* V03 loc0 [V03,T05] ( 0, 0 ) int -> zero-ref | |
;* V04 loc1 [V04,T06] ( 0, 0 ) long -> zero-ref | |
; V05 loc2 [V05,T10] ( 2, 52.36) simd16 -> mm0 | |
;* V06 loc3 [V06 ] ( 0, 0 ) simd16 -> zero-ref | |
; V07 loc4 [V07,T07] ( 14,160.52) simd16 -> mm1 | |
; V08 loc5 [V08,T09] ( 8,105.75) simd16 -> mm2 | |
; V09 loc6 [V09,T00] ( 12,310.81) long -> rax | |
; V10 loc7 [V10,T03] ( 2, 52.25) long -> rdx | |
; V11 loc8 [V11,T08] ( 3,153.97) simd16 -> mm2 | |
;* V12 loc9 [V12 ] ( 0, 0 ) simd16 -> zero-ref | |
;# V13 OutArgs [V13 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace" | |
; | |
; Lcl frame size = 0 | |
;-----------------------------------------------------------------------
; NarrowUtf16ToAscii_Sse2(long, long, long) : long   (JIT listing, x64 Unix)
;
; Narrows 16-bit units read from [rdi] into bytes written to [rsi], stopping
; at the first 8-unit vector that has any bit of the 0xFF80 mask set
; (i.e. a unit >= 0x80, which is not ASCII).
;
; In:   rdi = arg0, source (read as 16-byte vectors, indexed by 2*rax)
;       rsi = arg1, destination (written 8 or 16 bytes at a time)
;       rdx = arg2, presumably the element count -- it is only used as the
;             loop bound after subtracting 16; TODO confirm against caller
; Out:  rax = index such that every element below it has been checked and
;             stored; 0 when the very first vector fails the check
; Regs: xmm0 = 0xFF80 mask, xmm1/xmm2 = data vectors, xmm3 = OR of the pair,
;       rcx = scratch
;
; NOTE(review): the mask is kept in xmm0 but only the main loop uses the
; register; the three other vptest instructions re-read RWD00 from memory.
; NOTE(review): nothing here checks that rdx >= 16 before "sub rdx, 16", and
; the first two vector loads are unconditional; presumably the caller
; guarantees a minimum length -- confirm.
;-----------------------------------------------------------------------
; Prolog: set up the rbp-based frame (no locals are allocated).
G_M2855_IG01: ;; offset=0000H | |
55 push rbp | |
C5F877 vzeroupper | |
488BEC mov rbp, rsp | |
;; size=7 bbWeight=1 PerfScore 2.25 | |
; Load the mask into xmm0, read the first 8 source units and test them.
; vptest sets ZF only when (data AND mask) == 0, so jne is taken when some
; unit is >= 0x80; in that case return 0 via IG11.
G_M2855_IG02: ;; offset=0007H | |
C5F91005B1000000 vmovupd xmm0, xmmword ptr [reloc @RWD00] | |
C5FA6F0F vmovdqu xmm1, xmmword ptr [rdi] | |
C4E279170DA4000000 vptest xmm1, xmmword ptr [reloc @RWD00] | |
0F8592000000 jne G_M2855_IG11 | |
;; size=27 bbWeight=1 PerfScore 13.00 | |
; First vector is clean: pack its 8 words to 8 bytes and store them at [rsi].
; rax = 8 elements done so far. If bit 3 of the destination address is set,
; the next 16-byte boundary is at most 8 bytes away and is already covered,
; so skip the second partial write.
G_M2855_IG03: ;; offset=0022H | |
C5F167D1 vpackuswb xmm2, xmm1, xmm1 | |
C5F9D616 vmovq qword ptr [rsi], xmm2 | |
B808000000 mov eax, 8 | |
40F6C608 test sil, 8 | |
751C jne SHORT G_M2855_IG05 | |
;; size=19 bbWeight=1.00 PerfScore 4.50 | |
; Boundary is 9..16 bytes away: check and store a second group of 8 units
; (source offset 16 bytes, destination offset 8 bytes). On failure return
; with rax still equal to 8.
G_M2855_IG04: ;; offset=0035H | |
C5FA6F4F10 vmovdqu xmm1, xmmword ptr [rdi+16] | |
C4E279170D7D000000 vptest xmm1, xmmword ptr [reloc @RWD00] | |
754E jne SHORT G_M2855_IG08 | |
C5F167D1 vpackuswb xmm2, xmm1, xmm1 | |
C5F9D65608 vmovq qword ptr [rsi+8], xmm2 | |
;; size=25 bbWeight=0.59 PerfScore 7.72 | |
; rax = 16 - (rsi & 15): distance from rsi to the next 16-byte boundary
; (16 when rsi is already aligned), so rsi + rax is 16-byte aligned for the
; vmovdqa store in the loop. Elements below rax were written above.
; rdx -= 16 turns arg2 into the last index at which a full 16-element
; iteration may start.
G_M2855_IG05: ;; offset=004EH | |
488BC6 mov rax, rsi | |
4883E00F and rax, 15 | |
B910000000 mov ecx, 16 | |
482BC8 sub rcx, rax | |
488BC1 mov rax, rcx | |
4883EA10 sub rdx, 16 | |
align [0 bytes for IG06] | |
;; size=22 bbWeight=1.00 PerfScore 1.50 | |
; Main loop head: read 16 source units (two vectors) at element index rax,
; OR them together and test the result once against the mask in xmm0.
; Any unit >= 0x80 in either vector leaves the loop through IG09.
G_M2855_IG06: ;; offset=0064H | |
C5FA6F0C47 vmovdqu xmm1, xmmword ptr [rdi+2*rax] | |
C5FA6F544710 vmovdqu xmm2, xmmword ptr [rdi+2*rax+16] | |
C5F1EBDA vpor xmm3, xmm1, xmm2 | |
C4E27917D8 vptest xmm3, xmm0 | |
7516 jne SHORT G_M2855_IG09 | |
;; size=22 bbWeight=51.36 PerfScore 633.44 | |
; Main loop tail: pack both vectors into 16 bytes, store them with an
; aligned write, advance by 16 elements and repeat while rax <= rdx
; (unsigned compare). The first iteration runs without a prior bound check.
G_M2855_IG07: ;; offset=007AH | |
C5F167D2 vpackuswb xmm2, xmm1, xmm2 | |
C5F97F1406 vmovdqa xmmword ptr [rsi+rax], xmm2 | |
4883C010 add rax, 16 | |
483BC2 cmp rax, rdx | |
76D8 jbe SHORT G_M2855_IG06 | |
;; size=18 bbWeight=51.25 PerfScore 230.61 | |
; Common epilog: return the current rax.
G_M2855_IG08: ;; offset=008CH | |
5D pop rbp | |
C3 ret | |
;; size=2 bbWeight=1.00 PerfScore 1.50 | |
; The 16-unit pair failed the combined test. Re-test the first vector alone;
; if it is the one that failed, return rax unchanged.
G_M2855_IG09: ;; offset=008EH | |
C4E279170D29000000 vptest xmm1, xmmword ptr [reloc @RWD00] | |
75F3 jne SHORT G_M2855_IG08 | |
;; size=11 bbWeight=0.11 PerfScore 0.69 | |
; Only the second vector failed: store the first 8 units as 8 bytes,
; count them (rax += 8) and return.
G_M2855_IG10: ;; offset=0099H | |
C5F167D1 vpackuswb xmm2, xmm1, xmm1 | |
C5F9D61406 vmovq qword ptr [rsi+rax], xmm2 | |
4883C008 add rax, 8 | |
EBE4 jmp SHORT G_M2855_IG08 | |
;; size=15 bbWeight=0 PerfScore 0.18 | |
; The very first vector failed the check: return 0, nothing was stored.
G_M2855_IG11: ;; offset=00A8H | |
33C0 xor eax, eax | |
;; size=2 bbWeight=0 PerfScore 0.00 | |
; Epilog for the return-0 path.
G_M2855_IG12: ;; offset=00AAH | |
5D pop rbp | |
C3 ret | |
;; size=2 bbWeight=0 PerfScore 0.00 | |
; 16-byte constant: 0xFF80 in each of the eight 16-bit lanes. ANDing a unit
; with it is non-zero exactly when the unit is >= 0x80.
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h | |
; Total bytes of code 172, prolog size 7, PerfScore 913.98, instruction count 45, allocated bytes for code 186 (MethodHash=d994f4d8) for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Sse2(long,long,long):long | |
; ============================================================ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment