Skip to content

Instantly share code, notes, and snippets.

@SwapnilGaikwad
Created June 21, 2022 13:53
Show Gist options
  • Save SwapnilGaikwad/6bd218c5113f622e5aafce6bb1feca7b to your computer and use it in GitHub Desktop.
Save SwapnilGaikwad/6bd218c5113f622e5aafce6bb1feca7b to your computer and use it in GitHub Desktop.
; Assembly listing for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Sse2(long,long,long):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; optimized using profile data
; rbp based frame
; fully interruptible
; with PGO: edge weights are valid, and fgCalledCount is 43814
; Final local variable assignments
;
; V00 arg0 [V00,T01] ( 6,106.31) long -> rdi single-def
; V01 arg1 [V01,T02] ( 8, 56.87) long -> rsi single-def
; V02 arg2 [V02,T04] ( 3, 3.00) long -> rdx single-def
;* V03 loc0 [V03,T05] ( 0, 0 ) int -> zero-ref
;* V04 loc1 [V04,T06] ( 0, 0 ) long -> zero-ref
; V05 loc2 [V05,T10] ( 2, 52.36) simd16 -> mm0
;* V06 loc3 [V06 ] ( 0, 0 ) simd16 -> zero-ref
; V07 loc4 [V07,T07] ( 14,160.52) simd16 -> mm1
; V08 loc5 [V08,T09] ( 8,105.75) simd16 -> mm2
; V09 loc6 [V09,T00] ( 12,310.81) long -> rax
; V10 loc7 [V10,T03] ( 2, 52.25) long -> rdx
; V11 loc8 [V11,T08] ( 3,153.97) simd16 -> mm2
;* V12 loc9 [V12 ] ( 0, 0 ) simd16 -> zero-ref
;# V13 OutArgs [V13 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
;
; Lcl frame size = 0
; ----------------------------------------------------------------------
; NarrowUtf16ToAscii_Sse2(arg0, arg1, arg2) -> rax
;
; Converts 16-bit elements at [rdi] to bytes at [rsi], stopping at the
; first element that has any bit above the low 7 set (mask RWD00).
;
; In:   rdi = arg0, source, indexed in 2-byte elements ([rdi+2*rax])
;       rsi = arg1, destination, indexed in bytes ([rsi+rax])
;       rdx = arg2, only used as the main-loop bound (arg2 - 16)
; Out:  rax = element offset reached when the routine stops
;             (0 if the very first 8 elements fail the mask test)
; Regs: xmm0 = 0xFF80-per-word mask, xmm1/xmm2 = data, xmm3 = scratch,
;       rcx = scratch
;
; NOTE(review): no length check is visible before the first loads, and
; the main loop body runs once before its bound is compared (it can read
; source elements up to index 31). Presumably the caller guarantees
; arg2 >= 32 -- confirm against the managed source.
; NOTE(review): elements left over when the loop exits (fewer than 16)
; are not processed here; presumably the caller handles the tail.
; ----------------------------------------------------------------------
G_M2855_IG01: ;; offset=0000H
; Prologue: rbp-based frame, no locals on the stack.
55 push rbp
C5F877 vzeroupper
488BEC mov rbp, rsp
;; size=7 bbWeight=1 PerfScore 2.25
G_M2855_IG02: ;; offset=0007H
; xmm0 = mask kept in a register for the main loop.
C5F91005B1000000 vmovupd xmm0, xmmword ptr [reloc @RWD00]
; xmm1 = first 8 source elements (unaligned load).
C5FA6F0F vmovdqu xmm1, xmmword ptr [rdi]
; vptest sets ZF when (xmm1 AND mask) == 0, so jne is taken when at
; least one element has a bit outside 0x7F -> return 0 via IG11.
C4E279170DA4000000 vptest xmm1, xmmword ptr [reloc @RWD00]
0F8592000000 jne G_M2855_IG11
;; size=27 bbWeight=1 PerfScore 13.00
G_M2855_IG03: ;; offset=0022H
; Pack the 8 words to bytes (lossless here: all values already passed
; the mask test) and store the low 8 bytes to dest[0..7].
C5F167D1 vpackuswb xmm2, xmm1, xmm1
C5F9D616 vmovq qword ptr [rsi], xmm2
; rax = 8 elements done so far.
B808000000 mov eax, 8
; If bit 3 of the destination address is set, the 8 bytes just written
; already reach past the next 16-byte boundary of rsi, so the second
; 8-byte step below is skipped.
40F6C608 test sil, 8
751C jne SHORT G_M2855_IG05
;; size=19 bbWeight=1.00 PerfScore 4.50
G_M2855_IG04: ;; offset=0035H
; Second 8-element step: source elements 8..15 (byte offset 16).
C5FA6F4F10 vmovdqu xmm1, xmmword ptr [rdi+16]
C4E279170D7D000000 vptest xmm1, xmmword ptr [reloc @RWD00]
; Mask hit in this group: return with rax still 8.
754E jne SHORT G_M2855_IG08
C5F167D1 vpackuswb xmm2, xmm1, xmm1
C5F9D65608 vmovq qword ptr [rsi+8], xmm2
;; size=25 bbWeight=0.59 PerfScore 7.72
G_M2855_IG05: ;; offset=004EH
; rax = 16 - (rsi & 15): offset of the next 16-byte-aligned destination
; address (range 1..16). This is never past what was already written
; (8 or 16 bytes), so the loop may redo a few elements in exchange for
; aligned stores.
488BC6 mov rax, rsi
4883E00F and rax, 15
B910000000 mov ecx, 16
482BC8 sub rcx, rax
488BC1 mov rax, rcx
; rdx = arg2 - 16: highest offset at which a full 16-element iteration
; may still start.
4883EA10 sub rdx, 16
align [0 bytes for IG06]
;; size=22 bbWeight=1.00 PerfScore 1.50
G_M2855_IG06: ;; offset=0064H
; Main loop: 16 source elements per iteration, as two 8-element vectors.
C5FA6F0C47 vmovdqu xmm1, xmmword ptr [rdi+2*rax]
C5FA6F544710 vmovdqu xmm2, xmmword ptr [rdi+2*rax+16]
; OR both vectors so a single vptest covers all 16 elements.
C5F1EBDA vpor xmm3, xmm1, xmm2
C4E27917D8 vptest xmm3, xmm0
; Some element failed the mask test: resolve which half in IG09.
7516 jne SHORT G_M2855_IG09
;; size=22 bbWeight=51.36 PerfScore 633.44
G_M2855_IG07: ;; offset=007AH
; Pack both vectors into 16 bytes and store them. vmovdqa needs a
; 16-byte-aligned address; rsi+rax is aligned by the IG05 computation
; and stays aligned because rax only advances by 16.
C5F167D2 vpackuswb xmm2, xmm1, xmm2
C5F97F1406 vmovdqa xmmword ptr [rsi+rax], xmm2
4883C010 add rax, 16
; Unsigned compare: continue while rax <= arg2 - 16.
483BC2 cmp rax, rdx
76D8 jbe SHORT G_M2855_IG06
;; size=18 bbWeight=51.25 PerfScore 230.61
G_M2855_IG08: ;; offset=008CH
; Common exit: rax holds the element offset reached.
5D pop rbp
C3 ret
;; size=2 bbWeight=1.00 PerfScore 1.50
G_M2855_IG09: ;; offset=008EH
; The combined test failed. Re-test only the first vector (xmm1): if it
; fails too, return with rax unchanged.
C4E279170D29000000 vptest xmm1, xmmword ptr [reloc @RWD00]
75F3 jne SHORT G_M2855_IG08
;; size=11 bbWeight=0.11 PerfScore 0.69
G_M2855_IG10: ;; offset=0099H
; First vector is clean, so the failing element is in the second one:
; write the first 8 bytes, count them, and return.
C5F167D1 vpackuswb xmm2, xmm1, xmm1
C5F9D61406 vmovq qword ptr [rsi+rax], xmm2
4883C008 add rax, 8
EBE4 jmp SHORT G_M2855_IG08
;; size=15 bbWeight=0.03 PerfScore 0.18
G_M2855_IG11: ;; offset=00A8H
; Cold path (bbWeight=0): the first 8 elements failed the mask test;
; return 0 without writing anything.
33C0 xor eax, eax
;; size=2 bbWeight=0 PerfScore 0.00
G_M2855_IG12: ;; offset=00AAH
5D pop rbp
C3 ret
;; size=2 bbWeight=0 PerfScore 0.00
; 128-bit constant: 0xFF80 in each of the eight 16-bit lanes, i.e. every
; bit above the low 7 of each element.
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h
; Total bytes of code 172, prolog size 7, PerfScore 913.98, instruction count 45, allocated bytes for code 186 (MethodHash=d994f4d8) for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Sse2(long,long,long):long
; ============================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment