Skip to content

Instantly share code, notes, and snippets.

@SwapnilGaikwad
Created June 21, 2022 09:55
Show Gist options
  • Save SwapnilGaikwad/a16089e23cd9d57145c0afb591ad7a7f to your computer and use it in GitHub Desktop.
Save SwapnilGaikwad/a16089e23cd9d57145c0afb591ad7a7f to your computer and use it in GitHub Desktop.
; Assembly listing for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Intrinsified(long,long,long):long
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 17 single block inlinees; 28 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T07] ( 3, 3 ) long -> r15 single-def
; V01 arg1 [V01,T02] ( 7, 8 ) long -> rbx single-def
; V02 arg2 [V02,T05] ( 6, 4.50) long -> r14 single-def
;* V03 loc0 [V03,T21] ( 0, 0 ) int -> zero-ref
;* V04 loc1 [V04,T22] ( 0, 0 ) long -> zero-ref
; V05 loc2 [V05,T01] ( 5, 10.50) byref -> r15 single-def
; V06 loc3 [V06,T23] ( 14, 18.50) simd16 -> [rbp-40H]
; V07 loc4 [V07,T04] ( 5, 6 ) byref -> r12 single-def
; V08 loc5 [V08,T25] ( 8, 11 ) simd16 -> mm0
; V09 loc6 [V09,T00] ( 17, 33 ) long -> r13
; V10 loc7 [V10,T08] ( 2, 4.50) long -> r14
; V11 loc8 [V11,T24] ( 3, 12 ) simd16 -> [rbp-50H] spill-single-def
;* V12 loc9 [V12 ] ( 0, 0 ) simd16 -> zero-ref
;# V13 OutArgs [V13 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
;* V14 tmp1 [V14 ] ( 0, 0 ) int -> zero-ref
; V15 tmp2 [V15 ] ( 2, 2 ) struct ( 8) [rbp-58H] do-not-enreg[SB] "struct address for call/obj"
; V16 tmp3 [V16,T11] ( 2, 1 ) int -> rdi
; V17 tmp4 [V17 ] ( 2, 2 ) struct ( 8) [rbp-60H] do-not-enreg[SB] "struct address for call/obj"
; V18 tmp5 [V18 ] ( 2, 2 ) struct ( 8) [rbp-68H] do-not-enreg[SB] "struct address for call/obj"
;* V19 tmp6 [V19 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V20 tmp7 [V20 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V21 tmp8 [V21 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V22 tmp9 [V22 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V23 tmp10 [V23,T15] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V24 tmp11 [V24 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V25 tmp12 [V25 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V26 tmp13 [V26 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V27 tmp14 [V27,T27] ( 2, 2 ) simd16 -> [rbp-80H] do-not-enreg[S] ld-addr-op "Inlining Arg"
;* V28 tmp15 [V28 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V29 tmp16 [V29 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
;* V30 tmp17 [V30 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V31 tmp18 [V31 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
;* V32 tmp19 [V32,T16] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V33 tmp20 [V33 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V34 tmp21 [V34 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V35 tmp22 [V35 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V36 tmp23 [V36,T28] ( 2, 2 ) simd16 -> [rbp-90H] do-not-enreg[S] ld-addr-op "Inlining Arg"
;* V37 tmp24 [V37 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
; V38 tmp25 [V38,T10] ( 2, 2 ) byref -> rsi "Inlining Arg"
;* V39 tmp26 [V39 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
;* V40 tmp27 [V40 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V41 tmp28 [V41 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
;* V42 tmp29 [V42 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V43 tmp30 [V43 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V44 tmp31 [V44 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V45 tmp32 [V45 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V46 tmp33 [V46 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V47 tmp34 [V47 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V48 tmp35 [V48,T06] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V49 tmp36 [V49 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V50 tmp37 [V50 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V51 tmp38 [V51 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V52 tmp39 [V52 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V53 tmp40 [V53 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V54 tmp41 [V54,T17] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V55 tmp42 [V55 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V56 tmp43 [V56 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V57 tmp44 [V57 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V58 tmp45 [V58 ] ( 0, 0 ) bool -> zero-ref "Inlining Arg"
;* V59 tmp46 [V59 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
; V60 tmp47 [V60,T29] ( 2, 2 ) simd16 -> [rbp-A0H] do-not-enreg[S] ld-addr-op "Inlining Arg"
;* V61 tmp48 [V61 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
; V62 tmp49 [V62,T09] ( 4, 4 ) byref -> r12 "Inlining Arg"
;* V63 tmp50 [V63 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
;* V64 tmp51 [V64 ] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V65 tmp52 [V65 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
; V66 tmp53 [V66,T12] ( 2, 1.50) long -> [rbp-58H] do-not-enreg[] V15._00(offs=0x00) P-DEP "field V15._00 (fldOffset=0x0)"
; V67 tmp54 [V67,T13] ( 2, 1.50) long -> [rbp-60H] do-not-enreg[] V17._00(offs=0x00) P-DEP "field V17._00 (fldOffset=0x0)"
; V68 tmp55 [V68,T14] ( 2, 1.50) long -> [rbp-68H] do-not-enreg[] V18._00(offs=0x00) P-DEP "field V18._00 (fldOffset=0x0)"
; V69 tmp56 [V69,T18] ( 2, 1 ) long -> rax V29._00(offs=0x00) P-INDEP "field V29._00 (fldOffset=0x0)"
;* V70 tmp57 [V70 ] ( 0, 0 ) long -> zero-ref V31._00(offs=0x00) P-INDEP "field V31._00 (fldOffset=0x0)"
; V71 tmp58 [V71,T19] ( 2, 1 ) long -> rdi V39._00(offs=0x00) P-INDEP "field V39._00 (fldOffset=0x0)"
;* V72 tmp59 [V72 ] ( 0, 0 ) long -> zero-ref V41._00(offs=0x00) P-INDEP "field V41._00 (fldOffset=0x0)"
; V73 tmp60 [V73,T20] ( 2, 1 ) long -> rdi V63._00(offs=0x00) P-INDEP "field V63._00 (fldOffset=0x0)"
;* V74 tmp61 [V74 ] ( 0, 0 ) long -> zero-ref V65._00(offs=0x00) P-INDEP "field V65._00 (fldOffset=0x0)"
; V75 cse0 [V75,T03] ( 3, 8.50) long -> [rbp-A8H] "CSE - aggressive"
; V76 cse1 [V76,T26] ( 5, 7 ) simd16 -> [rbp-C0H] spill-single-def "CSE - moderate"
;
; Lcl frame size = 152
G_M37634_IG01: ;; offset=0000H
; Prologue: save the callee-saved registers this method uses, reserve 152 bytes of
; locals, establish the rbp frame and move the three arguments into callee-saved registers.
55 push rbp
4157 push r15
4156 push r14
4155 push r13
4154 push r12
53 push rbx
4881EC98000000 sub rsp, 152
C5F877 vzeroupper
; 0xC0 = 152 bytes of locals + 5 pushed registers (40 bytes), so rbp points at the
; saved-rbp slot and the locals occupy [rbp-C0H] .. [rbp-28H].
488DAC24C0000000 lea rbp, [rsp+C0H]
; arg0 -> r15, arg1 -> rbx, arg2 -> r14 (matches V00/V01/V02 in the listing header).
; NOTE(review): presumably arg0 = UTF-16 source pointer, arg1 = ASCII destination pointer,
; arg2 = element count - confirm against the C# source.
4C8BFF mov r15, rdi
488BDE mov rbx, rsi
4C8BF2 mov r14, rdx
;; size=37 bbWeight=1 PerfScore 8.50
G_M37634_IG02: ;; offset=0025H
; Precondition check: when arg2 (r14) >= 32 (unsigned compare) skip the Debug.Fail in IG03.
4983FE20 cmp r14, 32
7320 jae SHORT G_M37634_IG04
;; size=6 bbWeight=1 PerfScore 1.25
G_M37634_IG03: ;; offset=002BH
; Failure path for the check above: call Debug.Fail(message, detail).
; Both arguments are loaded through the same string handle.
; NOTE(review): presumably the empty string - confirm.
; There is no branch after the call, so if it returns execution continues at IG04.
48BF28200058E77E0000 mov rdi, 0x7EE758002028 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE28200058E77E0000 mov rsi, 0x7EE758002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF156DE30D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
;; size=32 bbWeight=0.50 PerfScore 3.75
G_M37634_IG04: ;; offset=004BH
; Load the first 16 bytes (eight 16-bit elements) from the source at [r15].
C4C17A6F07 vmovdqu xmm0, xmmword ptr [r15]
; xmm1 = RWD00 mask (0xFF80 in every 16-bit lane); also spilled to [rbp-C0H] so it can be
; reloaded later in the method.
C5F9100D58020000 vmovupd xmm1, xmmword ptr [reloc @RWD00]
C5F9298D40FFFFFF vmovapd xmmword ptr [rbp-C0H], xmm1
; vptest sets ZF when (xmm0 AND xmm1) == 0. A non-zero result means at least one element
; has a bit set above 0x7F, in which case branch to IG06 (return 0).
C4E27917C1 vptest xmm0, xmm1
756A jne SHORT G_M37634_IG06
;; size=28 bbWeight=1 PerfScore 12.00
G_M37634_IG05: ;; offset=0067H
; r12 = destination base (copy of arg1 in rbx).
4C8BE3 mov r12, rbx
; Pack the eight 16-bit elements in xmm0 down to bytes (unsigned saturation). The mask test
; in IG04 passed, so every element is <= 0x7F and the pack loses nothing.
C5F967C0 vpackuswb xmm0, xmm0, xmm0
; The low 8 packed bytes are bounced through two stack temporaries ([rbp-80H], [rbp-58H])
; before being written to the destination.
C5F9294580 vmovapd xmmword ptr [rbp-80H], xmm0
488B4580 mov rax, qword ptr [rbp-80H]
488945A8 mov qword ptr [rbp-58H], rax
488B45A8 mov rax, qword ptr [rbp-58H]
49890424 mov qword ptr [r12], rax
; r13 = 8: the value returned if the method exits through IG16 from this block.
41BD08000000 mov r13d, 8
; If bit 3 of the destination address is set, skip the second 8-element chunk and go to IG08.
; NOTE(review): presumably because only 8 bytes are then needed to reach 16-byte
; alignment - confirm against the C# source.
F6C308 test bl, 8
7555 jne SHORT G_M37634_IG08
; Otherwise load the next eight elements (source byte offset 16) and repeat the mask test;
; on failure go to IG16, which returns r13 (= 8).
C4C17A6F4710 vmovdqu xmm0, xmmword ptr [r15+16]
C4E27917C1 vptest xmm0, xmm1
0F8572010000 jne G_M37634_IG16
; Pack and write the second 8 bytes to [r12+8], again via stack temporaries.
C5F967C0 vpackuswb xmm0, xmm0, xmm0
C5F9298570FFFFFF vmovapd xmmword ptr [rbp-90H], xmm0
488BBD70FFFFFF mov rdi, qword ptr [rbp-90H]
48897D98 mov qword ptr [rbp-68H], rdi
488B7D98 mov rdi, qword ptr [rbp-68H]
498D742408 lea rsi, bword ptr [r12+8]
48893E mov qword ptr [rsi], rdi
; Reload the mask from its spill slot before continuing at IG08.
C5F9288D40FFFFFF vmovapd xmm1, xmmword ptr [rbp-C0H]
EB14 jmp SHORT G_M37634_IG08
;; size=101 bbWeight=0.50 PerfScore 13.62
G_M37634_IG06: ;; offset=00CCH
; Return value 0: reached from IG04 when the first vector fails the mask test,
; before anything has been written to the destination.
33C0 xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M37634_IG07: ;; offset=00CEH
; Epilogue: release the 152-byte local area and restore the callee-saved registers in
; reverse push order.
4881C498000000 add rsp, 152
5B pop rbx
415C pop r12
415D pop r13
415E pop r14
415F pop r15
5D pop rbp
C3 ret
;; size=18 bbWeight=0.50 PerfScore 2.12
G_M37634_IG08: ;; offset=00E0H
; r13 = 16 - (destination address & 15), replacing whatever r13 held before.
; Because (rbx & 15) is in 0..15, r13 is always in 1..16 here.
488BFB mov rdi, rbx
4883E70F and rdi, 15
41BD10000000 mov r13d, 16
4C2BEF sub r13, rdi
; Range check on r13, i.e. 0 < r13 <= 16. `test` clears CF, so the jbe is taken only
; when r13 == 0; IG18 simply forwards to the Debug.Fail in IG09.
4D85ED test r13, r13
0F862A010000 jbe G_M37634_IG18
; dil = (r13 <= 16); when true skip the Debug.Fail in IG09 and continue at IG10.
33FF xor edi, edi
4983FD10 cmp r13, 16
400F96C7 setbe dil
4084FF test dil, dil
7529 jne SHORT G_M37634_IG10
;; size=40 bbWeight=0.50 PerfScore 2.50
G_M37634_IG09: ;; offset=0108H
; Failure path for the r13 range check in IG08 (also reached via IG18):
; call Debug.Fail(message, detail) with two string handles.
48BF40880058E77E0000 mov rdi, 0x7EE758008840 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE28200058E77E0000 mov rsi, 0x7EE758002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF1590E20D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
; Reload the mask from its spill slot after the call; execution then falls through into IG10.
C5F9288D40FFFFFF vmovapd xmm1, xmmword ptr [rbp-C0H]
;; size=40 bbWeight=0.50 PerfScore 5.25
G_M37634_IG10: ;; offset=0130H
; Check r13 <= r14 (unsigned), where r14 still holds arg2; skip the Debug.Fail when it holds.
4D3BEE cmp r13, r14
7629 jbe SHORT G_M37634_IG11
; Failure path: call Debug.Fail(message, detail), then reload the mask into xmm1
; and fall through into IG11.
48BF48880058E77E0000 mov rdi, 0x7EE758008848 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE28200058E77E0000 mov rsi, 0x7EE758002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF1563E20D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
C5F9288D40FFFFFF vmovapd xmm1, xmmword ptr [rbp-C0H]
;; size=45 bbWeight=0.50 PerfScore 5.88
G_M37634_IG11: ;; offset=015DH
; Check (r14 - r13) >= 16 (unsigned); skip the Debug.Fail when it holds.
; NOTE(review): presumably guarantees at least one full iteration of the loop at IG13 - confirm.
498BFE mov rdi, r14
492BFD sub rdi, r13
4883FF10 cmp rdi, 16
7329 jae SHORT G_M37634_IG12
; Failure path: call Debug.Fail(message, detail), then reload the mask into xmm1
; and fall through into IG12.
48BF50880058E77E0000 mov rdi, 0x7EE758008850 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE28200058E77E0000 mov rsi, 0x7EE758002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF152FE20D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
C5F9288D40FFFFFF vmovapd xmm1, xmmword ptr [rbp-C0H]
;; size=52 bbWeight=0.50 PerfScore 6.12
G_M37634_IG12: ;; offset=0191H
; r14 = arg2 - 16: the loop below keeps running while r13 <= r14.
4983EE10 sub r14, 16
;; size=4 bbWeight=0.50 PerfScore 0.12
G_M37634_IG13: ;; offset=0195H
; Main loop body: r13 is the current element index. Load sixteen 16-bit elements as two
; 16-byte vectors, from element r13 (xmm0) and element r13+8 (xmm2).
C4817A6F046F vmovdqu xmm0, xmmword ptr [r15+2*r13]
; rax = r13 + 8; if the loop exits via IG19, that block saves this value and IG20 reuses it.
498D4508 lea rax, [r13+8]
C4C17A6F1447 vmovdqu xmm2, xmmword ptr [r15+2*rax]
; Both vectors are spilled to the frame; IG15 and IG19 read them back from these slots.
C5F92955B0 vmovapd xmmword ptr [rbp-50H], xmm2
C5F92945C0 vmovapd xmmword ptr [rbp-40H], xmm0
; OR the two vectors and test against the 0xFF80 mask once: a non-zero result means some
; element in either half has a bit above 0x7F, so leave the loop through IG19.
C5F9EBDA vpor xmm3, xmm0, xmm2
C4E27917D9 vptest xmm3, xmm1
7568 jne SHORT G_M37634_IG19
; Check that the destination address (rbx + r13) is 16-byte aligned; skip the Debug.Fail
; in IG14 when it is.
4A8D3C2B lea rdi, [rbx+r13]
40F6C70F test dil, 15
7420 je SHORT G_M37634_IG15
;; size=47 bbWeight=4 PerfScore 66.33
G_M37634_IG14: ;; offset=01C4H
; Failure path for the alignment check: call Debug.Fail(message, detail), then fall
; through into IG15.
48BF58880058E77E0000 mov rdi, 0x7EE758008858 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE28200058E77E0000 mov rsi, 0x7EE758002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF15D4E10D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
;; size=32 bbWeight=2 PerfScore 15.00
G_M37634_IG15: ;; offset=01E4H
; Reload the first vector from its spill slot and pack it with the second one (read
; straight from its slot) into 16 bytes, then store them at destination offset r13.
C5F92845C0 vmovapd xmm0, xmmword ptr [rbp-40H]
C5F96745B0 vpackuswb xmm0, xmm0, xmmword ptr [rbp-50H]
C4817A7F042C vmovdqu xmmword ptr [r12+r13], xmm0
; Advance by 16 elements and loop while r13 <= r14 (unsigned). The vmovapd between the
; cmp and the jbe only reloads the mask into xmm1 and does not modify the flags.
4983C510 add r13, 16
4D3BEE cmp r13, r14
C5F9288D40FFFFFF vmovapd xmm1, xmmword ptr [rbp-C0H]
7690 jbe SHORT G_M37634_IG13
;; size=33 bbWeight=4 PerfScore 46.00
G_M37634_IG16: ;; offset=0205H
; Common exit: the return value is r13. Reached by fall-through from the loop in IG15
; and by branches from IG05, IG19 and IG20.
498BC5 mov rax, r13
;; size=3 bbWeight=0.50 PerfScore 0.12
G_M37634_IG17: ;; offset=0208H
; Epilogue: release the 152-byte local area and restore the callee-saved registers in
; reverse push order.
4881C498000000 add rsp, 152
5B pop rbx
415C pop r12
415D pop r13
415E pop r14
415F pop r15
5D pop rbp
C3 ret
;; size=18 bbWeight=0.50 PerfScore 2.12
G_M37634_IG18: ;; offset=021AH
; Trampoline: target of the `jbe` in IG08 (r13 == 0); forwards to the Debug.Fail in IG09.
E9E9FEFFFF jmp G_M37634_IG09
;; size=5 bbWeight=0.25 PerfScore 0.50
G_M37634_IG19: ;; offset=021FH
; Loop exit taken from IG13 when the combined 16-element mask test fails.
; Re-test only the first 8-element vector (reloaded from [rbp-40H]) against the mask.
C5F92845C0 vmovapd xmm0, xmmword ptr [rbp-40H]
C4E279178540FFFFFF vptest xmm0, xmmword ptr [rbp-C0H]
; Save rax (r13 + 8, computed in IG13) to [rbp-A8H] and re-store xmm0; neither of these
; moves changes the flags set by the vptest above.
48898558FFFFFF mov qword ptr [rbp-A8H], rax
C5F92945C0 vmovapd xmmword ptr [rbp-40H], xmm0
; The first half itself fails the mask test: return r13 without writing anything more.
75CA jne SHORT G_M37634_IG16
; The first half passed: check that the destination address (rbx + r13) is 8-byte aligned
; and skip the Debug.Fail when it is.
4903DD add rbx, r13
F6C307 test bl, 7
7420 je SHORT G_M37634_IG20
; Failure path: call Debug.Fail(message, detail), then fall through into IG20.
48BF60880058E77E0000 mov rdi, 0x7EE758008860 ; string handle
488B3F mov rdi, gword ptr [rdi]
48BE28200058E77E0000 mov rsi, 0x7EE758002028 ; string handle
488B36 mov rsi, gword ptr [rsi]
FF1555E10D00 call [System.Diagnostics.Debug:Fail(System.String,System.String)]
;; size=68 bbWeight=0.50 PerfScore 10.00
G_M37634_IG20: ;; offset=0263H
; Pack the first 8-element vector (reloaded from [rbp-40H]) to bytes and write the low
; 8 bytes to the destination at r12 + r13, bouncing the value through two stack temporaries.
C5F92845C0 vmovapd xmm0, xmmword ptr [rbp-40H]
C5F967C0 vpackuswb xmm0, xmm0, xmm0
C5F9298560FFFFFF vmovapd xmmword ptr [rbp-A0H], xmm0
488BBD60FFFFFF mov rdi, qword ptr [rbp-A0H]
48897DA0 mov qword ptr [rbp-60H], rdi
488B7DA0 mov rdi, qword ptr [rbp-60H]
4D03E5 add r12, r13
49893C24 mov qword ptr [r12], rdi
; r13 = the value saved at [rbp-A8H] by IG19 (the old r13 + 8), then exit through IG16,
; which returns r13.
488B8558FFFFFF mov rax, qword ptr [rbp-A8H]
4C8BE8 mov r13, rax
E96CFFFFFF jmp G_M37634_IG16
;; size=54 bbWeight=0.50 PerfScore 6.25
; 16-byte read-only constant loaded into xmm1 in IG04: 0xFF80 in each of the eight 16-bit
; lanes, i.e. every bit above 0x7F. A non-zero vptest against it flags an element > 0x7F.
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h
; Total bytes of code 665, prolog size 37, PerfScore 276.18, instruction count 148, allocated bytes for code 686 (MethodHash=fbf96cfd) for method System.Text.ASCIIUtility:NarrowUtf16ToAscii_Intrinsified(long,long,long):long
; ============================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment