Skip to content

Instantly share code, notes, and snippets.

@SwapnilGaikwad
Created July 5, 2022 13:50
Show Gist options
  • Save SwapnilGaikwad/fe49e0e1b5e27aeb91ca66c3e8034647 to your computer and use it in GitHub Desktop.
Save SwapnilGaikwad/fe49e0e1b5e27aeb91ca66c3e8034647 to your computer and use it in GitHub Desktop.
; Assembly listing for method System.Text.Tests.AsciiUtilityTests:GetIndexOfFirstNonAsciiChar_Intrinsifed(long,long):long
; Emitting BLENDED_CODE for generic ARM64 CPU - Unix
; optimized code
; fp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 9 single block inlinees; 7 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T04] ( 3, 2.50) long -> x0 single-def
; V01 arg1 [V01,T02] ( 17, 10 ) long -> x1
;* V02 loc0 [V02,T14] ( 0, 0 ) int -> zero-ref single-def
;* V03 loc1 [V03,T15] ( 0, 0 ) int -> zero-ref single-def
; V04 loc2 [V04,T17] ( 15, 14.50) simd16 -> d16 HFA(simd16)
; V05 loc3 [V05,T18] ( 3, 8.50) simd16 -> d17 HFA(simd16)
;* V06 loc4 [V06 ] ( 0, 0 ) int -> zero-ref
; V07 loc5 [V07,T00] ( 32, 30 ) long -> x19
; V08 loc6 [V08,T06] ( 8, 4 ) long -> x20 single-def
;* V09 loc7 [V09 ] ( 0, 0 ) long -> zero-ref
; V10 loc8 [V10,T08] ( 3, 1.50) int -> x0
; V11 loc9 [V11,T05] ( 2, 4.50) long -> x0 single-def
;* V12 loc10 [V12 ] ( 0, 0 ) simd16 -> zero-ref HFA(simd16)
;* V13 loc11 [V13 ] ( 0, 0 ) simd16 -> zero-ref HFA(simd16)
;* V14 loc12 [V14 ] ( 0, 0 ) long -> zero-ref
; V15 loc13 [V15,T07] ( 4, 2 ) long -> x0
;* V16 loc14 [V16 ] ( 0, 0 ) int -> zero-ref
;# V17 OutArgs [V17 ] ( 1, 1 ) lclBlk ( 0) [sp+00H] "OutgoingArgSpace"
; V18 tmp1 [V18,T19] ( 3, 3 ) simd16 -> d16 HFA(simd16) "dup spill"
; V19 tmp2 [V19,T01] ( 3, 24 ) struct (32) [fp+10H] HFA(simd16) do-not-enreg[SFR] multireg-ret "Return value temp for multireg intrinsic"
;* V20 tmp3 [V20,T10] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V21 tmp4 [V21 ] ( 0, 0 ) simd16 -> zero-ref HFA(simd16) "Inlining Arg"
;* V22 tmp5 [V22,T03] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
; V23 tmp6 [V23,T16] ( 3, 24 ) simd16 -> d18 HFA(simd16) "Inlining Arg"
;* V24 tmp7 [V24,T11] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V25 tmp8 [V25 ] ( 0, 0 ) simd16 -> zero-ref HFA(simd16) "Inlining Arg"
;* V26 tmp9 [V26,T12] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V27 tmp10 [V27 ] ( 0, 0 ) simd16 -> zero-ref HFA(simd16) "Inlining Arg"
;* V28 tmp11 [V28,T13] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
;* V29 tmp12 [V29 ] ( 0, 0 ) simd16 -> zero-ref HFA(simd16) "Inlining Arg"
;* V30 tmp13 [V30 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
;* V31 tmp14 [V31 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
;* V32 tmp15 [V32 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
;* V33 tmp16 [V33 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
; V34 cse0 [V34,T09] ( 3, 1.50) long -> x0 "CSE - moderate"
;
; Lcl frame size = 32
; -----------------------------------------------------------------------
; GetIndexOfFirstNonAsciiChar_Intrinsifed(long, long) : long   (ARM64 JIT output)
;
; Scans a buffer of 16-bit elements and returns the element index at which
; scanning stopped, computed in IG10 as (x19 - x20) >> 1.
;
; Register roles (from the local-variable table in the listing header):
;   x0  = arg0 on entry, return value on exit; also used as scratch
;   x1  = arg1 on entry (element count); doubled to a byte count at the
;         `lsl x1, x1, #1` in IG05 on the vector path
;   x19 = V07 loc5, the running pointer into the buffer
;   x20 = V08 loc6, the original buffer start (single-def)
;   v16/v17/v18 = SIMD temporaries
;
; NOTE(review): the literal 0xd1ffab1e is used both as the tst/and mask and
; as the call-target address in IG14, so it looks like the JIT's diffable
; disassembly placeholder rather than a real operand. The actual mask is
; presumably the "any bits above 0x7F in a 16-bit lane" pattern -- TODO
; confirm against a non-diffable dump.
; -----------------------------------------------------------------------
; Prolog: allocate a 64-byte frame, save fp/lr at [sp] and the callee-saved
; x19/x20 at [sp+48]. [fp+16 .. fp+48) holds the 32-byte temp V19.
G_M21315_IG01:
stp fp, lr, [sp,#-64]!
stp x19, x20, [sp,#48]
mov fp, sp
;; size=12 bbWeight=1 PerfScore 2.50
; Zero-length input: fall through and return 0; otherwise go to the real work.
G_M21315_IG02:
cbnz x1, G_M21315_IG05
;; size=4 bbWeight=1 PerfScore 1.00
G_M21315_IG03:
mov x0, xzr
;; size=4 bbWeight=0.50 PerfScore 0.25
; Epilog for the zero-length return.
G_M21315_IG04:
ldp x19, x20, [sp,#48]
ldp fp, lr, [sp],#64
ret lr
;; size=12 bbWeight=0.50 PerfScore 1.50
; Vector path setup. Fewer than 8 elements (unsigned compare) go to the
; scalar path at IG15.
G_M21315_IG05:
mov x19, x0
mov x20, x19
cmp x1, #8
blo G_M21315_IG15
; Test the first 8 elements: umaxp of the vector with itself folds adjacent
; lanes pairwise, so the low 64 bits summarise all 8 lanes; a non-zero
; result under the mask sends us to IG13 to locate the offending element.
ld1 {v16.8h}, [x20]
umaxp v17.8h, v16.8h, v16.8h
umov x0, v17.d[0]
tst x0, #0xd1ffab1e
bne G_M21315_IG13
; From here on x1 is a byte count (elements * 2).
lsl x1, x1, #1
cmp x1, #32
blo G_M21315_IG08
; x19 = (start + 16) rounded down to a 16-byte boundary; x1 is reduced by
; the number of bytes skipped so it counts bytes remaining from x19.
add x0, x20, #16
and x19, x0, #-16
sub x0, x19, x20
sub x1, x1, x0
cmp x1, #32
blo G_M21315_IG07
; x0 = x19 + x1 - 32: the last address at which a 32-byte read still fits.
add x0, x19, x1
sub x0, x0, #32
; JIT loop-alignment pseudo-directives; all report 0 bytes of padding.
align [0 bytes for IG06]
align [0 bytes]
align [0 bytes]
align [0 bytes]
;; size=80 bbWeight=0.50 PerfScore 8.00
; Main loop: 32 bytes (two vectors) per iteration while x19 <= x0 (unsigned).
G_M21315_IG06:
ldp q16, q17, [x19]
; The pair is stored to the stack temp at [fp+16]/[fp+32] and immediately
; reloaded into the same registers. The header lists this slot as V19 tmp2,
; "do-not-enreg ... Return value temp for multireg intrinsic".
; NOTE(review): the round-trip looks redundant -- likely a codegen artifact.
str q16, [fp,#16]
str q17, [fp,#32]
ldr q16, [fp,#16]
ldr q17, [fp,#32]
; OR both vectors together, fold, and test once for the whole 32 bytes.
orr v18.8h, v16.8h, v17.8h
umaxp v18.8h, v18.8h, v18.8h
umov x2, v18.d[0]
tst x2, #0xd1ffab1e
bne G_M21315_IG12
add x19, x19, #32
cmp x19, x0
bls G_M21315_IG06
;; size=52 bbWeight=4 PerfScore 60.00
; Tail: if bit 4 of the byte count in x1 is set, test one more 16-byte vector.
G_M21315_IG07:
tbz x1, #4, G_M21315_IG09
ld1 {v16.8h}, [x19]
umaxp v17.8h, v16.8h, v16.8h
umov x0, v17.d[0]
tst x0, #0xd1ffab1e
bne G_M21315_IG13
;; size=24 bbWeight=0.50 PerfScore 3.75
; Step past the 16 bytes just tested. Also the target of the "< 32 bytes"
; branch in IG05, where the vector at the buffer start was already tested.
G_M21315_IG08:
add x19, x19, #16
;; size=4 bbWeight=0.50 PerfScore 0.25
; Final partial vector: if the low 4 bits of x1 are non-zero, back the
; pointer up so that one last 16-byte read ends (x1 & 15) bytes past the
; current x19, overlapping bytes that were already tested.
G_M21315_IG09:
uxtb w0, w1
tst w0, #15
beq G_M21315_IG10
and x0, x1, #15
add x0, x0, x19
sub x19, x0, #16
ld1 {v16.8h}, [x19]
umaxp v17.8h, v16.8h, v16.8h
umov x0, v17.d[0]
tst x0, #0xd1ffab1e
bne G_M21315_IG13
add x19, x19, #16
;; size=48 bbWeight=0.50 PerfScore 5.25
; Common exit: result = (x19 - x20) / 2, i.e. byte distance converted to a
; 16-bit element index.
G_M21315_IG10:
sub x0, x19, x20
lsr x0, x0, #1
;; size=8 bbWeight=0.50 PerfScore 0.75
; Epilog for the common exit.
G_M21315_IG11:
ldp x19, x20, [sp,#48]
ldp fp, lr, [sp],#64
ret lr
;; size=12 bbWeight=0.50 PerfScore 1.50
; The 32-byte test in the main loop hit. Re-test the first vector (v16)
; alone: if it is the culprit, locate within it; otherwise advance x19 by
; 16 and make the second vector (v17) the one to locate within.
G_M21315_IG12:
umaxp v18.8h, v16.8h, v16.8h
umov x1, v18.d[0]
tst x1, #0xd1ffab1e
bne G_M21315_IG13
add x19, x19, #16
mov v16.16b, v17.16b
;; size=24 bbWeight=0.50 PerfScore 2.25
; Locate the first lane of v16 that exceeds 0x007F (x19 = address of v16's
; data): cmhi yields an all-ones lane where lane > 0x007F (unsigned); uzp2
; keeps the odd-numbered bytes, leaving one mask byte per lane in the low
; 64 bits; rbit+clz counts trailing zero bits; >> 3 gives the lane index;
; sbfiz sign-extends it and doubles it into a byte offset added to x19.
G_M21315_IG13:
ldr q17, [@RWD00]
cmhi v16.8h, v16.8h, v17.8h
uzp2 v16.16b, v16.16b, v16.16b
umov x0, v16.d[0]
rbit x0, x0
clz x0, x0
asr w0, w0, #3
sbfiz x0, x0, #1, #32
add x19, x0, x19
b G_M21315_IG10
;; size=40 bbWeight=0.50 PerfScore 5.00
; Reached only from IG17 when the 4-byte test fails; w0 still holds the
; 32-bit value loaded there. Calls FirstCharInUInt32IsAscii through an
; indirection cell (address shown as the placeholder) and, when it returns
; non-zero, advances x19 by one element (2 bytes) before exiting.
G_M21315_IG14:
movz x1, #0xd1ffab1e // code for System.Text.Tests.AsciiUtilityTests:FirstCharInUInt32IsAscii
movk x1, #0xd1ffab1e LSL #16
movk x1, #0xd1ffab1e LSL #32
ldr x1, [x1]
blr x1
cbz w0, G_M21315_IG10
add x19, x19, #2
b G_M21315_IG10
;; size=32 bbWeight=0.50 PerfScore 4.00
; Scalar path (fewer than 8 elements; x1 is still an element count).
; Bit 2 of the count set: test 4 elements with a single 8-byte load.
G_M21315_IG15:
tbz w1, #2, G_M21315_IG17
ldr x0, [x20]
and x0, x0, #0xd1ffab1e
cbz x0, G_M21315_IG16
; Some masked bit is set: trailing-zero count (rbit+clz) >> 3 is the byte
; index of the lowest set bit; `and #-2` rounds it down to an element
; boundary before adding it to the buffer start.
rbit x1, x0
clz x0, x1
asr w0, w0, #3
sxtw x0, w0
and x0, x0, #-2
add x19, x20, x0
b G_M21315_IG10
;; size=44 bbWeight=0.50 PerfScore 5.25
; All 4 elements passed: skip 8 bytes.
G_M21315_IG16:
add x19, x20, #8
;; size=4 bbWeight=0.50 PerfScore 0.25
; Bit 1 of the count set: test 2 elements with a 4-byte load; on failure
; IG14 decides whether the first of the two passed.
G_M21315_IG17:
tbz w1, #1, G_M21315_IG18
ldr w0, [x19]
tst w0, #0xd1ffab1e
bne G_M21315_IG14
add x19, x19, #4
;; size=20 bbWeight=0.50 PerfScore 3.00
; Bit 0 of the count set: test the last single element against 127 and
; step past it only when it is <= 127.
G_M21315_IG18:
tbz w1, #0, G_M21315_IG10
ldrh w0, [x19]
cmp w0, #127
bgt G_M21315_IG10
add x19, x19, #2
b G_M21315_IG10
;; size=24 bbWeight=0.50 PerfScore 3.50
; Read-only data: 0x007F in each of the eight 16-bit lanes, loaded in IG13
; as the cmhi threshold.
RWD00 dq 007F007F007F007Fh, 007F007F007F007Fh
; Total bytes of code 448, prolog size 12, PerfScore 152.80, instruction count 116, allocated bytes for code 448 (MethodHash=6043acbc) for method System.Text.Tests.AsciiUtilityTests:GetIndexOfFirstNonAsciiChar_Intrinsifed(long,long):long
; ============================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment