Created
July 18, 2024 03:20
-
-
Save cyb70289/6a6f08a5bfc789878d83abde2a68a14e to your computer and use it in GitHub Desktop.
sonic-decode-opt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/cmake/set_arch_flags.cmake b/cmake/set_arch_flags.cmake | |
index 538ddfe..81c40e4 100644 | |
--- a/cmake/set_arch_flags.cmake | |
+++ b/cmake/set_arch_flags.cmake | |
@@ -2,8 +2,8 @@ function(set_arch_flags target arch) | |
message(STATUS "Setting architecture flags for ${arch}") | |
if(arch MATCHES "x86_64") | |
target_compile_options(${target} PRIVATE -mavx2 -mpclmul -mbmi -mlzcnt) | |
- elseif(arch MATCHES "arm") | |
- target_compile_options(${target} PRIVATE -march=armv8-a) | |
+ elseif(arch MATCHES "arm|aarch64") | |
+ target_compile_options(${target} PRIVATE -march=armv8-a+sve2 -msve-vector-bits=128) | |
else() | |
message(FATAL_ERROR "Unsupported architecture: ${arch}") | |
endif() | |
diff --git a/include/sonic/internal/arch/neon/skip.h b/include/sonic/internal/arch/neon/skip.h | |
index 8be0926..069093a 100644 | |
--- a/include/sonic/internal/arch/neon/skip.h | |
+++ b/include/sonic/internal/arch/neon/skip.h | |
@@ -189,9 +189,11 @@ sonic_force_inline uint8_t skip_space(const uint8_t *data, size_t &pos, | |
// current pos is out of block | |
while (1) { | |
- uint64_t nonspace = GetNonSpaceBits(data + pos); | |
- if (nonspace) { | |
- pos += TrailingZeroes(nonspace) >> 2; | |
+ const svbool_t pmatch = GetNonSpaceBits(data + pos); | |
+ const svbool_t ptrue = svptrue_b8(); | |
+ if (svptest_any(ptrue, pmatch)) { | |
+ const uint64_t nonspace = svcntp_b8(ptrue, svbrkb_z(ptrue, pmatch)); | |
+ pos += nonspace; | |
return data[pos++]; | |
} else { | |
pos += 16; | |
diff --git a/include/sonic/internal/arch/neon/unicode.h b/include/sonic/internal/arch/neon/unicode.h | |
index 933a0e8..88da7a2 100644 | |
--- a/include/sonic/internal/arch/neon/unicode.h | |
+++ b/include/sonic/internal/arch/neon/unicode.h | |
@@ -26,6 +26,10 @@ | |
#include "base.h" | |
#include "simd.h" | |
+#include <arm_sve.h> | |
+ | |
+typedef svuint8_t svuint8x16_t __attribute__((arm_sve_vector_bits(128))); | |
+ | |
namespace sonic_json { | |
namespace internal { | |
namespace neon { | |
@@ -36,63 +40,77 @@ struct StringBlock { | |
public: | |
sonic_force_inline static StringBlock Find(const uint8_t *src); | |
sonic_force_inline static StringBlock Find(uint8x16_t &v); | |
+ // has quote, and no backslash or unescaped before it | |
sonic_force_inline bool HasQuoteFirst() const { | |
- return (((bs_bits - 1) & quote_bits) != 0) && !HasUnescaped(); | |
+ return (bs_index > quote_index) && !HasUnescaped(); | |
} | |
+ // has backslash, and no quote before it | |
sonic_force_inline bool HasBackslash() const { | |
- return ((quote_bits - 1) & bs_bits) != 0; | |
+ return quote_index > bs_index; | |
} | |
+ // has unescaped, and no quote before it | |
sonic_force_inline bool HasUnescaped() const { | |
- return ((quote_bits - 1) & unescaped_bits) != 0; | |
+ return quote_index > unescaped_index; | |
} | |
sonic_force_inline int QuoteIndex() const { | |
- // return TrailingZeroes(quote_bits); | |
- return TrailingZeroes(quote_bits) >> 2; | |
+ sonic_assert(quote_index < 16); | |
+ return quote_index; | |
} | |
sonic_force_inline int BsIndex() const { | |
- // return TrailingZeroes(bs_bits); | |
- return TrailingZeroes(bs_bits) >> 2; | |
+ sonic_assert(bs_index < 16); | |
+ return bs_index; | |
} | |
sonic_force_inline int UnescapedIndex() const { | |
- // return TrailingZeroes(unescaped_bits); | |
- return TrailingZeroes(unescaped_bits) >> 2; | |
+ sonic_assert(unescaped_index < 16); | |
+ return unescaped_index; | |
} | |
- uint64_t bs_bits; | |
- uint64_t quote_bits; | |
- uint64_t unescaped_bits; | |
+ // 0 ~ 15: byte index of the first matching token in the 16-byte block; 16: not found | |
+ unsigned bs_index; | |
+ unsigned quote_index; | |
+ unsigned unescaped_index; | |
}; | |
+sonic_force_inline unsigned locate_token(const svuint8x16_t v, char token) { /* returns the index of the first byte of v equal to token, or 16 when no byte matches */ | |
+ const svbool_t ptrue = svptrue_b8(); /* predicate with every byte lane active */ | |
+ svbool_t pmatch = svmatch(ptrue, v, svdup_n_u8(static_cast<uint8_t>(token))); /* lanes whose byte equals token */ | |
+ return static_cast<unsigned>(svcntp_b8(ptrue, svbrkb_z(ptrue, pmatch))); /* svbrkb_z keeps only the lanes before the first match; counting them yields that match's index */ | |
+} | |
+ | |
sonic_force_inline StringBlock StringBlock::Find(const uint8_t *src) { | |
- uint8x16_t v = vld1q_u8(src); | |
+ svuint8x16_t v = svld1(svptrue_b8(), src); /* load one full vector of bytes starting at src */ | |
return { | |
- to_bitmask(vceqq_u8(v, vdupq_n_u8('\\'))), | |
- to_bitmask(vceqq_u8(v, vdupq_n_u8('"'))), | |
- to_bitmask(vcleq_u8(v, vdupq_n_u8('\x1f'))), | |
+ locate_token(v, '\\'), /* bs_index: first backslash */ | |
+ locate_token(v, '"'), /* quote_index: first double quote */ | |
+ static_cast<unsigned>(svcntp_b8(svptrue_b8(), svbrkb_z(svptrue_b8(), svcmple_n_u8(svptrue_b8(), v, 0x1f)))), /* unescaped_index: first byte <= 0x1f, same test as the replaced vcleq_u8; an equality match would only find 0x1f itself */ | |
}; | |
} | |
sonic_force_inline StringBlock StringBlock::Find(uint8x16_t &v) { | |
return { | |
- to_bitmask(vceqq_u8(v, vdupq_n_u8('\\'))), | |
- to_bitmask(vceqq_u8(v, vdupq_n_u8('"'))), | |
- to_bitmask(vcleq_u8(v, vdupq_n_u8('\x1f'))), | |
+ locate_token(v, '\\'), /* bs_index: first backslash */ | |
+ locate_token(v, '"'), /* quote_index: first double quote */ | |
+ static_cast<unsigned>(svcntp_b8(svptrue_b8(), svbrkb_z(svptrue_b8(), svcmple_n_u8(svptrue_b8(), static_cast<svuint8x16_t>(v), 0x1f)))), /* unescaped_index: first byte <= 0x1f, same test as the replaced vcleq_u8; an equality match would only find 0x1f itself */ | |
}; | |
} | |
-sonic_force_inline uint64_t GetNonSpaceBits(const uint8_t *data) { | |
- uint8x16_t v = vld1q_u8(data); | |
- uint8x16_t m1 = vceqq_u8(v, vdupq_n_u8(' ')); | |
- uint8x16_t m2 = vceqq_u8(v, vdupq_n_u8('\t')); | |
- uint8x16_t m3 = vceqq_u8(v, vdupq_n_u8('\n')); | |
- uint8x16_t m4 = vceqq_u8(v, vdupq_n_u8('\r')); | |
- | |
- uint8x16_t m5 = vorrq_u8(m1, m2); | |
- uint8x16_t m6 = vorrq_u8(m3, m4); | |
- uint8x16_t m7 = vorrq_u8(m5, m6); | |
- uint8x16_t m8 = vmvnq_u8(m7); | |
- | |
- return to_bitmask(m8); | |
+sonic_force_inline svbool_t GetNonSpaceBits(const uint8_t *data) { /* returns a predicate that is true for each loaded byte that is not ' ', '\t', '\n' or '\r' */ | |
+ const svbool_t ptrue = svptrue_b8(); /* predicate with every byte lane active */ | |
+ const svuint8x16_t v = svld1_u8(ptrue, data); /* load one full vector of bytes starting at data */ | |
+ | |
+ // XXX: use inline assembly, as gcc generates suboptimal code that loads the constant from memory for: | |
+ // svuint8x16_t tokens = svreinterpret_u8_u32(svdup_n_u32(0x090a0d20U)); | |
+ svuint8x16_t tokens; | |
+ __asm__ ( | |
+ "mov w8, 0x0d20\n\t" /* low 16 bits: ' ' (0x20) and '\r' (0x0d) */ | |
+ "movk w8, 0x090a, lsl #16\n\t" /* high 16 bits: '\n' (0x0a) and '\t' (0x09) */ | |
+ "mov %[tokens].s, w8" /* copy w8 into every 32-bit lane of tokens */ | |
+ : [tokens] "=w" (tokens) | |
+ : | |
+ : "w8" /* w8 is used as scratch, so it is declared clobbered */ | |
+ ); | |
+ | |
+ return svnmatch_u8(ptrue, v, tokens); /* true where the byte equals none of the four whitespace tokens */ | |
} | |
} // namespace neon |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment