Skip to content

Instantly share code, notes, and snippets.

@cyb70289
Created July 18, 2024 03:20
Show Gist options
  • Save cyb70289/6a6f08a5bfc789878d83abde2a68a14e to your computer and use it in GitHub Desktop.
Save cyb70289/6a6f08a5bfc789878d83abde2a68a14e to your computer and use it in GitHub Desktop.
sonic-decode-opt
diff --git a/cmake/set_arch_flags.cmake b/cmake/set_arch_flags.cmake
index 538ddfe..81c40e4 100644
--- a/cmake/set_arch_flags.cmake
+++ b/cmake/set_arch_flags.cmake
@@ -2,8 +2,8 @@ function(set_arch_flags target arch)
message(STATUS "Setting architecture flags for ${arch}")
if(arch MATCHES "x86_64")
target_compile_options(${target} PRIVATE -mavx2 -mpclmul -mbmi -mlzcnt)
- elseif(arch MATCHES "arm")
- target_compile_options(${target} PRIVATE -march=armv8-a)
+ elseif(arch MATCHES "arm|aarch64")
+ target_compile_options(${target} PRIVATE -march=armv8-a+sve2 -msve-vector-bits=128)
else()
message(FATAL_ERROR "Unsupported architecture: ${arch}")
endif()
diff --git a/include/sonic/internal/arch/neon/skip.h b/include/sonic/internal/arch/neon/skip.h
index 8be0926..069093a 100644
--- a/include/sonic/internal/arch/neon/skip.h
+++ b/include/sonic/internal/arch/neon/skip.h
@@ -189,9 +189,11 @@ sonic_force_inline uint8_t skip_space(const uint8_t *data, size_t &pos,
// current pos is out of block
while (1) {
- uint64_t nonspace = GetNonSpaceBits(data + pos);
- if (nonspace) {
- pos += TrailingZeroes(nonspace) >> 2;
+ const svbool_t pmatch = GetNonSpaceBits(data + pos);
+ const svbool_t ptrue = svptrue_b8();
+ if (svptest_any(ptrue, pmatch)) {
+ const uint64_t nonspace = svcntp_b8(ptrue, svbrkb_z(ptrue, pmatch));
+ pos += nonspace;
return data[pos++];
} else {
pos += 16;
diff --git a/include/sonic/internal/arch/neon/unicode.h b/include/sonic/internal/arch/neon/unicode.h
index 933a0e8..88da7a2 100644
--- a/include/sonic/internal/arch/neon/unicode.h
+++ b/include/sonic/internal/arch/neon/unicode.h
@@ -26,6 +26,10 @@
#include "base.h"
#include "simd.h"
+#include <arm_sve.h>
+
+typedef svuint8_t svuint8x16_t __attribute__((arm_sve_vector_bits(128)));
+
namespace sonic_json {
namespace internal {
namespace neon {
@@ -36,63 +40,77 @@ struct StringBlock {
public:
sonic_force_inline static StringBlock Find(const uint8_t *src);
sonic_force_inline static StringBlock Find(uint8x16_t &v);
+ // True when a quote is found ahead of any backslash and no unescaped
+ // byte sits in front of that quote.
sonic_force_inline bool HasQuoteFirst() const {
- return (((bs_bits - 1) & quote_bits) != 0) && !HasUnescaped();
+ return (quote_index < bs_index) && !HasUnescaped();
}
+ // True when a backslash is found and no quote comes before it
+ // (an index of 16 means "not found", so a missing quote never wins).
sonic_force_inline bool HasBackslash() const {
- return ((quote_bits - 1) & bs_bits) != 0;
+ return bs_index < quote_index;
}
+ // True when an unescaped byte is found and no quote comes before it
+ // (an index of 16 means "not found", so a missing quote never wins).
sonic_force_inline bool HasUnescaped() const {
- return ((quote_bits - 1) & unescaped_bits) != 0;
+ return unescaped_index < quote_index;
}
+ // Offset of the first quote; asserts that one was actually found.
sonic_force_inline int QuoteIndex() const {
- // return TrailingZeroes(quote_bits);
- return TrailingZeroes(quote_bits) >> 2;
+ sonic_assert(quote_index < 16);
+ return static_cast<int>(quote_index);
}
+ // Offset of the first backslash; asserts that one was actually found.
sonic_force_inline int BsIndex() const {
- // return TrailingZeroes(bs_bits);
- return TrailingZeroes(bs_bits) >> 2;
+ sonic_assert(bs_index < 16);
+ return static_cast<int>(bs_index);
}
+ // Offset of the first unescaped byte; asserts that one was actually found.
sonic_force_inline int UnescapedIndex() const {
- // return TrailingZeroes(unescaped_bits);
- return TrailingZeroes(unescaped_bits) >> 2;
+ sonic_assert(unescaped_index < 16);
+ return static_cast<int>(unescaped_index);
}
- uint64_t bs_bits;
- uint64_t quote_bits;
- uint64_t unescaped_bits;
+ // Each field holds the byte offset (0 ~ 15) of the first matching byte in
+ // the scanned vector, or 16 when the vector contains no match. These are
+ // lane indices, not bit positions in a mask.
+ unsigned bs_index;
+ unsigned quote_index;
+ unsigned unescaped_index;
};
+// Index (0 ~ 15) of the first active lane in pmatch, or 16 when none is set.
+sonic_force_inline unsigned first_active_lane(const svbool_t pmatch) {
+ const svbool_t ptrue = svptrue_b8();
+ // svbrkb_z keeps only the lanes in front of the first active one, so
+ // counting them yields that lane's index (or the lane count if none).
+ return static_cast<unsigned>(svcntp_b8(ptrue, svbrkb_z(ptrue, pmatch)));
+}
+
+// Offset of the first byte equal to token, or 16 when v does not contain it.
+sonic_force_inline unsigned locate_token(const svuint8x16_t v, char token) {
+ const svbool_t pmatch =
+ svmatch(svptrue_b8(), v, svdup_n_u8(static_cast<uint8_t>(token)));
+ return first_active_lane(pmatch);
+}
+
+// Offset of the first control byte (value <= 0x1f), or 16 when v has none.
+// This must be a range compare: the NEON code it replaces used
+// vcleq_u8(v, 0x1f), whereas an equality match would only catch 0x1f itself.
+sonic_force_inline unsigned locate_unescaped(const svuint8x16_t v) {
+ return first_active_lane(svcmple_n_u8(svptrue_b8(), v, 0x1f));
+}
+
// Scans the vector loaded from src and records where the first backslash,
// quote and control byte are (16 for any that is absent).
sonic_force_inline StringBlock StringBlock::Find(const uint8_t *src) {
- uint8x16_t v = vld1q_u8(src);
+ svuint8x16_t v = svld1(svptrue_b8(), src);
return {
- to_bitmask(vceqq_u8(v, vdupq_n_u8('\\'))),
- to_bitmask(vceqq_u8(v, vdupq_n_u8('"'))),
- to_bitmask(vcleq_u8(v, vdupq_n_u8('\x1f'))),
+ locate_token(v, '\\'),
+ locate_token(v, '"'),
+ locate_unescaped(v),
};
}
// Same scan as above for a vector the caller has already loaded.
sonic_force_inline StringBlock StringBlock::Find(uint8x16_t &v) {
return {
- to_bitmask(vceqq_u8(v, vdupq_n_u8('\\'))),
- to_bitmask(vceqq_u8(v, vdupq_n_u8('"'))),
- to_bitmask(vcleq_u8(v, vdupq_n_u8('\x1f'))),
+ locate_token(v, '\\'),
+ locate_token(v, '"'),
+ locate_unescaped(v),
};
}
-sonic_force_inline uint64_t GetNonSpaceBits(const uint8_t *data) {
- uint8x16_t v = vld1q_u8(data);
- uint8x16_t m1 = vceqq_u8(v, vdupq_n_u8(' '));
- uint8x16_t m2 = vceqq_u8(v, vdupq_n_u8('\t'));
- uint8x16_t m3 = vceqq_u8(v, vdupq_n_u8('\n'));
- uint8x16_t m4 = vceqq_u8(v, vdupq_n_u8('\r'));
-
- uint8x16_t m5 = vorrq_u8(m1, m2);
- uint8x16_t m6 = vorrq_u8(m3, m4);
- uint8x16_t m7 = vorrq_u8(m5, m6);
- uint8x16_t m8 = vmvnq_u8(m7);
-
- return to_bitmask(m8);
+// Loads one vector of bytes from data and returns a predicate whose active
+// lanes are the bytes that are NOT one of ' ', '\t', '\n', '\r'.
+// Unlike the NEON version this returns an SVE predicate, not a 64-bit mask.
+sonic_force_inline svbool_t GetNonSpaceBits(const uint8_t *data) {
+ const svbool_t ptrue = svptrue_b8();
+ const svuint8x16_t v = svld1_u8(ptrue, data);
+
+ // XXX: hand-written assembly is used because gcc otherwise generates
+ // suboptimal code that loads the constant from memory. Intrinsic form:
+ // svuint8x16_t tokens = svreinterpret_u8_u32(svdup_n_u32(0x090a0d20U));
+ // The asm builds 0x090a0d20 in w8 (bytes 0x20 ' ', 0x0d '\r', 0x0a '\n',
+ // 0x09 '\t') and copies it into every 32-bit lane of tokens; w8 is
+ // declared as clobbered.
+ svuint8x16_t tokens;
+ __asm__ (
+ "mov w8, 0x0d20\n\t"
+ "movk w8, 0x090a, lsl #16\n\t"
+ "mov %[tokens].s, w8"
+ : [tokens] "=w" (tokens)
+ :
+ : "w8"
+ );
+
+ // svnmatch: lanes of v that match none of the token bytes.
+ return svnmatch_u8(ptrue, v, tokens);
}
} // namespace neon
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment