Last active
March 18, 2024 21:54
-
-
Save kg/ad960ba4b98aebe3473938d7f74b6dc3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h>

// glib-style spellings for the fixed-width integer types used below
#define gint32 int32_t
#define guint32 uint32_t
#define guint64 uint64_t
#define guint8 uint8_t

// 128-bit vectors (GCC/Clang vector extension):
// v128_u1 is 16 unsigned byte lanes, v128_i4 is 4 signed 32-bit lanes
typedef guint8 v128_u1 __attribute__ ((vector_size (16)));
typedef gint32 v128_i4 __attribute__ ((vector_size (16)));
static gint32 | |
decode_value (guint8 *ptr, guint8 **new_ptr) | |
{ | |
// *(bytes *)ptr and *(guint32 *)ptr by themselves don't force an i32 load of | |
// ptr in either x64 or wasm clang, so this is the only way to prefetch all the bytes | |
// without doing this, decode_value will do 5 individual single-byte memory loads, | |
// and each individual load is potentially bounds-checked. we produce one wide load | |
// we could overrun the source buffer by up to 11 bytes, but doing that on wasm is | |
// safe unless we're decoding from the absolute end of memory. | |
// we pad all buffers by 16 bytes in mono_wasm_load_bytes_into_heap, so we're fine | |
union { | |
v128_u1 b; | |
v128_i4 i; | |
} v; | |
v.b = *(v128_u1 *)ptr; | |
gint32 result; | |
// mask and shift two bits so we can have a 4-element jump table in wasm | |
guint8 flags = (v.b[0] & (0x80u | 0x40u)) >> 6; | |
switch (flags) { | |
case 0b00u: | |
case 0b01u: | |
// if (b & 0x80) == 0 | |
result = v.b[0]; | |
++ptr; | |
break; | |
case 0b10u: | |
// (b * 0x80) != 0, and (b & 0x40) == 0 | |
// v.b = { ptr[1], ptr[0], ptr[0], ptr[0] } | |
v.b = __builtin_shufflevector( | |
v.b, v.b, | |
1, 0, 0, 0, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1 | |
); | |
// result = v.b[0..3] where v.b[1..2] = 0 and v.b[0] &= 0x3F | |
result = v.i[0] & 0x3FFF; | |
ptr += 2; | |
break; | |
case 0b11u: | |
// i don't know why the default case is necessary here, but without it the jump table has 5 entries. | |
default: | |
// (b * 0x80) != 0, and (b & 0x40) != 0 | |
if (v.b[0] == 0xFFu) { | |
// v.b = { ptr[4], ptr[3], ptr[2], ptr[1] } | |
v.b = __builtin_shufflevector( | |
v.b, v.b, | |
4, 3, 2, 1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1 | |
); | |
// result = v.b[0..3]; | |
result = v.i[0]; | |
ptr += 5; | |
} else { | |
// v.b = { ptr[3], ptr[2], ptr[1], ptr[0] } | |
v.b = __builtin_shufflevector( | |
v.b, v.b, | |
3, 2, 1, 0, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1 | |
); | |
// result = v.b[0..3] where v.b[0] &= 0x1F | |
result = v.i[0] & 0x1FFFFFFF; | |
ptr += 4; | |
} | |
break; | |
} | |
if (new_ptr) | |
*new_ptr = ptr; | |
return result; | |
} | |
/*
 * decode_value_scalar:
 *
 *   Byte-at-a-time decoder for one variable-length encoded 32-bit value.
 *
 * Encoding, selected by the top bits of the first byte b (payload is big-endian):
 *   (b & 0x80) == 0           1 byte,  value = b
 *   (b & 0x40) == 0           2 bytes, value = (b & 0x3F) << 8 | ptr[1]
 *   b != 0xFF                 4 bytes, value = (b & 0x1F) << 24 | ptr[1..3]
 *   b == 0xFF                 5 bytes, value = ptr[1..4]
 *
 * PTR:  first byte of the encoded value; only the bytes of the encoding are read.
 * RPTR: if non-NULL, receives the address just past the encoded value.
 *
 * Returns the decoded value. A 5-byte encoding whose payload has bit 31 set
 * comes back as the corresponding negative gint32.
 */
gint32
decode_value_scalar (guint8 *ptr, guint8 **rptr)
{
	guint8 b = *ptr;
	// Assemble in unsigned arithmetic: a guint8 promotes to (signed) int, and
	// shifting a byte >= 0x80 left by 24 would overflow int, which is undefined.
	guint32 len;

	if ((b & 0x80) == 0) {
		len = b;
		++ptr;
	} else if ((b & 0x40) == 0) {
		len = ((guint32)(b & 0x3f) << 8) | ptr [1];
		ptr += 2;
	} else if (b != 0xff) {
		len = ((guint32)(b & 0x1f) << 24) |
			((guint32)ptr [1] << 16) |
			((guint32)ptr [2] << 8) |
			ptr [3];
		ptr += 4;
	} else {
		len = ((guint32)ptr [1] << 24) |
			((guint32)ptr [2] << 16) |
			((guint32)ptr [3] << 8) |
			ptr [4];
		ptr += 5;
	}

	if (rptr)
		*rptr = ptr;
	return (gint32)len;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment