Header Prime: The first header everyone should include in their C project (if they are smart, that is)
/* | |
* Creator: Naman Dixit | |
* Notice: © Copyright 2024 Naman Dixit | |
* License: BSD Zero Clause License | |
* SPDX: 0BSD (https://spdx.org/licenses/0BSD.html) | |
*/ | |
#if !defined(STD_H_INCLUDE_GUARD) | |
/* Compiler **************************************************************************/ | |
#if defined(_MSC_VER) | |
# if defined(__clang__) | |
# define ENV_COMPILER_CLANG | |
# define ENV_COMPILER_CLANG_WITH_MSVC | |
# else | |
# define ENV_COMPILER_MSVC | |
# endif | |
#elif defined (__GNUC__) | |
# if defined(__clang__) | |
# define ENV_COMPILER_CLANG | |
# define ENV_COMPILER_CLANG_WITH_GCC | |
# else | |
# define ENV_COMPILER_GCC | |
# endif | |
#elif defined(__clang__) | |
# define ENV_COMPILER_CLANG | |
#else | |
# error Compiler not supported | |
#endif | |
/* Operating System ******************************************************************/ | |
#if defined(_WIN32) | |
# define ENV_OS_WINDOWS | |
#elif defined(__linux__) | |
# define ENV_OS_LINUX | |
#else | |
# error Operating system not supported | |
#endif | |
/* CPU Architecture ******************************************************************/ | |
#if defined(ENV_COMPILER_MSVC) || defined(ENV_COMPILER_CLANG_WITH_MSVC) | |
# if defined(_M_IX86) | |
# define ENV_ARCH_X86 | |
# elif defined(_M_X64) | |
# define ENV_ARCH_X64 | |
# endif | |
#elif defined(ENV_COMPILER_CLANG) || defined(ENV_COMPILER_GCC) | |
# if defined(__i386__) | |
# define ENV_ARCH_X86 | |
# elif defined(__x86_64__) | |
# define ENV_ARCH_X64 | |
# endif | |
#endif | |
#if !defined(ENV_ARCH_X64) // && !defined(ENV_ARCH_X86) | |
# error Architecture not supported | |
#endif | |
/* Word Bit-width ********************************************************************/ | |
#if defined(ENV_ARCH_X86)
# define ENV_BITWIDTH_32
// 32-bit is detected but deliberately unsupported for now (the architecture check above already requires X64)
# error "Bitwidth not supported"
#elif defined(ENV_ARCH_X64) | |
# define ENV_BITWIDTH_64 | |
#else | |
# error "Bitwidth not supported" | |
#endif | |
/* Programming Language **************************************************************/ | |
#if defined(ENV_COMPILER_MSVC) | |
# if defined(__cplusplus) | |
# if __cplusplus == 199711L | |
# define ENV_LANG_CXX 1998 | |
# elif __cplusplus == 201402L | |
# define ENV_LANG_CXX 2014 | |
# elif __cplusplus == 201703L | |
# define ENV_LANG_CXX 2017 | |
# elif __cplusplus == 202002L | |
# define ENV_LANG_CXX 2020 | |
# else | |
# define ENV_LANG_CXX 2017 // A future C++, assume the last best supported | |
# endif | |
# elif defined(__STDC_VERSION__) | |
# if (__STDC_VERSION__ == 201112L) || (__STDC_VERSION__ == 201710L) | |
# define ENV_LANG_C 2011 | |
# elif (__STDC_VERSION__ == 202311L) || (__STDC_VERSION__ == 202312L) // 202312L is with /std:clatest before official release of C23 support | |
# define ENV_LANG_C 2023 | |
# else | |
# define ENV_LANG_C 2011 // Earliest C version for which MSVC supports __STDC_VERSION__ | |
# endif | |
# elif defined(__STDC__) // All Microsoft extensions are off [/Za (Disable Language Extensions), similar to pedantic] | |
# define ENV_LANG_C 1989 | |
# else // /Za (Disable Language Extensions) is not provided, but MS never properly supported C99. | |
# define ENV_LANG_C 1989 | |
# endif | |
#elif defined(ENV_COMPILER_CLANG) || defined(ENV_COMPILER_GCC) | |
# if defined(__cplusplus) | |
# if defined(__OBJC__) | |
# define ENV_LANG_OBJCPP 1 | |
# endif | |
# if __cplusplus == 199711L | |
# define ENV_LANG_CXX 1998 | |
# elif __cplusplus == 201103L | |
# define ENV_LANG_CXX 2011 | |
# elif __cplusplus == 201402L | |
# define ENV_LANG_CXX 2014 | |
# elif __cplusplus == 201703L | |
# define ENV_LANG_CXX 2017 | |
# elif __cplusplus == 202002L | |
# define ENV_LANG_CXX 2020 | |
# elif __cplusplus == 202302L | |
# define ENV_LANG_CXX 2023 | |
# else | |
# define ENV_LANG_CXX 2020 // A future C++, assume the last best supported | |
# endif | |
# elif defined(__STDC_VERSION__) // Using C Language >= 1994 (1989) | |
# if defined(__OBJC__) | |
# define ENV_LANG_OBJC 1 | |
# endif | |
# if (__STDC_VERSION__ == 199409L) | |
# define ENV_LANG_C 1989 | |
# elif (__STDC_VERSION__ == 199901L) | |
# define ENV_LANG_C 1999 | |
# elif (__STDC_VERSION__ == 201112L) || (__STDC_VERSION__ == 201710L) | |
# define ENV_LANG_C 2011 | |
# elif (__STDC_VERSION__ == 202311L) | |
# define ENV_LANG_C 2023 | |
# else | |
# define ENV_LANG_C 1999 // At least C99 (__STDC_VERSION__ is defined; C94 is too old to fall back on)
# endif | |
# elif defined(__STDC__) && !defined(__STDC_VERSION__) | |
# if defined(__OBJC__) | |
# define ENV_LANG_OBJC 1 | |
# endif | |
# define ENV_LANG_C 1989 // Since C89 doesn't require definition of __STDC_VERSION__ | |
# endif | |
#endif | |
#if !defined(ENV_LANG_C) && !defined(ENV_LANG_CXX) | |
# error Language not supported | |
#endif | |
/* Endianness ************************************************************************/ | |
#if defined(ENV_OS_WINDOWS) | |
# if defined(ENV_ARCH_X86) || defined(ENV_ARCH_X64) | |
# define ENV_ENDIAN_LITTLE | |
# else | |
# error Could not determine endianness on Windows | |
# endif | |
#elif defined(ENV_OS_LINUX) | |
# include <endian.h> | |
# if __BYTE_ORDER == __LITTLE_ENDIAN | |
# define ENV_ENDIAN_LITTLE | |
# elif __BYTE_ORDER == __BIG_ENDIAN | |
# define ENV_ENDIAN_BIG | |
# else | |
# error Could not determine endianness on Linux | |
# endif | |
#else | |
# error Can not determine endianness, unknown environment | |
#endif | |
/* Constants *************************************************************************/ | |
#define KiB(x) ( (x) * 1024ULL) | |
#define MiB(x) (KiB(x) * 1024ULL) | |
#define GiB(x) (MiB(x) * 1024ULL) | |
#define TiB(x) (GiB(x) * 1024ULL) | |
#define HUNDRED 100L | |
#define THOUSAND 1000L | |
#define MILLION 1000000L | |
#define BILLION 1000000000L | |
/* Attributes ************************************************************************/ | |
//#define macro_gensym_uniq(prefix) macro_gensym2_(prefix, __COUNTER__) // Commented since I am going to use __COUNTER__ for profile events | |
#define macro_gensym_line(prefix) macro_gensym2_(prefix, __LINE__) | |
#define macro_gensym_func(prefix) macro_gensym2_(prefix, __func__) // NOTE: __func__ is a predefined identifier, not a macro; this pastes the literal token `__func__`
#define macro_gensym_file(prefix) macro_gensym2_(prefix, __FILE__) // WARN: __FILE__ expands to a string literal, which cannot be token-pasted; expanding this macro is ill-formed
#if defined(ENV_LANG_C) && ((ENV_LANG_C < 2023) || (defined(ENV_COMPILER_MSVC))) // Remove when MSVC properly supports C23 | |
# define nullptr NULL | |
#endif | |
#if (defined(ENV_LANG_C) && ENV_LANG_C == 2011) | |
# define static_assert(...) _Static_assert(__VA_ARGS__) | |
#elif (defined(ENV_LANG_C) && ENV_LANG_C < 2011) | |
# if defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG) | |
# define static_assert__HELPER(expr, msg) \ | |
(!!sizeof (struct { unsigned int macro_gensym_line(STATIC_ASSERTION__): (expr) ? 1 : -1; })) | |
# define static_assert(expr, msg) \ | |
extern int (*assert_function__(void)) [static_assert__HELPER(expr, msg)] | |
# elif defined(ENV_COMPILER_MSVC) | |
# define static_assert(expr, msg) \ | |
extern char macro_gensym_line(STATIC_ASSERTION__1__)[1]; extern char macro_gensym_line(STATIC_ASSERTION__2__)[(expr)?1:2] | |
# endif | |
#endif | |
#define global_variable static | |
#define global_immutable static const | |
#define persistent_variable static | |
#define persistent_immutable static const | |
#define internal_function static | |
#define header_function static inline | |
#if defined(ENV_COMPILER_MSVC) || defined(ENV_COMPILER_CLANG_WITH_MSVC) | |
# if defined(BUILD_DLL) | |
# define exported_function __declspec(dllexport) | |
# else | |
# define exported_function __declspec(dllimport) | |
# endif | |
#elif defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG) | |
# define exported_function __attribute__((visibility("default"))) | |
#endif | |
#if defined(ENV_COMPILER_MSVC) || defined(ENV_COMPILER_CLANG_WITH_MSVC) | |
# define exported_data __declspec(dllexport) | |
#elif defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG) | |
# define exported_data __attribute__((visibility("default"))) | |
#endif | |
#if defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG) | |
#define packed_data(...) __VA_ARGS__ __attribute__((__packed__)) | |
#elif defined(ENV_COMPILER_MSVC) | |
#define packed_data(...) __pragma( pack(push, 1) ) __VA_ARGS__ __pragma( pack(pop)) | |
#endif | |
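/* Sample usage (illustrative sketch; Wire_Header is a made-up type):
 *   packed_data(struct Wire_Header { Uint8 tag; Uint32 length; });
 *   // With packing applied, sizeof(struct Wire_Header) == 5 instead of 8.
 */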
#if defined(__has_c_attribute) | |
# if __has_c_attribute(fallthrough) | |
# define switch_fallthrough [[fallthrough]] | |
# endif | |
#elif defined(__cplusplus) && defined(__has_cpp_attribute) | |
# if __has_cpp_attribute(fallthrough) | |
# define switch_fallthrough [[fallthrough]] | |
# endif | |
#endif | |
#if !defined(switch_fallthrough) | |
# if defined(ENV_COMPILER_GCC) && __GNUC__ >= 7 | |
# define switch_fallthrough __attribute__ ((fallthrough)) | |
# elif defined(ENV_COMPILER_CLANG) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 9)) | |
# define switch_fallthrough __attribute__ ((fallthrough)) | |
# else | |
# define switch_fallthrough | |
# endif | |
#endif | |
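/* Sample usage (illustrative sketch; OP_* and the helpers are made-up names):
 *   switch (op) {
 *   case OP_LOAD:  doPrefetch(); switch_fallthrough;
 *   case OP_STORE: doExecute();  break;
 *   }
 */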
/* Macros ****************************************************************************/ | |
#define elemin(array) (sizeof(array)/sizeof((array)[0])) | |
// Type-casts | |
#if defined(ENV_LANG_C) | |
# define cast_bit(T, m) (*((T*)(&(m)))) /* Reading float as int, float has to be stored in a variable */ | |
# define cast_ptr(T, v) ((T)(v)) /* Casting float* to int* */ | |
# define cast_val(T, v) ((T)(v)) /* Converting float to int */
# define cast_const(T, v) ((T)(v)) /* Removing const */ | |
#elif defined(ENV_LANG_CXX) | |
template<class To, class From> | |
To cast_bit_func(const From& src) noexcept { | |
static_assert(sizeof(To) == sizeof(From)); | |
union { From f; To t; } u; | |
u.f = src; | |
return u.t; // implementation-defined/UB by strict standard, but works on many compilers | |
} | |
# define cast_bit(T, m) (cast_bit_func<T>(m)) /* Reading float as int, float has to be stored in a variable */ | |
# define cast_ptr(T, v) (reinterpret_cast<T>(v)) /* Casting float* to int* */ | |
# define cast_val(T, v) (static_cast<T>(v)) /* Converting float to int */
# define cast_const(T, v) (const_cast<T>(v)) /* Removing const */ | |
#endif | |
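/* Sample usage (illustrative sketch):
 *   Float32 f = 1.0f;
 *   Uint32 bits = cast_bit(Uint32, f);        // reads the bit pattern: 0x3F800000
 *   Uint32 n    = cast_val(Uint32, 3.7f);     // value conversion: 3
 *   const Char *s = "hi";
 *   Char *ms = cast_const(Char*, s);          // strips the const qualifier
 */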
#if defined(ENV_LANG_CXX) | |
# define typeof(...) decltype(__VA_ARGS__) | |
#endif | |
// Compound Literal | |
#if defined(ENV_LANG_C) | |
# define compound_init(T, ...) (T) __VA_ARGS__ | |
#elif defined(ENV_LANG_CXX) | |
# define compound_init(T, ...) T __VA_ARGS__ | |
#endif | |
#define containerof(ptr, type, member) (cast_val(type*, cast_val(void*, (cast_ptr(Byte*, ptr)) - offsetof(type, member)))) | |
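/* Sample usage (illustrative sketch; Node is a made-up type): recover the enclosing
 * struct from a pointer to one of its members.
 *   typedef struct Node { Sint value; Sint link; } Node;
 *   Node n; Sint *lp = &n.link;
 *   Node *np = containerof(lp, Node, link); // np == &n
 */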
#define unused_variable(var) (void)var | |
// Likely/unlikely for better branch prediction | |
#if defined(ENV_COMPILER_MSVC) | |
# define likely(x) (x) | |
# define unlikely(x) (x) | |
#elif defined(ENV_COMPILER_CLANG) || defined(ENV_COMPILER_GCC) | |
# define likely(x) __builtin_expect(!!(x), 1) | |
# define unlikely(x) __builtin_expect(!!(x), 0) | |
#endif | |
// Defer-like macros | |
#define macro_gensym2_(prefix, suffix) macro_gensym_cat_(prefix, suffix) | |
#define macro_gensym_cat_(prefix, suffix) prefix ## suffix | |
#define macro_entail(...) \ | |
goto macro_gensym_line(jump_to_else); \ | |
\ | |
while (true) \ | |
if (true) { \ | |
__VA_ARGS__; \ | |
break; \ | |
} else macro_gensym_line(jump_to_else): | |
#define macro_envelop(cloak_arg_pre_action, cloak_arg_post_action) \ | |
cloak_arg_pre_action; \ | |
goto macro_gensym_line(jump_to_else); \ | |
\ | |
while (true) \ | |
if (true) { \ | |
cloak_arg_post_action; \ | |
break; \ | |
} else macro_gensym_line(jump_to_else): | |
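/* Sample usage (illustrative sketch, assuming <stdio.h>): run a post-action once the
 * following block finishes normally. Note that a break, goto or return out of the
 * block skips the post-action.
 *   macro_envelop(FILE *f = fopen("out.txt", "w"), if (f) fclose(f)) {
 *       if (f) fputs("hello\n", f);
 *   } // the post-action (fclose) runs here
 */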
/* Error handling macros | |
* | |
* Sample Code: | |
* | |
* T *ta = nullptr, *tb = nullptr;
* err_try(doing_stuff) { | |
* ta = tCreate(); | |
* if (!ta) err_throw(doing_stuff); | |
* tb = tCreate(); | |
* if (!tb) err_throw(doing_stuff); | |
* stuffDo(ta, tb); | |
* } err_catch(doing_stuff) { | |
* if (ta) tDelete(ta); | |
* if (tb) tDelete(tb); | |
* } | |
*/ | |
#define err_try(fail) Bool fail = false; do | |
#define err_throw(fail) {fail=true;break;}(void)0 | |
#define err_catch(fail) while (0); if (fail) | |
#define eat_semicolon() static_assert(1, "") | |
/* Less noisy pragmas */ | |
#if defined(ENV_COMPILER_CLANG) | |
# define pragma_msvc(P) eat_semicolon() | |
# define pragma_clang(P) _Pragma(P) eat_semicolon() | |
# define pragma_gcc(P) eat_semicolon() | |
#elif defined(ENV_COMPILER_GCC) | |
# define pragma_msvc(P) eat_semicolon() | |
# define pragma_clang(P) eat_semicolon() | |
# define pragma_gcc(P) _Pragma(P) eat_semicolon() | |
#elif defined(ENV_COMPILER_MSVC) | |
# define pragma_msvc(P) _Pragma(P) eat_semicolon() | |
# define pragma_clang(P) eat_semicolon() | |
# define pragma_gcc(P) eat_semicolon() | |
#endif | |
/* Compiler-specific tokens */ | |
#if defined(ENV_COMPILER_CLANG) | |
# define with_msvc(P) | |
# define with_clang(P) P | |
# define with_gcc(P) | |
#elif defined(ENV_COMPILER_GCC) | |
# define with_msvc(P) | |
# define with_clang(P) | |
# define with_gcc(P) P | |
#elif defined(ENV_COMPILER_MSVC) | |
# define with_msvc(P) P | |
# define with_clang(P) | |
# define with_gcc(P) | |
#endif | |
#define with_clang_gcc(P) with_clang(P) with_gcc(P) | |
#define with_clang_msvc(P) with_clang(P) with_msvc(P) | |
#if defined(ENV_LANG_C) && ENV_LANG_C >= 1999 | |
# define static_array_size(size) with_clang_gcc(static size) | |
#else | |
# define static_array_size(size) (size) | |
#endif | |
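/* Sample usage (illustrative sketch):
 *   void hashBytes (const Byte key[static_array_size(16)]);
 * On C99 Clang/GCC this expands to `key[static 16]`, promising the callee at least
 * 16 valid bytes and letting the compiler warn on null or too-short arguments;
 * elsewhere it degrades to an ordinary array parameter.
 */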
/* Disable all warnings in headers */ | |
#define disable_warnings() \ | |
pragma_clang("clang diagnostic push"); \ | |
pragma_clang("clang diagnostic ignored \"-Weverything\""); \ | |
pragma_msvc("warning ( push , 0 )") | |
#define enable_warnings() \ | |
pragma_clang("clang diagnostic pop"); \ | |
pragma_msvc("warning ( pop )") | |
#define allow_padding_in_type(...) \ | |
pragma_msvc("warning ( push )"); \ | |
pragma_msvc("warning ( disable: 4820 )"); \ | |
__VA_ARGS__ \ | |
pragma_msvc("warning ( pop )"); | |
/* Stringify contents of a macro */ | |
#define stringify(a) stringify_helper_(a) | |
#define stringify_helper_(a) #a | |
/* Header Files **********************************************************************/ | |
// Function-less C89 headers | |
#include <float.h> | |
#include <limits.h> | |
#include <stdarg.h> | |
#include <stddef.h> | |
// Function-less C99 headers | |
#if (defined(ENV_LANG_C) && (ENV_LANG_C >= 1999)) || (defined(ENV_LANG_CXX) && (ENV_LANG_CXX >= 2011)) | |
# include <inttypes.h> | |
# include <stdbool.h> | |
# include <stdint.h> | |
#endif | |
// Function-less C11 headers | |
// stdalign | |
#if (defined(ENV_LANG_C) && (ENV_LANG_C >= 2011)) || (defined(ENV_LANG_CXX) && (ENV_LANG_CXX >= 2011)) | |
# include <stdalign.h> | |
#else | |
# define alignof(...) __alignof__(__VA_ARGS__) | |
typedef struct max_align_t { | |
char align_buf[__BIGGEST_ALIGNMENT__] __attribute__((aligned(__BIGGEST_ALIGNMENT__))); | |
} max_align_t; | |
#endif | |
#if defined(ENV_ARCH_X64) || defined(ENV_ARCH_X86) | |
# include <pmmintrin.h> // SSE3, 100% support as per Steam Hardware Survey | |
#endif | |
#if defined(ENV_COMPILER_MSVC) | |
// WARN(naman): intrin0.inl.h is an internal header. This might break. | |
// NOTE(naman): We include intrin0.inl.h instead of the documented intrin.h because intrin.h pulls in
// EVERY possible intrinsic, and we don't want that (that is the whole point of only including
// the SSE3 header above).
# include <intrin0.inl.h> | |
#else | |
# if defined(ENV_ARCH_X64) || defined(ENV_ARCH_X86) | |
# include <x86intrin.h> // Add with carry | |
# endif | |
#endif | |
// FIXME(naman): MSVC doesn't seem to define max_align_t for C11 code, so this will have to suffice for now. | |
// For allocation alignments, see docs.microsoft.com/en-us/cpp/c-runtime-library/reference/malloc?view=msvc-160 | |
#if defined(ENV_COMPILER_MSVC) | |
# if defined(ENV_LANG_C) | |
# if defined(ENV_ARCH_X86)
typedef struct { double a; } max_align_t; // 8-bytes aligned (docs.microsoft.com/en-us/cpp/c-runtime-library/reference/malloc?view=msvc-160) | |
static_assert(alignof(max_align_t) == 8, "Alignment of max_align_t is not 8"); | |
# elif defined(ENV_ARCH_X64) | |
typedef struct { __m128 sse; } max_align_t; // 16-byte aligned (docs.microsoft.com/en-us/cpp/cpp/m128?view=msvc-160) | |
static_assert(alignof(max_align_t) == 16, "Alignment of max_align_t is not 16"); | |
# endif | |
# elif defined(ENV_LANG_CXX) | |
# include <cstddef> | |
typedef std::max_align_t max_align_t; | |
# endif | |
#endif | |
#if defined(ENV_COMPILER_CLANG) || defined(ENV_COMPILER_GCC) | |
# if !defined(__has_feature) // GCC (pre-14) lacks __has_feature; fall back to __SANITIZE_ADDRESS__ alone
#  define __has_feature(x) 0
# endif
# if defined(BUILD_SANITIZERS) && (__has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__))
disable_warnings(); | |
# define _DISABLE_STL_ANNOTATION | |
# include <sanitizer/asan_interface.h> | |
# undef _DISABLE_STL_ANNOTATION | |
enable_warnings(); | |
# define ENV_SANITIZER_ADDRESS | |
# endif | |
#endif | |
/* Primitive Types *******************************************************************/ | |
typedef int Sint; | |
typedef unsigned int Uint; | |
#include <stdint.h> | |
typedef int8_t Sint8; | |
typedef int16_t Sint16; | |
typedef int32_t Sint32; | |
typedef int64_t Sint64; | |
typedef uint8_t Uint8; | |
typedef uint16_t Uint16; | |
typedef uint32_t Uint32; | |
typedef uint64_t Uint64; | |
typedef float Float32; | |
typedef double Float64; | |
typedef size_t Size; | |
#if defined(ENV_OS_LINUX)
# include <sys/types.h> // ssize_t lives here, not in the function-less headers above
typedef ssize_t SSize;
#elif defined(ENV_OS_WINDOWS) | |
# include <basetsd.h> | |
typedef SSIZE_T SSize; | |
#endif | |
typedef uintptr_t Uptr; | |
typedef intptr_t Sptr; | |
typedef ptrdiff_t Dptr; | |
typedef unsigned char Byte; | |
typedef char Char; | |
#if defined(ENV_LANG_C) && (ENV_LANG_C >= 1999)
typedef _Bool Bool;
#else
typedef bool Bool;
#endif
// NOTE(naman): We define our own true & false because by default, they are defined as 1 & 0 and that doesn't play well with _Generic. | |
pragma_clang("clang diagnostic push"); | |
pragma_clang("clang diagnostic ignored \"-Wkeyword-macro\""); | |
#if defined(ENV_LANG_C) | |
# undef true | |
# define true ((Bool)+1) | |
# undef false | |
# define false ((Bool)+0) | |
#endif | |
pragma_clang("clang diagnostic pop"); | |
/* Helper Stuff **********************************************************************/ | |
#if defined(ENV_LANG_C) | |
# define maximum(x, y) ((x) > (y) ? (x) : (y)) | |
# define minimum(x, y) ((x) < (y) ? (x) : (y)) | |
#elif defined(ENV_LANG_CXX) | |
template<typename T> | |
const T& maximum (const T& a, const T& b) | |
{ | |
return (a < b) ? b : a; | |
} | |
template<typename T> | |
const T& minimum (const T& a, const T& b) | |
{ | |
return (a > b) ? b : a; | |
} | |
#endif | |
#define memzero(_x) memset((_x), 0, sizeof(*(_x))) | |
#if defined(ENV_LANG_C) | |
# define valzero(_t) (_t){} | |
#elif defined(ENV_LANG_CXX) | |
# define valzero(_t) _t{} | |
#endif | |
#if defined(BUILD_INTERNAL) | |
# if defined(ENV_OS_WINDOWS) | |
# define breakpoint() __debugbreak() | |
# elif defined(ENV_OS_LINUX) | |
# if defined(ENV_ARCH_X86) || defined(ENV_ARCH_X64) | |
# if defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG) | |
# define breakpoint() __asm__ volatile("int $0x03") | |
# endif // !GCC && !Clang | |
# endif // !x86 && !x64 | |
# endif // !window && !linux | |
#else | |
# define breakpoint() do {} while (0) | |
#endif | |
#define debugBreak() breakpoint() | |
#define debugAssert(...) do { if (!(__VA_ARGS__)) { debugBreak(); } } while (0) | |
#if defined(BUILD_INTERNAL) | |
global_immutable Bool debug_unreachable_code_executed = false; | |
# define debugUnreachable() debugAssert(debug_unreachable_code_executed == true) | |
#else | |
# if defined(ENV_COMPILER_MSVC) | |
# define debugUnreachable() __assume(false) | |
# elif defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG) | |
# define debugUnreachable() __builtin_unreachable() | |
# endif | |
#endif | |
#define COMPARE_FUNC(name) Sint name (const void *a, const void *b) | |
#if defined(ENV_OS_WINDOWS) | |
# if defined(ENV_LANG_CXX) | |
extern "C" { | |
# endif | |
void * __cdecl memset(void *, Sint, Size); _Pragma("intrinsic(memset)") | |
void * __cdecl memcpy(void *, const void *, Size); _Pragma("intrinsic(memcpy)") | |
int __cdecl memcmp(const void*, const void*, Size); _Pragma("intrinsic(memcmp)") | |
Size __cdecl strlen(const char*); _Pragma("intrinsic(strlen)") | |
header_function | |
Sint strlcmp (const Char *a, Size al, const Char *b) | |
{ | |
Size bl = strlen(b); | |
/**/ if (al > bl) return a[bl]; | |
else if (al < bl) return -b[al]; | |
else { | |
Sint result = memcmp(a, b, minimum(al, bl)); | |
return result; | |
} | |
} | |
header_function | |
Sint strcmp (const Char *a, const Char *b) | |
{ | |
Size al = strlen(a); | |
Sint result = strlcmp(a, al, b); | |
return result; | |
} | |
header_function | |
Bool strleq (const Char *a, Size al, const Char *b) | |
{ | |
return strlcmp(a, al, b) == 0; | |
} | |
header_function | |
Bool streq (const Char *a, const Char *b) | |
{ | |
return strcmp(a, b) == 0; | |
} | |
# if defined(ENV_LANG_CXX) | |
} | |
# endif | |
#else | |
# include <string.h> | |
#endif | |
/* ********************************************************************************* | |
* Base 32 Encoding | |
* **************** | |
* Inspired by Crockford's Base 32 encoding (https://www.crockford.com/base32.html) | |
* | |
* Changes: | |
* | |
* 1. U/u is decoded to the same integer as V/v. | |
* When encoding, v is the right value to encode to. | |
* | |
* 2. For the extra check symbols, use - . _ ~ u, since | |
* these are the symbols considered unreserved in URLs | |
* See section 2.3 of https://www.ietf.org/rfc/rfc3986.txt | |
* | |
* (Explanation: If a checksum is required, the integer
* value of the string is divided by 37 (the next prime
* after 32) and the encoded remainder is appended after
* the string. Since the remainder will be < 37, five extra
* symbols are required).
* | |
* 3. The encoded characters are lower-case, not upper-case,
* since upper case looks like screaming. Also, this way,
* letters and numbers are a bit more distinguishable.
* And it reads like r0x0ring h4x0r 13375p34k, n00bs4uc3!
* (Obligatory: https://megatokyo.com/strip/9)
*/ | |
header_function | |
Uint8 b32DecodeChar (Char code) // Character to integer | |
{ | |
switch (code) { | |
case 'O': case 'o': | |
case '0': return 0; | |
case 'I': case 'i': | |
case 'L': case 'l': | |
case '1': return 1; | |
case '2': return 2; | |
case '3': return 3; | |
case '4': return 4; | |
case '5': return 5; | |
case '6': return 6; | |
case '7': return 7; | |
case '8': return 8; | |
case '9': return 9; | |
case 'A': case 'a': return 10; | |
case 'B': case 'b': return 11; | |
case 'C': case 'c': return 12; | |
case 'D': case 'd': return 13; | |
case 'E': case 'e': return 14; | |
case 'F': case 'f': return 15; | |
case 'G': case 'g': return 16; | |
case 'H': case 'h': return 17; | |
case 'J': case 'j': return 18; | |
case 'K': case 'k': return 19; | |
case 'M': case 'm': return 20; | |
case 'N': case 'n': return 21; | |
case 'P': case 'p': return 22; | |
case 'Q': case 'q': return 23; | |
case 'R': case 'r': return 24; | |
case 'S': case 's': return 25; | |
case 'T': case 't': return 26; | |
case 'U': case 'u': | |
case 'V': case 'v': return 27; | |
case 'W': case 'w': return 28; | |
case 'X': case 'x': return 29; | |
case 'Y': case 'y': return 30; | |
case 'Z': case 'z': return 31; | |
default: debugUnreachable(); return 0; | |
} | |
} | |
header_function | |
Uint8 b32DecodeChecksumChar (Char check) // Character to integer | |
{ | |
switch (check) { | |
case '-': return 32; | |
case '.': return 33; | |
case '_': return 34; | |
case '~': return 35; | |
case 'u': return 36; | |
default: return b32DecodeChar(check); | |
} | |
} | |
header_function | |
Char b32EncodeChar (Uint8 val) // Integer to character | |
{ | |
switch (val) { | |
case 0: return '0'; case 1: return '1'; case 2: return '2'; case 3: return '3'; | |
case 4: return '4'; case 5: return '5'; case 6: return '6'; case 7: return '7'; | |
case 8: return '8'; case 9: return '9'; case 10: return 'a'; case 11: return 'b'; | |
case 12: return 'c'; case 13: return 'd'; case 14: return 'e'; case 15: return 'f'; | |
case 16: return 'g'; case 17: return 'h'; case 18: return 'j'; case 19: return 'k'; | |
case 20: return 'm'; case 21: return 'n'; case 22: return 'p'; case 23: return 'q'; | |
case 24: return 'r'; case 25: return 's'; case 26: return 't'; case 27: return 'v'; | |
case 28: return 'w'; case 29: return 'x'; case 30: return 'y'; case 31: return 'z'; | |
default: debugUnreachable(); return 0; | |
} | |
} | |
header_function | |
Char b32EncodeChecksumChar (Uint8 val) // Integer to character | |
{ | |
switch (val) { | |
case 32: return '-'; | |
case 33: return '.'; | |
case 34: return '_'; | |
case 35: return '~'; | |
case 36: return 'u'; | |
default: return b32EncodeChar(val); | |
} | |
} | |
header_function | |
Bool b32VerifyInputString (Char *str, Size len, Char checksum) | |
{ | |
Uint8 mod = 0; | |
for (Size i = 0; i < len; i++) { | |
Bool valid = false; | |
valid |= (str[i] >= 0x30) && (str[i] <= 0x39); // 0-9 | |
valid |= (str[i] >= 0x61) && (str[i] <= 0x68); // a-h | |
valid |= (str[i] == 0x6a); // j | |
valid |= (str[i] == 0x6b); // k | |
valid |= (str[i] == 0x6d); // m | |
valid |= (str[i] == 0x6e); // n | |
valid |= (str[i] >= 0x70) && (str[i] <= 0x74); // p-t | |
valid |= (str[i] >= 0x76) && (str[i] <= 0x7a); // v-z | |
valid |= (str[i] >= 0x41) && (str[i] <= 0x48); // A-H
valid |= (str[i] == 0x4a); // J
valid |= (str[i] == 0x4b); // K
valid |= (str[i] == 0x4d); // M
valid |= (str[i] == 0x4e); // N
valid |= (str[i] >= 0x50) && (str[i] <= 0x54); // P-T
valid |= (str[i] >= 0x56) && (str[i] <= 0x5a); // V-Z
if (!valid) return false; | |
mod = (mod + b32DecodeChar(str[i])) % 37u; | |
} | |
if (checksum) { | |
Uint8 check_car = b32DecodeChecksumChar(checksum); | |
Uint8 check_val = mod; | |
if (check_val != check_car) return false; | |
} | |
return true; | |
} | |
header_function | |
Char b32GetChecksumChar (Char *str, Size len) | |
{ | |
Uint8 mod = 0; | |
for (Size i = 0; i < len; i++) { | |
mod = (mod + b32DecodeChar(str[i])) % 37u; | |
} | |
Char check = b32EncodeChecksumChar(mod); | |
return check; | |
} | |
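/* Sample usage (illustrative sketch):
 *   Char id[] = "k7bz";
 *   Char check = b32GetChecksumChar(id, 4);       // derive the check symbol
 *   Bool ok = b32VerifyInputString(id, 4, check); // true for a well-formed string
 */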
/************************************************************************************ | |
* Unicode Processing | |
*/ | |
// TODO(naman): Go through these security guidelines for safe UTF-8 processing: | |
// https://security.stackexchange.com/questions/257017/what-are-best-practices-for-handling-user-unicode-in-a-web-application | |
/* | |
* First codepoint Last codepoint Byte 1 Byte 2 Byte 3 Byte 4 | |
* U+0000 U+007F 0yyyzzzz | |
* U+0080 U+07FF 110xxxyy 10yyzzzz | |
* U+0800 U+FFFF 1110wwww 10xxxxyy 10yyzzzz | |
* U+010000 U+10FFFF 11110uvv 10vvwwww 10xxxxyy 10yyzzzz | |
*/ | |
// UTF32 to UTF8 | |
header_function | |
Size unicodeEncode (Uint32 code, Byte buffer[static_array_size(4)]) | |
{ | |
if (buffer == nullptr) return 0; | |
if (code <= 0x7F) { | |
buffer[0] = cast_val(Byte, code); // 0yyyzzzz | |
return 1; | |
} else if (code <= 0x7FF) { | |
buffer[0] = cast_val(Byte, cast_val(Byte, 0xC0) | cast_val(Byte, (code >> 6))); // 110xxxyy | |
buffer[1] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, (code & 0x3F))); // 10yyzzzz | |
return 2; | |
} else if (code <= 0xFFFF) { | |
buffer[0] = cast_val(Byte, cast_val(Byte, 0xE0) | cast_val(Byte, (code >> 12))); // 1110wwww | |
buffer[1] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, ((code >> 6) & 0x3F))); // 10xxxxyy | |
buffer[2] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, (code & 0x3F))); // 10yyzzzz | |
return 3; | |
} else if (code <= 0x10FFFF) { | |
buffer[0] = cast_val(Byte, cast_val(Byte, 0xF0) | cast_val(Byte, (code >> 18))); // 11110uvv | |
buffer[1] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, ((code >> 12) & 0x3F))); // 10vvwwww | |
buffer[2] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, ((code >> 6) & 0x3F))); // 10xxxxyy | |
buffer[3] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, (code & 0x3F))); // 10yyzzzz | |
return 4; | |
} else { | |
return 0; // Invalid codepoint | |
} | |
} | |
// UTF8 to UTF32 | |
header_function | |
Bool unicodeDecode (const Byte *buffer, Size len, Size *offset, Uint32 *codepoint) | |
{ | |
if (buffer == nullptr) return 0; | |
Size temp1 = 0; if (offset == nullptr) offset = &temp1; | |
Uint32 temp2 = 0; if (codepoint == nullptr) codepoint = &temp2; | |
Size o = *offset; | |
len -= o; | |
*codepoint = 0; | |
if (len == 0) return false; | |
Uint32 b0 = buffer[o+0]; | |
*offset += 1; | |
if (b0 == 0) return true; // NUL byte
if ((b0 & 0x80) == 0) { // 0x80 = 10000000 | |
*codepoint = b0; | |
return true; | |
} | |
if (len == 1) return false; | |
Uint32 b1 = buffer[o+1]; | |
*offset += 1; | |
if (b1 == 0) return false; | |
Uint32 BA1 = b1 & 0xC0; // 0xC0 = 11000000 | |
Uint32 BB1 = b1 & 0x3F; // 0x3F = 00111111 | |
if ((b0 & 0xE0) == 0xC0) { // 0xE0 = 11100000 , 0xC0 = 11000000 | |
if (BA1 != 0x80) return false; // 0x80 = 10000000 | |
Uint32 B = b0 & 0x1F; // 0x1F = 00011111 | |
*codepoint = (B << 6) | BB1; | |
return true; | |
} | |
if (len == 2) return false; | |
Uint32 b2 = buffer[o+2]; | |
*offset += 1; | |
if (b2 == 0) return false; | |
Uint32 BA2 = b2 & 0xC0; // 0xC0 = 11000000 | |
Uint32 BB2 = b2 & 0x3F; // 0x3F = 00111111 | |
if ((b0 & 0xF0) == 0xE0) { // 0xF0 = 11110000 , 0xE0 = 11100000 | |
if (BA1 != 0x80) return false; // 0x80 = 10000000 | |
if (BA2 != 0x80) return false; // 0x80 = 10000000 | |
Uint32 B = b0 & 0x0F; // 0x0F = 00001111 | |
*codepoint = (B << 12) | (BB1 << 6) | BB2; | |
return true; | |
} | |
if (len == 3) return false; | |
Uint32 b3 = buffer[o+3]; | |
*offset += 1; | |
if (b3 == 0) return false; | |
Uint32 BA3 = b3 & 0xC0; // 0xC0 = 11000000 | |
Uint32 BB3 = b3 & 0x3F; // 0x3F = 00111111 | |
if ((b0 & 0xF8) == 0xF0) { // 0xF8 = 11111000 , 0xF0 = 11110000 | |
if (BA1 != 0x80) return false; // 0x80 = 10000000 | |
if (BA2 != 0x80) return false; // 0x80 = 10000000 | |
if (BA3 != 0x80) return false; // 0x80 = 10000000 | |
Uint32 B = b0 & 0x07; // 0x07 = 00000111 | |
Uint32 result = (B << 18) | (BB1 << 12) | (BB2 << 6) | BB3; | |
if (result > 0x10FFFF) return false; | |
*codepoint = result; | |
return true; | |
} | |
return false; | |
} | |
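/* Sample round trip (illustrative sketch):
 *   Byte utf8[4];
 *   Size n = unicodeEncode(0x20ACu, utf8);        // U+20AC -> 3 bytes: E2 82 AC
 *   Size off = 0; Uint32 cp = 0;
 *   Bool ok = unicodeDecode(utf8, n, &off, &cp);  // ok == true, cp == 0x20AC
 */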
/********************************************************************** | |
* Bitwise Delight | |
*/ | |
// NOTE(naman): Bit vectors are supposed to be zero-indexed. | |
// NOTE(naman): Base type of bit vectors shouldn't have size > 8 bytes (to prevent shift overflow). | |
#define bitToBytes(b) (((b)+(CHAR_BIT-1))/(CHAR_BIT)) | |
#define bit_ValInBuf(array, index) ((index)/(CHAR_BIT * sizeof(*(array)))) | |
#define bit_BitInVal(array, index) ((index)%(CHAR_BIT * sizeof(*(array)))) | |
#define bitSet(array, index) \ | |
((array)[bit_ValInBuf(array, index)] |= (1LLU << bit_BitInVal(array, index))) | |
#define bitReset(array, index) \ | |
((array)[bit_ValInBuf(array, index)] &= ~(1LLU << bit_BitInVal(array, index))) | |
#define bitToggle(array, index) \
((array)[bit_ValInBuf(array, index)] ^= (1LLU << bit_BitInVal(array, index)))
#define bitTest(array, index) \ | |
((array)[bit_ValInBuf(array, index)] & (1LLU << bit_BitInVal(array, index))) | |
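/* Sample usage (illustrative sketch): a 100-bit vector backed by bytes
 *   Byte bits[bitToBytes(100)] = {0};
 *   bitSet(bits, 42);
 *   if (bitTest(bits, 42)) bitReset(bits, 42);
 */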
header_function | |
Uint8 bitMSBU32 (Uint32 x) | |
{ | |
debugAssert(x); | |
Uint8 result; | |
#if defined(ENV_COMPILER_MSVC) | |
// _BitScanReverse: Search MSB->LSB and load the position of first bit in location (zero-indexed) | |
unsigned long msb_location = 0; | |
debugAssert(_BitScanReverse(&msb_location, x)); | |
result = cast_val(Uint8, msb_location); | |
#else | |
// __builtin_clz: Returns the number of leading 0-bits starting from MSB | |
Sint maybe_set_bits = 32 - __builtin_clz(x); | |
Sint msb_location = maybe_set_bits - 1; // Zero indexing | |
result = cast_val(Uint8, msb_location); | |
#endif | |
return result; | |
} | |
header_function | |
Uint8 bitMSBU64 (Uint64 x) | |
{ | |
debugAssert(x); | |
Uint8 result; | |
#if defined(ENV_COMPILER_MSVC) | |
// _BitScanReverse64: Search MSB->LSB and load the position of first bit in location (zero-indexed) | |
unsigned long msb_location = 0; | |
debugAssert(_BitScanReverse64(&msb_location, x)); | |
result = cast_val(Uint8, msb_location); | |
#else | |
// __builtin_clzll: Returns the number of leading 0-bits starting from MSB | |
Sint maybe_set_bits = 64 - __builtin_clzll(x); | |
Sint msb_location = maybe_set_bits - 1; // Zero indexing | |
result = cast_val(Uint8, msb_location); | |
#endif | |
return result; | |
} | |
header_function | |
Uint8 bitLSBU32 (Uint32 x) | |
{ | |
debugAssert(x); | |
Uint8 result; | |
#if defined(ENV_COMPILER_MSVC) | |
// _BitScanForward: Search LSB->MSB and load the position of first bit in location (zero-indexed) | |
unsigned long lsb_location = 0; | |
debugAssert(_BitScanForward(&lsb_location, x)); | |
result = cast_val(Uint8, lsb_location); | |
#else | |
// __builtin_ctz: Returns the number of trailing 0-bits starting from LSB | |
Sint lsb_location = __builtin_ctz(x); | |
result = cast_val(Uint8, lsb_location); | |
#endif | |
return result; | |
} | |
header_function | |
Uint8 bitLSBU64 (Uint64 x) | |
{ | |
debugAssert(x); | |
Uint8 result; | |
#if defined(ENV_COMPILER_MSVC) | |
// _BitScanForward: Search LSB->MSB and load the position of first bit in location (zero-indexed) | |
unsigned long lsb_location = 0; | |
debugAssert(_BitScanForward64(&lsb_location, x)); | |
result = cast_val(Uint8, lsb_location); | |
#else | |
// __builtin_ctzll: Returns the number of trailing 0-bits starting from LSB | |
Sint lsb_location = __builtin_ctzll(x); | |
result = cast_val(Uint8, lsb_location); | |
#endif | |
return result; | |
} | |
#if defined(ENV_LANG_C) && ENV_LANG_C >= 2011 | |
# define bitMSB(_x) _Generic((_x), Uint32: bitMSBU32, Uint64: bitMSBU64)(_x) | |
# define bitLSB(_x) _Generic((_x), Uint32: bitLSBU32, Uint64: bitLSBU64)(_x) | |
#elif defined(ENV_LANG_CXX) | |
header_function Uint8 bitMSB (Uint32 x) { return bitMSBU32(x); } | |
header_function Uint8 bitMSB (Uint64 x) { return bitMSBU64(x); } | |
header_function Uint8 bitLSB (Uint32 x) { return bitLSBU32(x); } | |
header_function Uint8 bitLSB (Uint64 x) { return bitLSBU64(x); } | |
#endif // !LANG_C && !LANG_CPP | |
header_function | |
Uint64 bitPow2U32 (Uint32 x) | |
{ | |
Uint64 y = 1ull << x; | |
return y; | |
} | |
header_function | |
Uint64 bitPow2U64 (Uint64 x) | |
{ | |
Uint64 y = 1ull << x; | |
return y; | |
} | |
header_function | |
Uint8 bitLog2U32 (Uint32 x) | |
{ | |
debugAssert(x); | |
Uint8 y = x ? bitMSBU32(x) : 0u; | |
return y; | |
} | |
header_function | |
Uint8 bitLog2U64 (Uint64 x) | |
{ | |
debugAssert(x); | |
Uint8 y = x ? bitMSBU64(x) : 0u; | |
return y; | |
} | |
header_function | |
Bool bitIsPowerOf2U32 (Uint32 x) | |
{ | |
debugAssert(x); | |
Bool b = (x & (x - 1)) == 0; | |
return b; | |
} | |
header_function | |
Bool bitIsPowerOf2U64 (Uint64 x) | |
{ | |
debugAssert(x); | |
Bool b = (x & (x - 1)) == 0; | |
return b; | |
} | |
header_function | |
Uint64 bitPrevPowerOf2U32 (Uint32 x) | |
{ | |
debugAssert(x); | |
Uint64 y = bitIsPowerOf2U32(x) ? (1ull << (bitLog2U32(x) - 1u)) : (1ull << bitLog2U32(x)); // round down to the nearest power of 2 (an exact power steps down to the previous one)
return y; | |
} | |
header_function | |
Uint64 bitPrevPowerOf2U64 (Uint64 x) | |
{ | |
debugAssert(x); | |
Uint64 y = bitIsPowerOf2U64(x) ? (1ull << (bitLog2U64(x) - 1ull)) : (1ull << bitLog2U64(x));
return y; | |
} | |
header_function | |
Uint64 bitNextPowerOf2U32 (Uint32 x) | |
{ | |
debugAssert(x); | |
Uint64 y = 1ull << (bitLog2U32(x) + 1u); // round up to the power of 2 strictly greater than x
return y; | |
} | |
header_function | |
Uint64 bitNextPowerOf2U64 (Uint64 x) | |
{ | |
debugAssert(x); | |
Uint64 y = 1ull << (bitLog2U64(x) + 1ull);
return y; | |
} | |
// Number of leading zeros (on the most significant end) | |
header_function | |
Uint8 bitMSZerosU32 (Uint32 x) | |
{ | |
debugAssert(x); | |
Uint8 clz = 32U - bitMSBU32(x) - 1U; | |
return clz; | |
} | |
header_function | |
Uint8 bitMSZerosU64 (Uint64 x) | |
{ | |
debugAssert(x); | |
Uint8 clz = 64U - bitMSBU64(x) - 1U; | |
return clz; | |
} | |
// square root functions from Mark Dickinson | |
// https://stackoverflow.com/a/70550680/12862673 | |
header_function | |
Uint16 bitSqrtU32 (Uint32 x) | |
{ | |
Sint lz = bitMSZerosU32(x | 1U) & 0x1E; | |
x <<= lz; | |
Uint32 y = 1u + (x >> 30); | |
y = (y << 1) + (x >> 27) / y; | |
y = (y << 3) + (x >> 21) / y; | |
y = (y << 7) + (x >> 9) / y; | |
y -= x < cast_val(Uint32, y) * y; | |
return cast_val(Uint16, (y >> (lz >> 1))); | |
} | |
header_function | |
Uint32 bitSqrtU64 (Uint64 x) | |
{ | |
Uint8 lz = cast_val(Uint8, bitMSZerosU64(x | 1ull) & 0x3Eu); | |
x <<= lz; | |
Uint64 y = 2ull + (x >> 63); | |
y = (y << 1) + (x >> 59) / y; | |
y = (y << 3) + (x >> 53) / y; | |
y = (y << 7) + (x >> 41) / y; | |
y = (y << 15) + (x >> 17) / y; | |
y -= x < cast_val(Uint64, y) * y; | |
return cast_val(Uint32, (y >> (lz >> 1))); | |
} | |
#if defined(ENV_LANG_C) && ENV_LANG_C >= 2011 | |
# define bitPow2(x) _Generic((x), Uint32: bitPow2U32, Uint64: bitPow2U64)(x) | |
# define bitLog2(x) _Generic((x), Uint32: bitLog2U32, Uint64: bitLog2U64)(x) | |
# define bitIsPowerOf2(x) _Generic((x), Uint32: bitIsPowerOf2U32, Uint64: bitIsPowerOf2U64)(x) | |
# define bitPrevPowerOf2(x) _Generic((x), Uint32: bitPrevPowerOf2U32, Uint64: bitPrevPowerOf2U64)(x) | |
# define bitNextPowerOf2(x) _Generic((x), Uint32: bitNextPowerOf2U32, Uint64: bitNextPowerOf2U64)(x) | |
# define bitSqrt(x) _Generic((x), Uint32: bitSqrtU32, Uint64: bitSqrtU64)(x) | |
#elif defined(ENV_LANG_CXX) | |
header_function Uint64 bitPow2 (Uint32 x) { return bitPow2U32(x); } | |
header_function Uint64 bitPow2 (Uint64 x) { return bitPow2U64(x); } | |
header_function Uint8 bitLog2 (Uint32 x) { return bitLog2U32(x); } | |
header_function Uint8 bitLog2 (Uint64 x) { return bitLog2U64(x); } | |
header_function Bool bitIsPowerOf2 (Uint32 x) { return bitIsPowerOf2U32(x); } | |
header_function Bool bitIsPowerOf2 (Uint64 x) { return bitIsPowerOf2U64(x); } | |
header_function Uint64 bitPrevPowerOf2 (Uint32 x) { return bitPrevPowerOf2U32(x); } | |
header_function Uint64 bitPrevPowerOf2 (Uint64 x) { return bitPrevPowerOf2U64(x); } | |
header_function Uint64 bitNextPowerOf2 (Uint32 x) { return bitNextPowerOf2U32(x); } | |
header_function Uint64 bitNextPowerOf2 (Uint64 x) { return bitNextPowerOf2U64(x); } | |
header_function Uint16 bitSqrt (Uint32 x) { return bitSqrtU32(x); } | |
header_function Uint32 bitSqrt (Uint64 x) { return bitSqrtU64(x); } | |
#endif | |
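/* Example values (illustrative): bitSqrt(1000000u) == 1000 (floor square root),
 * bitLog2(4096u) == 12, bitIsPowerOf2(4096u) == true. */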
/************************************************************************************* | |
* Serialization | |
*/ | |
header_function | |
void srlzWrite8 (Uint8 src, Byte *dest) | |
{ | |
dest[0] = src; | |
} | |
#define srlzWrite8BE srlzWrite8 | |
#define srlzWrite8LE srlzWrite8 | |
header_function | |
void srlzWrite16LE (Uint16 src, Byte *dest) | |
{ | |
dest[0] = cast_val(Uint8, (0x00000000000000FF & (src)) >> 000); | |
dest[1] = cast_val(Uint8, (0x000000000000FF00 & (src)) >> 010); | |
} | |
header_function | |
void srlzWrite16BE (Uint16 src, Byte *dest) | |
{ | |
dest[0] = cast_val(Uint8, (0x000000000000FF00 & (src)) >> 010); | |
dest[1] = cast_val(Uint8, (0x00000000000000FF & (src)) >> 000); | |
} | |
header_function | |
void srlzWrite32LE (Uint32 src, Byte *dest) | |
{ | |
dest[0] = cast_val(Uint8, (0x00000000000000FF & (src)) >> 000); | |
dest[1] = cast_val(Uint8, (0x000000000000FF00 & (src)) >> 010); | |
dest[2] = cast_val(Uint8, (0x0000000000FF0000 & (src)) >> 020); | |
dest[3] = cast_val(Uint8, (0x00000000FF000000 & (src)) >> 030); | |
} | |
header_function | |
void srlzWrite32BE (Uint32 src, Byte *dest) | |
{ | |
dest[0] = cast_val(Uint8, (0x00000000FF000000 & (src)) >> 030); | |
dest[1] = cast_val(Uint8, (0x0000000000FF0000 & (src)) >> 020); | |
dest[2] = cast_val(Uint8, (0x000000000000FF00 & (src)) >> 010); | |
dest[3] = cast_val(Uint8, (0x00000000000000FF & (src)) >> 000); | |
} | |
header_function | |
void srlzWrite64LE (Uint64 src, Byte *dest) | |
{ | |
dest[0] = cast_val(Uint8, (0x00000000000000FF & (src)) >> 000); | |
dest[1] = cast_val(Uint8, (0x000000000000FF00 & (src)) >> 010); | |
dest[2] = cast_val(Uint8, (0x0000000000FF0000 & (src)) >> 020); | |
dest[3] = cast_val(Uint8, (0x00000000FF000000 & (src)) >> 030); | |
dest[4] = cast_val(Uint8, (0x000000FF00000000 & (src)) >> 040); | |
dest[5] = cast_val(Uint8, (0x0000FF0000000000 & (src)) >> 050); | |
dest[6] = cast_val(Uint8, (0x00FF000000000000 & (src)) >> 060); | |
dest[7] = cast_val(Uint8, (0xFF00000000000000 & (src)) >> 070); | |
} | |
header_function | |
void srlzWrite64BE (Uint64 src, Byte *dest) | |
{ | |
dest[0] = cast_val(Uint8, (0xFF00000000000000 & (src)) >> 070); | |
dest[1] = cast_val(Uint8, (0x00FF000000000000 & (src)) >> 060); | |
dest[2] = cast_val(Uint8, (0x0000FF0000000000 & (src)) >> 050); | |
dest[3] = cast_val(Uint8, (0x000000FF00000000 & (src)) >> 040); | |
dest[4] = cast_val(Uint8, (0x00000000FF000000 & (src)) >> 030); | |
dest[5] = cast_val(Uint8, (0x0000000000FF0000 & (src)) >> 020); | |
dest[6] = cast_val(Uint8, (0x000000000000FF00 & (src)) >> 010); | |
dest[7] = cast_val(Uint8, (0x00000000000000FF & (src)) >> 000); | |
} | |
header_function | |
Uint8 srlzRead8 (const Byte *src) | |
{ | |
Uint8 result = src[0]; | |
return result; | |
} | |
#define srlzRead8BE srlzRead8 | |
#define srlzRead8LE srlzRead8 | |
header_function | |
Uint16 srlzRead16LE (const Byte *src) { | |
Uint16 result = cast_val(Uint16, cast_val(Uint16, 255 & src[1]) << 8 | cast_val(Uint16, 255 & src[0])); | |
return result; | |
} | |
header_function | |
Uint16 srlzRead16BE (const Byte *src) { | |
Uint16 result = cast_val(Uint16, cast_val(Uint16, 255 & src[0]) << 8 | cast_val(Uint16, 255 & src[1])); | |
return result; | |
} | |
header_function | |
Uint32 srlzRead32LE (const Byte *src) | |
{ | |
Uint32 result = (cast_val(Uint32, 255 & src[3]) << 030 | cast_val(Uint32, 255 & src[2]) << 020 | | |
cast_val(Uint32, 255 & src[1]) << 010 | cast_val(Uint32, 255 & src[0]) << 000); | |
return result; | |
} | |
header_function | |
Uint32 srlzRead32BE (const Byte *src) | |
{ | |
Uint32 result = (cast_val(Uint32, 255 & src[0]) << 030 | cast_val(Uint32, 255 & src[1]) << 020 | | |
cast_val(Uint32, 255 & src[2]) << 010 | cast_val(Uint32, 255 & src[3]) << 000); | |
return result; | |
} | |
header_function | |
Uint64 srlzRead64LE (const Byte *src) | |
{ | |
Uint64 result = (cast_val(Uint64, 255 & src[7]) << 070 | cast_val(Uint64, 255 & src[6]) << 060 | | |
cast_val(Uint64, 255 & src[5]) << 050 | cast_val(Uint64, 255 & src[4]) << 040 | | |
cast_val(Uint64, 255 & src[3]) << 030 | cast_val(Uint64, 255 & src[2]) << 020 | | |
cast_val(Uint64, 255 & src[1]) << 010 | cast_val(Uint64, 255 & src[0]) << 000); | |
return result; | |
} | |
header_function | |
Uint64 srlzRead64BE (const Byte *src) | |
{ | |
Uint64 result = (cast_val(Uint64, 255 & src[0]) << 070 | cast_val(Uint64, 255 & src[1]) << 060 | | |
cast_val(Uint64, 255 & src[2]) << 050 | cast_val(Uint64, 255 & src[3]) << 040 | | |
cast_val(Uint64, 255 & src[4]) << 030 | cast_val(Uint64, 255 & src[5]) << 020 | | |
cast_val(Uint64, 255 & src[6]) << 010 | cast_val(Uint64, 255 & src[7]) << 000); | |
return result; | |
} | |
typedef union srlz_8 { Uint8 u8; Sint8 s8; } srlz_8; | |
typedef union srlz_16 { Uint16 u16; Sint16 s16; } srlz_16; | |
typedef union srlz_32 { Uint32 u32; Sint32 s32; Float32 f32; } srlz_32; | |
typedef union srlz_64 { Uint64 u64; Sint64 s64; Float64 f64; } srlz_64; | |
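/* Sample usage (illustrative sketch): an endianness-independent 32-bit round trip
 *   Byte buf[4];
 *   srlzWrite32LE(0xDEADBEEFu, buf); // buf = { EF, BE, AD, DE }
 *   Uint32 v = srlzRead32LE(buf);    // v == 0xDEADBEEF on any host
 */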
/************************************************************************************* | |
* Marshalling | |
*/ | |
#define MARSHAL_FUNC(_name) Bool _name (void* data, Size size, void* userdata) | |
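/* A MARSHAL_FUNC moves `size` bytes between the marshaller's scratch buffer and some
 * external medium: on a write it consumes `data`, on a read it fills `data`. A minimal
 * write-direction sketch over an in-memory cursor (Mem_Cursor is a made-up type, not
 * part of this header):
 *   typedef struct Mem_Cursor { Byte *mem; Size at; } Mem_Cursor;
 *   header_function MARSHAL_FUNC(marshalWriteToMemory) {
 *       Mem_Cursor *mc = cast_val(Mem_Cursor*, userdata);
 *       memcpy(mc->mem + mc->at, data, size);
 *       mc->at += size;
 *       return true;
 *   }
 */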
header_function | |
Bool marshalUint8 (Uint8 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
unused_variable(big_endian); | |
srlz_8 srlzd = {0}; | |
Byte buf[sizeof(*datum)] = {0}; | |
if (write) { | |
srlzd.u8 = *datum; | |
srlzWrite8(srlzd.u8, buf); | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
} else { | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
srlzd.u8 = srlzRead8(buf); | |
*datum = srlzd.u8; | |
} | |
return true; | |
} | |
header_function | |
Bool marshalSint8 (Sint8 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
unused_variable(big_endian); | |
srlz_8 srlzd = {0}; | |
Byte buf[sizeof(*datum)] = {0}; | |
if (write) { | |
srlzd.s8 = *datum; | |
srlzWrite8(srlzd.u8, buf); | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
} else { | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
srlzd.u8 = srlzRead8(buf); | |
*datum = srlzd.s8; | |
} | |
return true; | |
} | |
header_function | |
Bool marshalUint16 (Uint16 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
srlz_16 srlzd = {0}; | |
Byte buf[sizeof(*datum)] = {0}; | |
if (write) { | |
srlzd.u16 = *datum; | |
if (big_endian) { srlzWrite16BE(srlzd.u16, buf); } | |
else { srlzWrite16LE(srlzd.u16, buf); } | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
} else { | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
if (big_endian) { srlzd.u16 = srlzRead16BE(buf); } | |
else { srlzd.u16 = srlzRead16LE(buf); } | |
*datum = srlzd.u16; | |
} | |
return true; | |
} | |
header_function | |
Bool marshalSint16 (Sint16 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
srlz_16 srlzd = {0}; | |
Byte buf[sizeof(*datum)] = {0}; | |
if (write) { | |
srlzd.s16 = *datum; | |
if (big_endian) { srlzWrite16BE(srlzd.u16, buf); } | |
else { srlzWrite16LE(srlzd.u16, buf); } | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
} else { | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
if (big_endian) { srlzd.u16 = srlzRead16BE(buf); } | |
else { srlzd.u16 = srlzRead16LE(buf); } | |
*datum = srlzd.s16; | |
} | |
return true; | |
} | |
header_function | |
Bool marshalUint32 (Uint32 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
srlz_32 srlzd = {0}; | |
Byte buf[sizeof(*datum)] = {0}; | |
if (write) { | |
srlzd.u32 = *datum; | |
if (big_endian) { srlzWrite32BE(srlzd.u32, buf); } | |
else { srlzWrite32LE(srlzd.u32, buf); } | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
} else { | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
if (big_endian) { srlzd.u32 = srlzRead32BE(buf); } | |
else { srlzd.u32 = srlzRead32LE(buf); } | |
*datum = srlzd.u32; | |
} | |
return true; | |
} | |
header_function | |
Bool marshalSint32 (Sint32 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
srlz_32 srlzd = {0}; | |
Byte buf[sizeof(*datum)] = {0}; | |
if (write) { | |
srlzd.s32 = *datum; | |
if (big_endian) { srlzWrite32BE(srlzd.u32, buf); } | |
else { srlzWrite32LE(srlzd.u32, buf); } | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
} else { | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
if (big_endian) { srlzd.u32 = srlzRead32BE(buf); } | |
else { srlzd.u32 = srlzRead32LE(buf); } | |
*datum = srlzd.s32; | |
} | |
return true; | |
} | |
header_function | |
Bool marshalFloat32 (Float32 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
srlz_32 srlzd = {0}; | |
Byte buf[sizeof(*datum)] = {0}; | |
if (write) { | |
srlzd.f32 = *datum; | |
if (big_endian) { srlzWrite32BE(srlzd.u32, buf); } | |
else { srlzWrite32LE(srlzd.u32, buf); } | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
} else { | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
if (big_endian) { srlzd.u32 = srlzRead32BE(buf); } | |
else { srlzd.u32 = srlzRead32LE(buf); } | |
*datum = srlzd.f32; | |
} | |
return true; | |
} | |
header_function | |
Bool marshalUint64 (Uint64 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
srlz_64 srlzd = {0}; | |
Byte buf[sizeof(*datum)] = {0}; | |
if (write) { | |
srlzd.u64 = *datum; | |
if (big_endian) { srlzWrite64BE(srlzd.u64, buf); } | |
else { srlzWrite64LE(srlzd.u64, buf); } | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
} else { | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
if (big_endian) { srlzd.u64 = srlzRead64BE(buf); } | |
else { srlzd.u64 = srlzRead64LE(buf); } | |
*datum = srlzd.u64; | |
} | |
return true; | |
} | |
header_function | |
Bool marshalSint64 (Sint64 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
srlz_64 srlzd = {0}; | |
Byte buf[sizeof(*datum)] = {0}; | |
if (write) { | |
srlzd.s64 = *datum; | |
if (big_endian) { srlzWrite64BE(srlzd.u64, buf); } | |
else { srlzWrite64LE(srlzd.u64, buf); } | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
} else { | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
if (big_endian) { srlzd.u64 = srlzRead64BE(buf); } | |
else { srlzd.u64 = srlzRead64LE(buf); } | |
*datum = srlzd.s64; | |
} | |
return true; | |
} | |
header_function | |
Bool marshalFloat64 (Float64 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
srlz_64 srlzd = {0}; | |
Byte buf[sizeof(*datum)] = {0}; | |
if (write) { | |
srlzd.f64 = *datum; | |
if (big_endian) { srlzWrite64BE(srlzd.u64, buf); } | |
else { srlzWrite64LE(srlzd.u64, buf); } | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
} else { | |
if (!func(buf, sizeof(*datum), funcdata)) return false; | |
if (big_endian) { srlzd.u64 = srlzRead64BE(buf); } | |
else { srlzd.u64 = srlzRead64LE(buf); } | |
*datum = srlzd.f64; | |
} | |
return true; | |
} | |
/************************************************************************************* | |
* Free Grid | |
*/ | |
/* | |
* row mask |‾‾‾‾‾‾‾‾‾‾side‾‾‾‾‾‾‾‾‾| | |
* lsb | |
* 1 FF FF FF FF FF ‾| | |
* 1 FF FF FF FF FF | | |
* 1 FF FF 07 00 00 side | |
* 0 00 00 00 00 00 | | |
* 0 00 00 00 00 00 _| | |
* msb | |
* 1 1 1 1 1 <- col mask | |
* lsb msb | |
*/ | |
// NOTE(naman): Here, the "correct" thing to do would have been to | |
// calculate the `side` variable using `sqrt(elem_count)`. But even | |
// if we take the maximum possible size, the allocation size is | |
// 32 KiB which is around the minimum allocation we can make with | |
// the root TLSF allocator. So, there is no point in making a smaller | |
// allocation here. | |
#define FG_SIDE 64u | |
// NOTE(naman): 2^18 is 64*64*64 (= 262144), and that is the maximum count we support | |
#define FREE_GRID_MAXIMUM_ELEMENTS (1U << 18) | |
typedef struct Free_Grid { | |
Uint64 masks[FG_SIDE * FG_SIDE]; // 64*64 masks of 64 bits each = 32 KiB
Uint8 count_masks_in_row[FG_SIDE]; | |
Uint8 count_masks_in_col[FG_SIDE]; | |
Uint64 row_mask; | |
Uint64 col_mask; | |
Uint32 count; | |
Uint32 allocated; | |
} Free_Grid; | |
header_function | |
void fgCreate (Free_Grid *fg, Uint32 elem_count) | |
{ | |
debugAssert(elem_count <= FREE_GRID_MAXIMUM_ELEMENTS); | |
memzero(fg); | |
fg->count = elem_count; | |
Uint32 fully_marked_mask_count = elem_count / 64; | |
Uint32 extra_marked_bits_count = elem_count % 64; | |
if (fully_marked_mask_count) memset(fg->masks, 0xFF, fully_marked_mask_count * sizeof(Uint64)); | |
if (extra_marked_bits_count) { // guard: a shift by 64 (when elem_count is a multiple of 64) would be undefined
fg->masks[fully_marked_mask_count] |= ((~cast_val(Uint64, 0x0ull)) >> (64 - extra_marked_bits_count));
}
Uint32 total_marked_masks = fully_marked_mask_count + (extra_marked_bits_count ? 1 : 0); | |
Uint32 fully_filled_rows = total_marked_masks / FG_SIDE; | |
Uint8 partially_filled_row_size = total_marked_masks % FG_SIDE; | |
Uint32 total_rows = fully_filled_rows + (partially_filled_row_size ? 1 : 0); | |
fg->row_mask = ((~cast_val(Uint64, 0x0ull)) >> (64 - total_rows)); | |
Uint32 col_bits_count = fully_filled_rows ? FG_SIDE : partially_filled_row_size; | |
fg->col_mask = ((~cast_val(Uint64, 0x0ull)) >> (64 - minimum(FG_SIDE, col_bits_count))); | |
for (Size i = 0; i < fully_filled_rows; i++) { | |
fg->count_masks_in_row[i] = FG_SIDE; | |
} | |
if (partially_filled_row_size) { | |
pragma_msvc("warning ( push )"); | |
pragma_msvc("warning ( disable: 6386 )"); // Analyze: WRITE_OVERRUN (Buffer overrun while writing to 'fg->count_masks_in_row') | |
fg->count_masks_in_row[fully_filled_rows] = partially_filled_row_size; | |
pragma_msvc("warning ( pop )"); | |
} | |
for (Size i = 0; i < FG_SIDE; i++) { | |
fg->count_masks_in_col[i] = cast_val(Uint8, fully_filled_rows); | |
if (partially_filled_row_size && (i < partially_filled_row_size)) { | |
fg->count_masks_in_col[i]++; | |
} | |
} | |
return; | |
} | |
header_function | |
void fgDelete (Free_Grid *fg) | |
{ | |
unused_variable(fg); | |
} | |
header_function | |
Bool fgAllocate (Free_Grid *fg, Uint32 *index) | |
{ | |
if (fg->row_mask == 0) { // grid is full; bitLSBU64(0) would be undefined
return false;
}
Uint8 row_first_bit_index = bitLSBU64(fg->row_mask);
debugAssert(row_first_bit_index < 64);
debugAssert(fg->col_mask != 0);
Uint8 col_first_bit_index = bitLSBU64(fg->col_mask);
while (!fg->masks[(row_first_bit_index * FG_SIDE) + col_first_bit_index]) { | |
col_first_bit_index++; | |
} | |
debugAssert(col_first_bit_index < 64); | |
Uint32 mask_array_index = row_first_bit_index * FG_SIDE + col_first_bit_index; | |
debugAssert(mask_array_index < (64*64)); | |
debugAssert(fg->masks[mask_array_index]); | |
Uint8 mask_first_bit_index = bitLSBU64(fg->masks[mask_array_index]); | |
debugAssert(mask_first_bit_index != cast_val(Uint8, -1)); | |
*index = mask_array_index * 64 + cast_val(Uint32, mask_first_bit_index); | |
debugAssert(*index < fg->count); | |
fg->masks[mask_array_index] &= ~(1ULL << mask_first_bit_index); | |
if (fg->masks[mask_array_index] == 0) { | |
fg->count_masks_in_row[row_first_bit_index]--; | |
fg->count_masks_in_col[col_first_bit_index]--; | |
if (fg->count_masks_in_row[row_first_bit_index] == 0) { | |
fg->row_mask &= ~(1ULL << row_first_bit_index); | |
} | |
if (fg->count_masks_in_col[col_first_bit_index] == 0) { | |
fg->col_mask &= ~(1ULL << col_first_bit_index); | |
} | |
} | |
fg->allocated++; | |
return true; | |
} | |
header_function | |
void fgFree (Free_Grid *fg, Uint32 index) | |
{ | |
debugAssert(index < fg->count); | |
Uint32 fully_traversed_mask_index = (index + 1) / 64; | |
Uint32 extra_untreaded_bits_count = (index + 1) % 64; | |
Uint32 total_relevant_mask_count = fully_traversed_mask_index + (extra_untreaded_bits_count ? 1 : 0); | |
Uint32 final_relevant_mask_index = total_relevant_mask_count - 1; | |
Uint32 row_bit_index = final_relevant_mask_index / FG_SIDE; | |
Uint32 col_bit_index = final_relevant_mask_index - (row_bit_index * FG_SIDE); | |
Uint32 relevant_bit_index; | |
if (extra_untreaded_bits_count) { | |
relevant_bit_index = extra_untreaded_bits_count - 1; | |
} else { | |
relevant_bit_index = 63; | |
} | |
debugAssert((fg->masks[final_relevant_mask_index] & (1ULL << relevant_bit_index)) == 0); | |
Uint64 old_mask = fg->masks[final_relevant_mask_index]; | |
fg->masks[final_relevant_mask_index] |= (1ULL << relevant_bit_index); | |
if (old_mask == 0) { | |
fg->count_masks_in_row[row_bit_index]++; | |
fg->count_masks_in_col[col_bit_index]++; | |
fg->row_mask |= (1ULL << row_bit_index); | |
fg->col_mask |= (1ULL << col_bit_index); | |
} | |
} | |
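/* Sample usage (illustrative sketch):
 *   global_variable Free_Grid fg;
 *   fgCreate(&fg, 1000);           // mark slots [0, 1000) as free
 *   Uint32 slot;
 *   if (fgAllocate(&fg, &slot)) {
 *       // ... use slot ...
 *       fgFree(&fg, slot);
 *   }
 */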
#undef FG_SIDE
/************************************************************************************** | |
* Printing | |
*/ | |
#define PRINT_FUNC(name) Size name (void *userdata, const Char *fmt, ...) | |
#define PRINT_CALLBACK_FUNC(name) void name (void *userdata, Char *filled_buffer, Size buffer_len, Size buffer_cap) | |
typedef enum Print_Flags { | |
Print_Flags_LEFT_JUSTIFIED = 1u << 0, | |
Print_Flags_ALTERNATE_FORM = 1u << 1, | |
Print_Flags_LEADING_PLUS = 1u << 2, | |
Print_Flags_LEADING_SPACE = 1u << 3, | |
Print_Flags_LEADING_ZERO = 1u << 4, | |
Print_Flags_INT8 = 1u << 5, | |
Print_Flags_INT16 = 1u << 6, | |
Print_Flags_INT64 = 1u << 7, | |
Print_Flags_NEGATIVE = 1u << 8, | |
Print_Flags_FLOAT_FIXED = 1u << 9, | |
Print_Flags_FLOAT_EXP = 1u << 10, | |
Print_Flags_FLOAT_HEX = 1u << 11, | |
} Print_Flags; | |
// FIXME(naman): This needs to change to either yield or call a callback every n characters, | |
// so that functions using this don't have to call it twice (once to get size, then to | |
// get the actual characters) in cases when the buffer is too small the first time. | |
// FIXME(naman): Replace memcpy, etc. with loops so that at least some of the characters | |
// can get copied if the buffer is too small. Once this is done, uncomment sbPrintSized() | |
// TODO(naman): Add the following features: | |
// * Comma in numbers | |
// + Indian style (`) | |
// + Western style (') | |
// * Unit suffix | |
// + SI (1 K = 1000) (:) | |
// + IEC (1 K = 1024) (;) | |
header_function | |
Size printStringVarArg (Char *buffer, Size buffer_size, | |
Char const *format, va_list va, | |
PRINT_CALLBACK_FUNC((*callback)), void *userdata) | |
{ | |
debugAssert(buffer); | |
debugAssert(buffer_size); | |
debugAssert(format); | |
debugAssert(callback); | |
Char const *fmt = format; | |
Size buffer_index = 0; | |
Size total_size = 0; | |
#define PRINT_ADD_CHAR(_c) do { \ | |
if (buffer_index == buffer_size) { \ | |
callback(userdata, buffer, buffer_index, buffer_size); \ | |
buffer_index = 0; \ | |
} \ | |
buffer[buffer_index++] = _c; \ | |
total_size++; \ | |
} while (0) | |
while (true) { | |
// Copy everything up to the next % (or end of string) | |
while ((fmt[0] != '%') && (fmt[0] != '\0')) { | |
PRINT_ADD_CHAR(fmt[0]); | |
fmt++; | |
} | |
if (fmt[0] == '%') { | |
Char const *format_pointer = fmt; | |
fmt++; | |
// read the modifiers first | |
Uint32 flags = 0; | |
// flags | |
while (true) { | |
switch (fmt[0]) { | |
case '-': { // if we have left justify | |
flags |= Print_Flags_LEFT_JUSTIFIED; | |
fmt++; | |
continue; | |
} break; | |
case '#': { // if we use alternate form | |
flags |= Print_Flags_ALTERNATE_FORM; | |
fmt++; | |
continue; | |
} break; | |
case '+': { // if we have leading plus | |
flags |= Print_Flags_LEADING_PLUS; | |
fmt++; | |
continue; | |
} break; | |
case ' ': { // if we have leading space | |
flags |= Print_Flags_LEADING_SPACE; | |
fmt++; | |
continue; | |
} break; | |
case '0': { // if we have leading zero | |
flags |= Print_Flags_LEADING_ZERO; | |
fmt++; | |
goto flags_done; | |
} break; | |
default: { | |
goto flags_done; | |
} break; | |
} | |
} | |
flags_done: | |
Sint minimum_width = 0; // called field width in documentation | |
// get the field width | |
if (fmt[0] == '*') { | |
minimum_width = va_arg(va, Sint); | |
fmt++; | |
} else { | |
while ((fmt[0] >= '0') && (fmt[0] <= '9')) { | |
minimum_width = (minimum_width * 10) + (fmt[0] - '0'); | |
fmt++; | |
} | |
} | |
Sint precision = -1; | |
// get the precision | |
if (fmt[0] == '.') { | |
fmt++; | |
if (fmt[0] == '*') { | |
precision = va_arg(va, Sint); | |
fmt++; | |
} else { | |
precision = 0; | |
if (fmt[0] == '-') { // Negative precision is treated as if precision was omitted | |
fmt++; | |
precision = -1; | |
while ((fmt[0] >= '0') && (fmt[0] <= '9')) { | |
fmt++; | |
} | |
} else { | |
while ((fmt[0] >= '0') && (fmt[0] <= '9')) { | |
precision = (precision * 10) + (fmt[0] - '0'); | |
fmt++; | |
} | |
} | |
} | |
} | |
// handle integer size overrides | |
switch (fmt[0]) { | |
case 'h': { // are we 16-bit (or, with "hh", 8-bit)? | |
flags |= Print_Flags_INT16; | |
fmt++; | |
if (fmt[0] == 'h') { | |
flags &= ~cast_val(Uint32, Print_Flags_INT16); | |
flags |= Print_Flags_INT8; | |
fmt++; | |
} | |
} break; | |
case 'l': { // are we 64-bit | |
flags |= Print_Flags_INT64; | |
fmt++; | |
if (fmt[0] == 'l') fmt++; | |
} break; | |
case 'z': { // are we 64-bit on size_t? | |
flags |= (sizeof(Size) == 8) ? Print_Flags_INT64 : 0; | |
fmt++; | |
} break; | |
case 't': { // are we 64-bit on ptrdiff_t? | |
flags |= (sizeof(Dptr) == 8) ? Print_Flags_INT64 : 0; | |
fmt++; | |
} break; | |
default: { | |
} break; | |
} | |
# define PRINT_STR_SIZE 2048ULL | |
Char num_str[PRINT_STR_SIZE]; | |
Char *num_str_ptr = num_str; | |
Char *str = nullptr; | |
Char head_str[8] = {0}; | |
Size head_index = 0; | |
Char tail_str[8] = {0}; | |
Size tail_index = 0; | |
Size len = 0; | |
switch (fmt[0]) { // handle each replacement | |
case 's': { // string | |
// get the string | |
str = va_arg(va, Char*); | |
if (str == nullptr) { | |
str = cast_const(Char *, "null"); | |
} | |
// By this point, str is most definitely not nullptr | |
len = strlen(str); | |
// clamp to precision | |
// Since precision inits at -1, if none was mentioned, this will not execute | |
if (len > cast_val(Size, precision)) { | |
len = cast_val(Size, precision); | |
} | |
precision = 0; | |
} break; | |
case 'c': { // char | |
// get the character | |
str = num_str_ptr + PRINT_STR_SIZE - 1; | |
*str = cast_val(Char, va_arg(va, Sint)); | |
len = 1; | |
precision = 0; | |
} break; | |
case 'n': { // %n: store the count of characters written so far | |
if (flags & Print_Flags_INT64) { | |
Sint64 *p = va_arg(va, Sint64*); | |
*p = cast_val(Sint64, total_size); // total_size, not buffer_index: the latter resets on every flush | |
} else if (flags & Print_Flags_INT16) { | |
Sint16 *p = va_arg(va, Sint16*); | |
*p = cast_val(Sint16, total_size); | |
} else if (flags & Print_Flags_INT8) { | |
Sint8 *p = va_arg(va, Sint8*); | |
*p = cast_val(Sint8, total_size); | |
} else { | |
Sint *p = va_arg(va, Sint*); | |
*p = cast_val(Sint, total_size); | |
} | |
} break; | |
case 'b': case 'B': | |
case 'o': case 'O' : | |
case 'x': case 'X': { // binary | |
enum { BIN, OCT, HEX } base; | |
Bool upper = false; | |
Char type = fmt[0]; | |
switch (type) { | |
case 'b': base = BIN; break; | |
case 'B': base = BIN; upper = true; break; | |
case 'o': base = OCT; break; | |
case 'O': base = OCT; upper = true; break; | |
case 'x': base = HEX; break; | |
case 'X': base = HEX; upper = true; break; | |
default: { | |
base = BIN; // To silence uninitialized variable warning | |
debugAssert(false && "Print: Integer base uninitialized"); | |
} break; | |
} | |
Uint64 num; | |
if (flags & Print_Flags_INT64) { | |
num = va_arg(va, Uint64); | |
} else { | |
num = va_arg(va, Uint32); | |
} | |
Uint64 num_dec = num; | |
if (flags & Print_Flags_INT8) { | |
num_dec = cast_val(Uint8, num_dec); | |
} else if (flags & Print_Flags_INT16) { | |
num_dec = cast_val(Uint16, num_dec); | |
} | |
str = num_str_ptr + PRINT_STR_SIZE; | |
if ((num == 0) && (precision == 0)) { | |
break; | |
} | |
while (true) { | |
Uint64 and_mask = 0; | |
switch (base) { | |
case BIN: and_mask = 0x1ULL; break; | |
case OCT: and_mask = 0x7ULL; break; | |
case HEX: and_mask = 0xFULL; break; | |
default: debugBreak(); break; | |
} | |
Uint64 shift_magnitude = 0; | |
switch (base) { | |
case BIN: shift_magnitude = 1ULL; break; | |
case OCT: shift_magnitude = 3ULL; break; | |
case HEX: shift_magnitude = 4ULL; break; | |
default: debugBreak(); break; | |
} | |
Uint64 n = num_dec & and_mask; | |
num_dec = num_dec >> shift_magnitude; | |
str--; | |
switch (base) { | |
case BIN: { | |
*str = (n == 1) ? '1' : '0'; | |
} break; | |
case OCT: { | |
switch (n) { | |
case 0: *str = '0'; break; | |
case 1: *str = '1'; break; | |
case 2: *str = '2'; break; | |
case 3: *str = '3'; break; | |
case 4: *str = '4'; break; | |
case 5: *str = '5'; break; | |
case 6: *str = '6'; break; | |
case 7: *str = '7'; break; | |
default: debugBreak(); break; | |
} | |
} break; | |
case HEX: { | |
switch (n) { | |
case 0x0: *str = '0'; break; | |
case 0x1: *str = '1'; break; | |
case 0x2: *str = '2'; break; | |
case 0x3: *str = '3'; break; | |
case 0x4: *str = '4'; break; | |
case 0x5: *str = '5'; break; | |
case 0x6: *str = '6'; break; | |
case 0x7: *str = '7'; break; | |
case 0x8: *str = '8'; break; | |
case 0x9: *str = '9'; break; | |
case 0xA: *str = upper ? 'A' : 'a'; break; | |
case 0xB: *str = upper ? 'B' : 'b'; break; | |
case 0xC: *str = upper ? 'C' : 'c'; break; | |
case 0xD: *str = upper ? 'D' : 'd'; break; | |
case 0xE: *str = upper ? 'E' : 'e'; break; | |
case 0xF: *str = upper ? 'F' : 'f'; break; | |
default: debugBreak(); break; | |
} | |
} break; | |
default: debugBreak(); break; | |
} | |
if ((num_dec != 0) || (((num_str_ptr + PRINT_STR_SIZE) - str) < precision)) { | |
continue; | |
} else { | |
break; | |
} | |
} | |
len = (cast_ptr(Uptr, num_str_ptr) + PRINT_STR_SIZE) - cast_ptr(Uptr, str); | |
Char head_char = 0; | |
switch (base) { | |
case BIN: head_char = upper ? 'B' : 'b'; break; | |
case OCT: head_char = upper ? 'O' : 'o'; break; | |
case HEX: head_char = upper ? 'X' : 'x'; break; | |
default: debugBreak(); break; | |
} | |
if (flags & Print_Flags_ALTERNATE_FORM) { | |
head_str[head_index++] = '0'; | |
head_str[head_index++] = head_char; // head_char already carries the right case | |
} | |
if (precision < 0) { | |
precision = 0; | |
} | |
} break; | |
case 'u': | |
case 'd': { // integer | |
// get the integer and abs it | |
Uint64 num = 0; | |
if (flags & Print_Flags_INT64) { | |
Sint64 i64 = va_arg(va, Sint64); | |
if ((fmt[0] != 'u') && (i64 < 0)) { | |
num = 0ULL - cast_val(Uint64, i64); | |
flags |= Print_Flags_NEGATIVE; | |
} else { | |
num = cast_val(Uint64, i64); | |
} | |
} else { | |
Sint32 i = va_arg(va, Sint32); | |
if ((fmt[0] != 'u') && (i < 0)) { | |
flags |= Print_Flags_NEGATIVE; | |
num = cast_val(Uint64, 0U - cast_val(Uint32, i)); | |
} else { | |
num = cast_val(Uint64, i); | |
} | |
} | |
// convert to string | |
Uint64 num_dec = num; | |
if (flags & Print_Flags_INT8) { | |
num_dec = cast_val(Uint8, num_dec); | |
} else if (flags & Print_Flags_INT16) { | |
num_dec = cast_val(Uint16, num_dec); | |
} | |
str = num_str_ptr + PRINT_STR_SIZE; | |
if ((num == 0) && (precision == 0)) { | |
break; | |
} | |
while (num_dec) { | |
str--; | |
*str = cast_val(Char, num_dec % 10) + '0'; | |
num_dec /= 10; | |
} | |
// get the length that we copied | |
len = (cast_ptr(Uptr, num_str_ptr) + PRINT_STR_SIZE) - cast_ptr(Uptr, str); | |
if (len == 0) { | |
--str; | |
*str = '0'; | |
len = 1; | |
} | |
if (flags & Print_Flags_NEGATIVE) { | |
head_str[head_index++] = '-'; | |
} | |
if (flags & Print_Flags_LEADING_PLUS) { | |
head_str[head_index++] = '+'; | |
} | |
if (flags & Print_Flags_LEADING_SPACE) { | |
if (!(flags & Print_Flags_NEGATIVE)) { | |
head_str[head_index++] = ' '; | |
} | |
} | |
if (flags & Print_Flags_LEADING_ZERO) { | |
head_str[head_index++] = '0'; | |
} | |
if (precision < 0) { | |
precision = 0; | |
} | |
} break; | |
case 'p': { // pointer | |
const void *pv = va_arg(va, void*); | |
const Uptr pval = cast_ptr(Uptr, pv); | |
const Uint64 num = cast_val(Uint64, pval); | |
debugAssert(sizeof(pval) <= sizeof(num)); | |
Uint64 num_dec = num; | |
str = num_str_ptr + PRINT_STR_SIZE; | |
while (true) { | |
Uint64 n = num_dec & 0xf; | |
num_dec = num_dec >> 4; | |
str--; | |
switch (n) { | |
case 0x0: *str = '0'; break; | |
case 0x1: *str = '1'; break; | |
case 0x2: *str = '2'; break; | |
case 0x3: *str = '3'; break; | |
case 0x4: *str = '4'; break; | |
case 0x5: *str = '5'; break; | |
case 0x6: *str = '6'; break; | |
case 0x7: *str = '7'; break; | |
case 0x8: *str = '8'; break; | |
case 0x9: *str = '9'; break; | |
case 0xA: *str = 'A'; break; | |
case 0xB: *str = 'B'; break; | |
case 0xC: *str = 'C'; break; | |
case 0xD: *str = 'D'; break; | |
case 0xE: *str = 'E'; break; | |
case 0xF: *str = 'F'; break; | |
default: debugBreak(); break; | |
} | |
if ((num_dec != 0) || (((num_str_ptr + PRINT_STR_SIZE) - str) < precision)) { | |
continue; | |
} else { | |
break; | |
} | |
} | |
len = (cast_ptr(Uptr, num_str_ptr) + PRINT_STR_SIZE) - cast_ptr(Uptr, str); | |
} break; | |
case 'f': case 'F': | |
case 'e': case 'E': | |
case 'a': case 'A': { | |
switch (fmt[0]) { | |
case 'f': case 'F': { flags |= Print_Flags_FLOAT_FIXED; } break; | |
case 'e': case 'E': { flags |= Print_Flags_FLOAT_EXP; } break; | |
case 'a': case 'A': { flags |= Print_Flags_FLOAT_HEX; } break; | |
default: break; | |
} | |
Bool capital = false; | |
switch (fmt[0]) { | |
case 'F': case 'E': case 'A': { capital = true; } break; | |
default: break; | |
} | |
Bool precision_provided = precision >= 0; | |
if (flags & Print_Flags_FLOAT_HEX) { | |
// Default precision is set to -1, so this means that if precision is not mentioned, | |
// we print 13 digits which is enough for a lossless roundtrip. | |
if (precision < 0) { | |
precision = 13; | |
} else if (precision > 17) { | |
precision = 13; | |
} | |
} else { | |
// Since decimal (non-hex) is for human reading, 6 is the right precision | |
// (considering that it is also default in libc). | |
if (precision < 0) { | |
precision = 6; | |
} else if (precision > 17) { | |
precision = 17; | |
} | |
} | |
Float64 f64 = va_arg(va, Float64); | |
union FU64 { Float64 f; Uint64 u;} fu64 = {.f = f64}; | |
Uint64 bits = fu64.u; | |
/* NOTE(naman): 64-bit IEEE-754 Floating Point structure | |
____________________________________________ | |
| Sign bit | Exponent bits | Mantissa bits| | |
| 1 | 11 | 52 | | |
-------------------------------------------- | |
*/ | |
#define PRINT_F64_MANTISSA_BITS 52 | |
#define PRINT_F64_EXPONENT_BITS 11 | |
#define PRINT_F64_BIAS 1023 | |
// Is the top bit set? | |
Bool negative = bits >> (PRINT_F64_MANTISSA_BITS + PRINT_F64_EXPONENT_BITS); | |
// Remove all mantissa bits ------------------ and then reset the sign bit | |
Uint32 exponent_biased = ((cast_val(Uint32, bits >> PRINT_F64_MANTISSA_BITS)) & | |
((1U << PRINT_F64_EXPONENT_BITS) - 1U)); | |
Sint32 exponent = cast_val(Sint32, exponent_biased) - PRINT_F64_BIAS; | |
// Reset all bits except for the mantissa bits | |
Uint64 mantissa = bits & ((1ULL << PRINT_F64_MANTISSA_BITS) - 1ULL); | |
if (negative) { | |
flags |= Print_Flags_NEGATIVE; | |
} | |
{ // Leading String | |
if (flags & Print_Flags_NEGATIVE) { | |
head_str[head_index++] = '-'; | |
} else if (flags & Print_Flags_LEADING_PLUS) { | |
head_str[head_index++] = '+'; | |
} else if (flags & Print_Flags_LEADING_SPACE) { | |
if (!(flags & Print_Flags_NEGATIVE)) { | |
head_str[head_index++] = ' '; | |
} | |
} | |
if ((flags & Print_Flags_LEADING_ZERO) && | |
!(flags & Print_Flags_FLOAT_HEX)) { | |
head_str[head_index++] = '0'; | |
} | |
} | |
// Step 1: Print if it is NaN/inf or zero | |
if (exponent_biased == 0x7FF) { // Handle NaN and Inf | |
str = num_str; | |
if (capital) { | |
if (mantissa) { | |
*str++ = 'N'; | |
*str++ = 'A'; | |
*str++ = 'N'; | |
} else { | |
*str++ = 'I'; | |
*str++ = 'N'; | |
*str++ = 'F'; | |
} | |
} else { | |
if (mantissa) { | |
*str++ = 'n'; | |
*str++ = 'a'; | |
*str++ = 'n'; | |
} else { | |
*str++ = 'i'; | |
*str++ = 'n'; | |
*str++ = 'f'; | |
} | |
} | |
} else if (exponent_biased == 0 && mantissa == 0) { | |
if (flags & Print_Flags_FLOAT_HEX) { | |
head_str[head_index++] = '0'; | |
head_str[head_index++] = capital ? 'X' : 'x'; | |
head_str[head_index++] = '0'; | |
} | |
str = num_str; | |
*str++ = '0'; | |
if (precision > 0) { | |
*str++ = '.'; | |
memset(str, '0', cast_val(Size, precision)); | |
str += precision; | |
} | |
if (flags & Print_Flags_FLOAT_EXP) { | |
tail_str[tail_index++] = capital ? 'E' : 'e'; | |
tail_str[tail_index++] = '+'; | |
tail_str[tail_index++] = '0'; | |
tail_str[tail_index++] = '0'; | |
} else if (flags & Print_Flags_FLOAT_HEX) { | |
tail_str[tail_index++] = capital ? 'P' : 'p'; | |
tail_str[tail_index++] = '+'; | |
tail_str[tail_index++] = '0'; | |
} | |
} | |
// Normalized means that the float can always be represented as | |
// mantissa_normalised * 2^exponent_normalised | |
Uint64 mantissa_normalised; | |
Sint32 exponent_normalised; | |
if ((exponent_biased == 0) && (mantissa != 0)) { // Subnormal/Denormals | |
// Value of float is: | |
// (mantissa / 2^52) * 2^(1-1023) | |
// => mantissa * 2^(1 - 1023 - 52) | |
// => mantissa * 2^(-1074) | |
// Meaning, mantissa: mantissa ; exponent: -1074 | |
mantissa_normalised = mantissa; | |
exponent_normalised = -1074; | |
} else { | |
// If the float is normal, the value is: | |
// (1 + (mantissa / 2^52)) * 2^(exponent) | |
// => (2^52 + mantissa) * 2^(exponent - 52) | |
// Meaning, mantissa: 2^52 + mantissa ; exponent: exponent - 52 | |
mantissa_normalised = mantissa | (1ULL << 52); | |
exponent_normalised = exponent - 52; | |
} | |
// Step 2: Print if it a hexadecimal float | |
if ((str == nullptr) && (flags & Print_Flags_FLOAT_HEX)) { | |
head_str[head_index++] = '0'; | |
head_str[head_index++] = capital ? 'X' : 'x'; | |
Uint64 man = mantissa_normalised; | |
Sint32 exp = exponent_normalised + PRINT_F64_MANTISSA_BITS; | |
if (mantissa || precision) { | |
/* This makes it so that the MSB of mantissa is on | |
* 60th bit, and the normal/denormal bit (that we set above) | |
* is on 61st. This is done so that we can later extract the | |
* bit a nibble at a time by taking the top 4 bits repeatedly like this: | |
* (man >> 60) & 0xF; man <<= 4; | |
* This prevents having to do more complex masking. | |
* | |
* To find out how much to shift, we need to know how many nibbles | |
* need to be printed, and then shift so that that many nibbles end up | |
* in the front (when going from MSB to LSB). That calculation is: | |
* bitsInFloat64 - (bitsInNibble * (MANTISSA_BITS/bitsInNibble + 1)) | |
* => 64 - (4 * (52/4 + 1)) | |
* => 64 - (4 * (13 + 1)) | |
* => 64 - (4 * 14) | |
* => 64 - 56 | |
* => 8 | |
* (1 at the end comes to handle the 53rd implicit bit that we just added above) | |
*/ | |
man <<= 8; | |
/* This will do the rounding if the precision is smaller than maximum. | |
* It does this by incrementing the MSB of mantissa that won't be printed. | |
* Doing that will send a carry forward if the nibble associated with | |
* that MSB was >= 8, and not if it was < 8. Similarly, the carry will | |
* propagate further, rounding each nibble to its nearest value, | |
* with ties to even (i.e., round-to-nearest-even) which is considered default. | |
*/ | |
if (precision < 13) { | |
/* Now that we have moved all the nibbles to the top with `man <<= 8`, | |
* the bit index of the first nibble that will NOT be printed | |
* is (59 - 4*precision). | |
*/ | |
const Uint64 one_at_unprinted_msb_if_precision_is[13] = { | |
1ULL << (59 - 4 * 0), // 576460752303423488ULL, 0x800000000000000 | |
1ULL << (59 - 4 * 1), // 36028797018963968ULL, 0x80000000000000 | |
1ULL << (59 - 4 * 2), // 2251799813685248ULL, 0x8000000000000 | |
1ULL << (59 - 4 * 3), // 140737488355328ULL, 0x800000000000 | |
1ULL << (59 - 4 * 4), // 8796093022208ULL, 0x80000000000 | |
1ULL << (59 - 4 * 5), // 549755813888ULL, 0x8000000000 | |
1ULL << (59 - 4 * 6), // 34359738368ULL, 0x800000000 | |
1ULL << (59 - 4 * 7), // 2147483648ULL, 0x80000000 | |
1ULL << (59 - 4 * 8), // 134217728ULL, 0x8000000 | |
1ULL << (59 - 4 * 9), // 8388608ULL, 0x800000 | |
1ULL << (59 - 4 * 10), // 524288ULL, 0x80000 | |
1ULL << (59 - 4 * 11), // 32768ULL, 0x8000 | |
1ULL << (59 - 4 * 12), // 2048ULL, 0x800 | |
}; | |
// Various masks used for rounding | |
Uint64 one_at_unprinted_msb = one_at_unprinted_msb_if_precision_is[precision]; | |
Uint64 ones_after_unprinted_msb = one_at_unprinted_msb - 1ULL; | |
Uint64 zeroes_at_unprinted = ~((ones_after_unprinted_msb << 1) | 1); | |
Uint64 one_at_printed_lsb = one_at_unprinted_msb << 1; | |
// https://medium.com/angular-in-depth/how-to-round-binary-fractions-625c8fa3a1af | |
// Truncate the mantissa so zero all bits that won't be printed due to precision | |
Uint64 lower = man & zeroes_at_unprinted; | |
// Set most significant unprinted bit (not zero) to one | |
Uint64 middle = lower + one_at_unprinted_msb; | |
// Add one bit at least significant printed bit (and deal with any carry forwards) | |
Uint64 upper = lower + one_at_printed_lsb; | |
if (man < middle) { | |
man = lower; | |
} else if (man > middle) { | |
man = upper; | |
} else { // man == middle | |
// Resolve tie to nearest even | |
if ((lower & one_at_printed_lsb) == 0) { | |
man = lower; | |
} else { | |
man = upper; | |
} | |
} | |
} | |
Char const *hexs = capital ? "0123456789ABCDEF" : "0123456789abcdef"; | |
str = num_str; | |
// This prints 0/1 depending on normal/denormal status | |
*str++ = hexs[(man >> 60) & 0xF]; | |
man <<= 4; | |
if (precision) *str++ = '.'; | |
Sint32 n = precision; | |
Uint32 count_of_end_zeroes = 0; | |
while (n--) { | |
if ((man >> 60) & 0xF) { | |
count_of_end_zeroes = 0; | |
} else { | |
count_of_end_zeroes++; | |
} | |
*str++ = hexs[(man >> 60) & 0xF]; | |
man <<= 4; | |
} | |
if (precision_provided) { | |
count_of_end_zeroes = 0; | |
} | |
str -= count_of_end_zeroes; | |
} else { | |
// mantissa == 0 and precision == 0: the value is an exact power of two, so | |
// the only digit is the leading (implicit) 1. Without this, control would | |
// fall through to the decimal printer below and mangle the output. | |
str = num_str; | |
*str++ = '1'; | |
} | |
{ | |
tail_str[tail_index++] = capital ? 'P' : 'p'; | |
tail_str[tail_index++] = exp >= 0 ? '+' : '-'; | |
exp = exp >= 0 ? exp : -exp; | |
Char es[4] = {0}; | |
Char *ep = &es[elemin(es)]; | |
Char el = 0; | |
do { // do-while, so that a zero exponent still prints as "p+0" | |
*--ep = cast_val(Char, exp % 10) + '0'; | |
el++; | |
exp /= 10; | |
} while (exp); | |
while (el) { | |
el--; | |
tail_str[tail_index++] = *ep++; | |
} | |
} | |
} | |
Bool rounding_up_added_a_new_integer_digit_on_the_left = true; | |
// If this is true, str will be set to num_str at the end, else to num_str+1. | |
// This is needed to deal with the carry digit when rounding due to less precision. | |
// Step 3: Print if it is the usual float to decimal situation | |
// FIXME(naman): This uses bigint arithmetic, which makes it quite slow (especially | |
// for doubles with huge integral parts). See about replacing it with | |
// Swift's dtoa, which uses an improved Grisu2/Errol3 algorithm: | |
// https://github.com/swiftlang/swift/blob/main/stdlib/public/runtime/SwiftDtoa.cpp | |
if (str == nullptr) { // If still unprinted, print through the long method | |
rounding_up_added_a_new_integer_digit_on_the_left = false; | |
str = num_str + 1; | |
// This algorithm is basically a fixed-point exact-conversion | |
// implementation (inspired by Tei Andu: https://pastebin.com/z9hYEWF1). | |
// Since this implementation is extremely simple, we don't try to find | |
// the minimum digits that allow round tripping, especially since | |
// `printf` like interfaces don't allow asking for it either. | |
// If you want round-tripping, print as a hex-float, or | |
// print with precision set to 17. | |
// If M = mantissa_normalised and X = exponent_normalised, | |
// then M should be interpreted as a number of shape 1.pqrs or 0.pqrs | |
// Multiplying with 2^X turns this into a real floating point value: | |
// v = M.2^X | |
// But X can be both positive and negative, which means that it could move | |
// the binary point in M right or left. And even if it is positive, it may not | |
// move the binary point enough for M to become an integer, which is what | |
// we want for easy processing. | |
// So, we multiply the whole value `v` with a huge number 2^p, where `p` | |
// is chosen so that it can overpower even the most negative X so that | |
// the whole number `v` becomes a single integer. | |
// N = v.2^p = M.2^X.2^p | |
// => N = v.2^p = M.2^(X+p) | |
// => N = v.2^p = M.2^L | |
// What this means is that we shift the mantissa M left by X+p (=L) bits to turn | |
// the whole number into a giant integer. Later, we can extract the integral | |
// and fractions parts (`I` and `F`) of the original value `v` using: | |
// I = floor(N/2^p) = N >> p | |
// F = N mod 2^p = N & ((1 << p) - 1) | |
// Now, L has to be greater than or equal to zero, since shifting right is stupid, | |
// even if the mantissa is already integral. So, | |
// L >= 0 | |
// => X+p >= 0 | |
// => p >= -X | |
// If `p` is greater than `-X`, we need highest value of `-X` which means we need | |
// lowest value of `X`. So let's find Xmin: | |
// First, let's say that the number of exponent bits is k. | |
// Then, bias is equal to 2^(k-1) - 1 | |
// And since the values of exponent_biased can not be 0 and 111...1 | |
// since these values are reserved for NaN and Inf, exponent_biased's range is | |
// [1, 2^k - 2]. | |
// To remove bias, we subtract it, so: | |
// exponent = exponent_biased - bias | |
// => exponent = [1, 2^k - 2] - (2^(k-1) - 1) | |
// => exponent = [1, 2^k - 2] - 2^(k-1) + 1 | |
// => exponent = [1 - 2^(k-1) + 1, 2.2^(k-1) - 2^(k-1) + 1] | |
// => exponent = [2 - 2^(k-1), 2^(k-1) - 1] | |
// For Float32, this is [-126, 127] and for Float64, this is [-1022, 1023] | |
// Since X = exponent - mantissa_bits, | |
// X for Float32 is [-126-23, 127-23] => [-149, 104] | |
// X for Float64 is [-1022-52, 1023-52] => [-1074, 971] | |
// Therefore, coming to the inequality, | |
// p >= -X | |
// => p >= -(-1074) | |
// => p >= 1074 | |
// Similarly to find out what is the maximum possible number of bits in `N`, we need | |
// to add the already existing bits in mantissa and how much we shifted them by. Thus, | |
// h = L + mantissa_bits | |
// => h = X + p + mantissa_bits | |
// => h = (exponent - mantissa_bits) + p + mantissa_bits | |
// => h = exponent + p | |
// Let's say that the big int that will store the shifted mantissa has B bits. | |
// Accounting for one carry bit, this means that, | |
// h <= B - 1 | |
// => exponent + p <= B - 1 | |
// => p <= B - 1 - exponent | |
// For inequality to hold forever, (B - 1 - exponent) has to be minimum, meaning | |
// that `exponent` has to be maximum. | |
// => p <= B - 1 - 1023 | |
// => p <= B - 1024 | |
// Combining the two inequalities about `p`, we get | |
// 1074 <= B - 1024 | |
// => B >= 2098 | |
// Since the big int B will be composed of Uint64s, we need to find out how many | |
// Uint64s it will take. 2098/(8*8) is 32.78, and the closest larger number that | |
// can be cleanly divided in two is 34. However, the Ryan Juckett algorithm used | |
// bigints with 35 Uint32s, so it's safer to use 36 Uint64s, meaning: | |
// B = 2304 | |
Uint64 bigint[36] = {0}; | |
// Once we set B, we can derive p by: | |
// p <= 2304 - 1024 | |
// => p <= 1280 | |
// and p >= 1074 | |
// The ideal choice is p = 1152, since it is B/2 (easy to deal with in various ways). | |
Uint pivot = 1152; // p above, 160 in reference | |
Sint total_left_shift_signed = exponent_normalised + cast_val(Sint, pivot); | |
debugAssert(total_left_shift_signed >= 0); | |
Uint total_left_shift = cast_val(Uint, total_left_shift_signed); | |
// `L` above, `mlsft` in reference | |
// The casting is safe since pivot is chosen to be big enough that it makes | |
// even fully negative X become positive after addition. | |
// Because pivot is divided by 64 evenly (1152/64=18), the pivot bit will fall | |
// on an element boundary. This means that in the total_left_shift, the shift | |
// contributed by pivot always ends at a element boundary, and then the shift by | |
// exponent_normalised (whether positive or negative) moves the mantissa bits | |
// left or right, potentially out of alignment with that element boundary. | |
Uint pivot_elem = pivot/64; | |
// Since we are using Uint64s for bigint, we need to calculate which elements | |
// of the array do the bits of mantissa get stored in first, and which bits get | |
// stored where. | |
// N = M.2^L | |
// => N = M.2^(64*(L/64) + (L%64)) | |
// If elems_shifted = L/64, and bits_shifted = L%64 | |
// => N = M.2^(bits_shifted).2^(64*elems_shifted) | |
// This means we first shift by enough bits to move the mantissa into the right | |
// relative position within an element. Then we shift across as many whole | |
// elements as needed until the mantissa is in the right place. | |
// NOTE(naman): The LSB->MSB of our mantissa is stored from index 0->35 | |
// So the best way to visualize the bigint array is: | |
// bigint = [ 35 | 34 | 33 | 32 | ... | 3 | 2 | 1 | 0 ] | |
// with the bits in each element going: | |
// element = [ 63 | 62 | 61 | ... | 3 | 2 | 1 | 0 ] | |
// In the rest of the comments below, I will refer to range within | |
// the array as [larger, smaller] indices, since that is what makes | |
// sense here. | |
Uint elems_shifted = total_left_shift >> 6; // L / 64, `a` in reference | |
Uint bits_shifted = total_left_shift & ((1u << 6) - 1u); // L % 64, `b` in reference | |
bigint[elems_shifted] = mantissa_normalised << bits_shifted; // Mantissa's LSB | |
if (bits_shifted) { | |
bigint[elems_shifted+1] = mantissa_normalised >> (64u - bits_shifted); // Mantissa's MSB | |
} | |
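// Concrete check: for v = 1.0, mantissa_normalised = 2^52 and | |
// exponent_normalised = -52, so total_left_shift = 1152 - 52 = 1100, | |
// giving elems_shifted = 17 and bits_shifted = 12. The low store wraps | |
// (2^52 << 12) to zero in 64 bits, so bigint[17] = 0 and | |
// bigint[18] = 2^52 >> 52 = 1. Since pivot_elem = 1152/64 = 18, the | |
// integer part reads back as exactly 1 and the fraction as 0, as expected. | |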
// The `I` (integer part) is in [35, pivot_elem] and the `F` (fractional part) is | |
// in [pivot_elem-1, 0]. | |
Char *str_int_begin = str; | |
Char *str_int_end; | |
{ // Print integral part | |
// The basic algorithm is to divide by 10 continuously and collect the remainder. | |
// This remainder is the digit to be printed in the next place value. Of course, | |
// when we are dealing with bigint, we have to modify the algorithm like this: | |
// carry = 0 | |
// for (&elem in integral_bigint from left to right) { | |
// current = carry.2^64 + *elem | |
// *elem = floor(current/10) | |
// carry = current % 10 | |
// } | |
// As an example, let's say we have the number 5150, and we are dealing with | |
// two digits at a time. Dividing by 10 will mean first doing 51/10=5 and getting | |
// carry of 1. Then, we multiply the carry 1 with 100, and add it to 50 to get 150, | |
// which is divided again as 150/10=15. Thus, the final quotient is 515. | |
// Now, doing the (carry.2^64 + *elem) will require more than 64-bits, which we don't | |
// have. Maybe we could do it in 128-bits, but there is an easier hack. What if we break | |
// this operation into two 32-bit ops? That way, we can get it done directly without | |
// needing any new helper functions or data types. | |
// Let's start with: | |
// current = carry.2^64 + *elem | |
// Divide *elem in two 32-bit parts, `hi` and `lo` | |
// *elem = hi.2^32 + lo | |
// Let's say: | |
// cur_hi = carry.2^32 + hi | |
// and cur_hi = 10.q_hi + r_hi // Quotient and Remainder | |
// Similarly, | |
// cur_lo = r_hi.2^32 + lo | |
// and cur_lo = 10.q_lo + r_lo | |
// Combining: | |
// current = carry.2^64 + *elem | |
// => current = carry.2^64 + hi.2^32 + lo | |
// => current = 2^32.(carry.2^32 + hi) + lo | |
// => current = 2^32.cur_hi + lo | |
// => current = 2^32.(10.q_hi + r_hi) + lo | |
// => current = 2^32.10.q_hi + 2^32.r_hi + lo | |
// => current = (q_hi << 32).10 + cur_lo | |
// => current = (q_hi << 32).10 + 10.q_lo + r_lo | |
// => current = 10.((q_hi << 32) + q_lo) + r_lo | |
// And so, we can calculate: | |
// floor(current/10) = (q_hi << 32) + q_lo | |
// and current % 10 = r_lo | |
Bool non_zero_digits_left = false; | |
Sint non_zero_starts_at = cast_val(Sint, elems_shifted + 1); | |
str_int_begin = str; | |
do { | |
Uint32 carry = 0; | |
non_zero_digits_left = false; | |
for (Sint i = non_zero_starts_at; i >= cast_val(Sint, pivot_elem); i--) { | |
Uint64 elem = bigint[i]; | |
Uint32 hi = cast_val(Uint32, elem >> 32); | |
Uint32 lo = cast_val(Uint32, elem); | |
Uint64 cur_hi = (cast_val(Uint64, carry) << 32) + hi; | |
Uint32 q_hi = cast_val(Uint32, cur_hi / 10u); | |
Uint32 r_hi = cast_val(Uint32, cur_hi % 10u); | |
Uint64 cur_lo = (cast_val(Uint64, r_hi) << 32) + lo; | |
Uint32 q_lo = cast_val(Uint32, cur_lo / 10u); | |
Uint32 r_lo = cast_val(Uint32, cur_lo % 10u); | |
bigint[i] = (cast_val(Uint64, q_hi) << 32) + q_lo; | |
carry = r_lo; | |
Bool is_zero = bigint[i] == 0; | |
if ((i == non_zero_starts_at) && is_zero) { | |
non_zero_starts_at--; | |
} | |
non_zero_digits_left |= !is_zero; | |
} | |
debugAssert(carry < 10); | |
*str++ = cast_val(Char, carry) + '0'; | |
} while (non_zero_digits_left); | |
str_int_end = str - 1; | |
// Reverse the printed string | |
Char *begin = str_int_begin; | |
Char *end = str_int_end; | |
while (end > begin) { | |
Char swap = *end; | |
*end-- = *begin; | |
*begin++ = swap; | |
} | |
} | |
if (precision) { *str++ = '.'; } // no decimal point when no fraction digits were requested | |
Char *str_frc_begin = str; | |
Char *str_frc_end; | |
{ // Print Fractional part | |
// When I say that `F` is the fractional part, what I mean is that the | |
// `F` is an integer that will make the fractional part if the number was | |
// written in the format `I.F` (where the dot is the decimal point, not | |
// multiplication). But as it is, if we simply were to read the bits in | |
// [pivot_elem-1, 0], we just get an integer F that represents | |
// the fractional part. So, to actually get the digits out of it, | |
// we will have to figure out how to treat the integer F as a fraction. | |
// Also, remember that we have a total `pivot` number of bits in F, | |
// meaning that the decimal point (after multiplying 2^p originally when | |
// making the whole floating point an integer) is on the pivot point. | |
// So, the way to get the fractional part is to read only what comes after | |
// the pivot. | |
// To calculate the digit: | |
// d = floor((10 * F)/2^p) | |
// What this means is that we first multiply F by 10 (thus moving one digit to | |
// the left of the decimal point), and then right-shift by p-bits so that all | |
// the numbers right of the decimal point disappear, leaving only the one digit | |
// that was to the left of the point. | |
// Once this is done, before we can do the next digit, we change F with: | |
// F = (10 * F) % 2^p | |
// Basically, this gives us the bits right of the decimal point after multiplying | |
// with 10. | |
// Since we are dealing with bigint, the multiplication algorithm has to be: | |
// carry = 0 | |
// while (&elem in fractional_bigint from right to left) { | |
// product = *elem * 10 | |
// *elem = carry + (product % 2^64) | |
// carry = product / 2^64 | |
// } | |
// As an example, let's say we have the number 0.5432, and we are dealing with | |
// two digits at a time. Multiplying with 10 will mean first doing 32*10=320 | |
// and getting result of 20 and carry of 3. Then, we multiply 54 with 10 | |
// to get 540 to which we add the carry 3, getting 543. Again, 43 remain and 5 | |
// becomes the carry. This 5 is the digit extracted, since the number now is | |
// 5.432, with 5 stored in carry and 4320 being left for future digit extraction. | |
// Similar to the integral part, doing the (*elem * 10) and (carry + product) | |
// will require more than 64-bits, which we don't have. So, let's try to do this | |
// with 32 bit ops again. | |
// Let's start with: | |
// product = *elem * 10 | |
// Divide *elem in two 32-bit parts, `hi` and `lo` | |
// *elem = hi.2^32 + lo | |
// Let's say: | |
// prod_lo = lo * 10 | |
// prod_lo_mod = prod_lo % 2^32 | |
// elem_lo = carry + prod_lo_mod | |
// carry_lo = prod_lo / 2^32 | |
// | |
// prod_hi = hi * 10 | |
// elem_hi = carry_lo + (prod_hi % 2^32) | |
// carry_hi = prod_hi / 2^32 | |
// | |
// *elem = elem_hi.2^32 + elem_lo | |
// carry = carry_hi | |
Bool non_zero_digits_left = false; | |
Uint fraction_starts_at = elems_shifted; | |
Uint digit_count = 0; | |
// This loop will always print at least one digit | |
do { | |
Uint32 carry = 0; | |
non_zero_digits_left = false; | |
for (Uint32 i = fraction_starts_at; i < pivot_elem; i++) { | |
Uint64 elem = bigint[i]; | |
Uint32 hi = cast_val(Uint32, elem >> 32); | |
Uint32 lo = cast_val(Uint32, elem); | |
Uint64 prod_lo = cast_val(Uint64, lo) * 10u; | |
Uint32 prod_lo_mod = cast_val(Uint32, prod_lo); | |
Uint32 elem_lo = carry + prod_lo_mod; | |
Uint32 carry_lo = cast_val(Uint32, prod_lo >> 32); | |
if (elem_lo < prod_lo_mod) { // Overflow when adding carry | |
carry_lo++; | |
} | |
Uint64 prod_hi = cast_val(Uint64, hi) * 10u; | |
Uint32 prod_hi_mod = cast_val(Uint32, prod_hi); | |
Uint32 elem_hi = carry_lo + prod_hi_mod; | |
Uint32 carry_hi = cast_val(Uint32, prod_hi >> 32); | |
if (elem_hi < prod_hi_mod) { // Overflow when adding carry | |
carry_hi++; | |
} | |
bigint[i] = (cast_val(Uint64, elem_hi) << 32) | elem_lo; | |
carry = carry_hi; | |
Bool is_zero = bigint[i] == 0; | |
if ((i == fraction_starts_at) && is_zero) { | |
fraction_starts_at++; | |
} | |
non_zero_digits_left |= !is_zero; | |
} | |
if (digit_count < (cast_val(Uint, precision) + 1)) { | |
debugAssert(carry < 10); | |
*str++ = cast_val(Char, carry) + '0'; | |
digit_count++; | |
} else { | |
// Even after printing (precision + 1) digits, the remaining fraction is | |
// not zero. So we mark that condition by setting digit_count to a value | |
// one greater than that. | |
digit_count = cast_val(Uint, precision) + 2; | |
} | |
} while (non_zero_digits_left && (digit_count <= (cast_val(Uint, precision) + 1))); | |
str_frc_end = str - 1; | |
unused_variable(str_frc_end); // Prevent "set but unused" warnings | |
// TODO(naman): Add support for printing floats in exponential notation (A.BCe+D) | |
// This may be useful: https://gist.github.com/forksnd/c9a91957e9e5c933635148d3b74f7d65 | |
// Now, do rounding based on round-half-to-even (Banker's rounding) | |
// https://en.wikipedia.org/wiki/Rounding#Rounding_half_to_even | |
// The basic algorithm is this, given precision after decimal point is p, | |
// 1. Let rounding_digit be the digit at position (p+1) | |
// 2. Let tail be all digits that come after rounding_digit | |
// 3. If rounding_digit > 5, round up | |
// 4. If rounding_digit < 5, round down (do nothing) | |
// 5. If rounding_digit == 5, | |
// A. If tail has any non-zero digits: round up | |
// B. If tail is all zeroes, | |
// a) If digit at position p is odd, round up | |
// b) If digit at position p is even, do nothing | |
// The edge cases to keep in mind are: | |
// 1. precision == 0, meaning there is no fractional part | |
// 2. Round-up cascade reaching into integral part from fractional part | |
// 3. For negative numbers, rounding is done to magnitude and sign is added later | |
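// Worked example with precision p = 2: 0.125 is exact in binary, so the | |
// fraction digits are "125" with an all-zero tail; rounding_digit is '5' | |
// and the digit at position p is '2' (even), so "%.2f" gives 0.12. For | |
// 0.375 (also exact), the digit at position p is '7' (odd), so "%.2f" | |
// rounds up to 0.38. | |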
Bool round_up; | |
if (digit_count <= cast_val(Uint, precision)) { // We needed more fractional digits | |
// We didn't collect (precision + 1) digits, so non_zero_digits_left must have | |
// become false, meaning that there is a bunch of zeroes that should trail our | |
// number. | |
if (cast_val(Uint, precision) > digit_count) { | |
Uint zero_count = cast_val(Uint, precision) - digit_count; | |
// Trailing zeroes | |
memset(str, '0', zero_count); | |
str += zero_count; | |
str_frc_end = str - 1; | |
} | |
// Since the fraction ended prematurely, there is no rounding needed either. | |
round_up = false; | |
} else { // We got all the fractional digits we needed | |
const Char rounding_digit = str_frc_begin[precision]; | |
Bool tail_is_nonzero; | |
if (digit_count > (cast_val(Uint, precision) + 1)) { | |
// We captured more than precision+1 digits => there is a non-zero tail | |
tail_is_nonzero = true; | |
} else if (digit_count < (cast_val(Uint, precision) + 1)) { | |
// We didn't even capture the rounding digit => fraction ended early => no tail | |
tail_is_nonzero = false; | |
} else { | |
// exactly precision+1 digits were captured, so use the post-multiply flag | |
tail_is_nonzero = non_zero_digits_left; | |
} | |
if (rounding_digit > '5') { | |
round_up = true; | |
} else if (rounding_digit < '5') { | |
round_up = false; | |
} else { // rounding_digit == 5 | |
if (tail_is_nonzero) { | |
round_up = true; | |
} else { | |
Char prev; | |
if (precision == 0) { // No fraction digits, rounding based on the integer part | |
// We have still printed one fraction digit in the (precision + 1) | |
// slot, which is why the (rounding_digit == 5) condition is hit. | |
prev = *str_int_end; | |
} else { // Fraction digit present, rounding based on them | |
prev = str_frc_begin[precision - 1]; | |
} | |
Bool prev_is_odd = (prev - '0') & 1; | |
round_up = prev_is_odd; | |
} | |
} | |
if (round_up) { | |
Sint k = precision - 1; // -1 if precision == 0 | |
while (k >= 0) { // Only run if there are fractional digits to print | |
if (str_frc_begin[k] < '9') { // Can be rounded up in place | |
str_frc_begin[k]++; // Round up | |
break; // and we are done | |
} else { // str_frc_begin[k] == '9' | |
debugAssert(str_frc_begin[k] == '9'); | |
str_frc_begin[k] = '0'; // 9+1 = 10, so 0 here and 1 carries over | |
k--; // We need to apply the carry to the digit on the left | |
} | |
} | |
if (k < 0) { // Either no fractional digits, or 1 carried from fractional rounding | |
Char *int_digit = str_int_end; | |
while (int_digit >= str_int_begin) { | |
if (*int_digit < '9') { | |
(*int_digit)++; | |
break; | |
} else { | |
*int_digit = '0'; | |
int_digit--; | |
} | |
} | |
if (int_digit < str_int_begin) { // One more carry | |
debugAssert(int_digit == (str_int_begin - 1)); | |
rounding_up_added_a_new_integer_digit_on_the_left = true; | |
*int_digit = '1'; | |
str_int_begin--; | |
} | |
} | |
} | |
str = str_frc_begin + precision; | |
} | |
} | |
} | |
len = cast_val(Uptr, str - num_str_ptr); | |
if (rounding_up_added_a_new_integer_digit_on_the_left) { | |
str = num_str_ptr; | |
} else { | |
str = num_str_ptr + 1; | |
len--; // num_str[0] was reserved for the rounding carry digit but stayed unused | |
} | |
precision = 0; | |
} break; | |
case '%': { | |
str = num_str_ptr; | |
str[0] = '%'; | |
len = 1; | |
precision = 0; | |
} break; | |
case '\0': { | |
// If the format string ends prematurely, we print | |
// whatever half-formed conversion specification came through. | |
// To do that, we decrement the pointer so that we don't end up | |
// copying the terminating null byte (the output gets terminated | |
// at the end anyway). Decrementing also means that after the | |
// next fmt++ (at the end of the loop body), fmt points at the | |
// null byte again, so the printing ends properly. | |
fmt--; | |
goto nlib_print_unimplemented_feature; | |
} break; | |
default: { // unknown, just copy conversion specification | |
nlib_print_unimplemented_feature: | |
str = num_str_ptr; | |
while (format_pointer <= fmt) { | |
str[0] = format_pointer[0]; | |
format_pointer++; | |
str++; | |
} | |
len = cast_val(Size, str - num_str_ptr); | |
str = num_str_ptr; | |
precision = 0; | |
} break; | |
} | |
Size head_size = head_index; | |
head_index = 0; | |
Size tail_size = tail_index; | |
tail_index = 0; | |
// get minimum_width=leading/trailing space, precision=leading zeros | |
if (cast_val(Size, precision) < len) { | |
precision = cast_val(Sint, len); | |
} | |
Sint zeros_head_tail = (precision + | |
cast_val(Sint, head_size) + | |
cast_val(Sint, tail_size)); | |
if (minimum_width < zeros_head_tail) { | |
minimum_width = zeros_head_tail; | |
} | |
minimum_width -= zeros_head_tail; | |
precision -= cast_val(Sint, len); | |
// handle right justify and leading zeros | |
if ((flags & Print_Flags_LEFT_JUSTIFIED) == 0) { | |
if (flags & Print_Flags_LEADING_ZERO) { // then everything is in precision | |
precision = (minimum_width > precision) ? minimum_width : precision; | |
minimum_width = 0; | |
} | |
} | |
// copy leading spaces | |
if ((minimum_width + precision) > 0) { | |
// copy leading spaces (or when doing %8.4d stuff) | |
if ((flags & Print_Flags_LEFT_JUSTIFIED) == 0) { | |
for (Sint i = 0; i < minimum_width; i++) { | |
PRINT_ADD_CHAR(' '); | |
} | |
} | |
for (Size i = 0; i < head_size; i++) { // copy the head | |
PRINT_ADD_CHAR(head_str[i]); | |
} | |
head_index += head_size; | |
for (Sint i = 0; i < precision; i++) { // copy leading zeros | |
PRINT_ADD_CHAR('0'); | |
} | |
} | |
if (head_index < head_size) { // copy the head | |
Size repeat = head_size - head_index; | |
for (Size i = 0; i < repeat; i++) { | |
PRINT_ADD_CHAR(head_str[i]); | |
} | |
head_index += repeat; | |
} | |
for (Size i = 0; i < len; i++) { // copy the string | |
PRINT_ADD_CHAR(str[i]); | |
} | |
str += len; | |
for (Size i = 0; i < tail_size; i++) { // copy the tail | |
PRINT_ADD_CHAR(tail_str[i]); | |
} | |
// handle the left justify | |
if (flags & Print_Flags_LEFT_JUSTIFIED) { | |
for (Sint i = 0; i < minimum_width; i++) { | |
PRINT_ADD_CHAR(' '); | |
} | |
} | |
fmt++; | |
} else if (fmt[0] == '\0') { | |
break; | |
} | |
} | |
if (buffer_index) { | |
callback(userdata, buffer, buffer_index, buffer_size); | |
} | |
#undef PRINT_ADD_CHAR | |
#undef PRINT_STR_SIZE | |
return total_size; | |
} | |
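// A minimal usage sketch (hypothetical wrapper, not part of this header): | |
// flush a fixed buffer to stdout whenever it fills up, and once at the end. | |
// Assumes <stdio.h> has been included for fwrite(). | |
#if 0 | |
static PRINT_CALLBACK_FUNC(printStdoutFlush_) | |
{ | |
unused_variable(userdata); | |
unused_variable(buffer_cap); | |
fwrite(filled_buffer, 1, buffer_len, stdout); | |
} | |
header_function | |
PRINT_FUNC(printToStdout) | |
{ | |
unused_variable(userdata); | |
Char buf[256]; | |
va_list va; | |
va_start(va, fmt); | |
Size s = printStringVarArg(buf, elemin(buf), fmt, va, printStdoutFlush_, nullptr); | |
va_end(va); | |
return s; | |
} | |
#endif | |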
/************************************************************************************** | |
* Memory | |
*/ | |
header_function | |
void* memoryAlignUpPtrTo (void *p, Size align) | |
{ | |
Size k = align - 1LLU; | |
Size not_k = ~k; | |
Size up = cast_bit(Uptr, p) + k; | |
Size result_uptr = up & cast_val(Uptr, not_k); | |
void *result = cast_bit(void *, result_uptr); | |
return result; | |
} | |
header_function | |
Size memoryAlignUpSizeTo (Size s, Size align) | |
{ | |
Size k = align - 1LLU; | |
Size result = (s + k) & (~ k); | |
return result; | |
} | |
header_function | |
void* memoryAlignDownPtrTo (void *p, Size align) | |
{ | |
Byte *pb = cast_val(Byte *, p); | |
Size k = align - 1LLU; | |
Byte *result = cast_val(Byte *, memoryAlignUpPtrTo(pb - k, align)); | |
return result; | |
} | |
header_function | |
Size memoryAlignDownSizeTo (Size s, Size align) | |
{ | |
Size k = align - 1LLU; | |
Size result = memoryAlignUpSizeTo(s - k, align); | |
return result; | |
} | |
#if defined(ENV_LANG_C) && ENV_LANG_C >= 2011 | |
# define memoryAlignUpTo(x, a) _Generic((x), void*: memoryAlignUpPtrTo, Size:memoryAlignUpSizeTo)(x, a) | |
# define memoryAlignDownTo(x, a) _Generic((x), void*: memoryAlignDownPtrTo, Size:memoryAlignDownSizeTo)(x, a) | |
#elif defined(ENV_LANG_CXX) | |
header_function void* memoryAlignUpTo (void *p, Size align) { return memoryAlignUpPtrTo(p, align); } | |
header_function Size memoryAlignUpTo (Size s, Size align) { return memoryAlignUpSizeTo(s, align); } | |
header_function void* memoryAlignDownTo (void *p, Size align) { return memoryAlignDownPtrTo(p, align); } | |
header_function Size memoryAlignDownTo (Size s, Size align) { return memoryAlignDownSizeTo(s, align); } | |
#endif | |
#define memoryAlignUp(x) (memoryAlignUpTo(x, alignof(max_align_t))) | |
#define memoryAlignDown(x) (memoryAlignDownTo(x, alignof(max_align_t))) | |
#define aligned_sizeof(x) memoryAlignUp(sizeof(x)) | |
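// For instance, memoryAlignUpSizeTo(13, 8) == 16 and memoryAlignDownSizeTo(13, 8) == 8, | |
// while already-aligned inputs are returned unchanged: memoryAlignUpSizeTo(16, 8) == 16. | |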
typedef struct Memory_Allocator_Interface Memory_Allocator_Interface; | |
#define MEMORY_ALLOCATOR_REQUEST_FUNC(name) void* name (void *userdata, Size amount) | |
#define MEMORY_ALLOCATOR_RESCIND_FUNC(name) void name (void *userdata, void *ptr) | |
struct Memory_Allocator_Interface { | |
void *userdata; | |
MEMORY_ALLOCATOR_REQUEST_FUNC((*request)); | |
MEMORY_ALLOCATOR_RESCIND_FUNC((*rescind)); | |
#if defined(ENV_SANITIZER_ADDRESS) | |
Bool skip_address_sanitizer; | |
#endif | |
}; | |
header_function | |
Memory_Allocator_Interface memICreate (void *userdata, MEMORY_ALLOCATOR_REQUEST_FUNC((*request)), MEMORY_ALLOCATOR_RESCIND_FUNC((*rescind))) | |
{ | |
Memory_Allocator_Interface mif; | |
memzero(&mif); | |
mif.userdata = userdata; | |
mif.request = request; | |
mif.rescind = rescind; | |
return mif; | |
} | |
// NOTE(naman): Since memIRescind poisons the memory in order to prevent use after free, we might get an | |
// "use after poisoning" error if that memory was to be reallocated. This might happen if the miface is being | |
// used to wrap a libc/system allocator. Thus, in such case, this function should be called to ensure that | |
// we don't end up with strange crashes upon memory allocation. | |
header_function | |
void memIDisableASAN (Memory_Allocator_Interface *miface) | |
{ | |
unused_variable(miface); | |
#if defined(ENV_SANITIZER_ADDRESS) | |
miface->skip_address_sanitizer = true; // take a pointer: mutating a by-value copy would be lost on return | |
#endif | |
} | |
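// A minimal sketch of wrapping the libc heap in this interface | |
// (hypothetical helpers; assumes <stdlib.h> for malloc/free): | |
#if 0 | |
static MEMORY_ALLOCATOR_REQUEST_FUNC(memHeapRequest_) { unused_variable(userdata); return malloc(amount); } | |
static MEMORY_ALLOCATOR_RESCIND_FUNC(memHeapRescind_) { unused_variable(userdata); free(ptr); } | |
header_function | |
Memory_Allocator_Interface memHeapInterface (void) | |
{ | |
Memory_Allocator_Interface heap = memICreate(nullptr, memHeapRequest_, memHeapRescind_); | |
memIDisableASAN(&heap); // libc can recycle poisoned addresses; see the note above | |
return heap; | |
} | |
#endif | |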
header_function | |
void* memIRequest (Memory_Allocator_Interface miface, Size size) | |
{ | |
#if defined(ENV_SANITIZER_ADDRESS) | |
Size real_size = size + alignof(max_align_t); | |
Byte *orig_ptr = cast_val(Byte*, miface.request(miface.userdata, real_size)); | |
Byte *ptr = orig_ptr + (real_size - size); | |
if (miface.skip_address_sanitizer == false) { | |
ASAN_UNPOISON_MEMORY_REGION(orig_ptr, real_size); | |
} | |
Size *s = cast_val(Size*, cast_val(void*, orig_ptr)); | |
static_assert(sizeof(*s) <= alignof(max_align_t), "Header is too small to store size"); | |
*s = size; | |
if (miface.skip_address_sanitizer == false) { | |
ASAN_POISON_MEMORY_REGION(orig_ptr, alignof(max_align_t)); | |
} | |
return ptr; | |
#else | |
return miface.request(miface.userdata, size); | |
#endif | |
} | |
header_function | |
void memIRescind (Memory_Allocator_Interface miface, void *ptr) | |
{ | |
#if defined(ENV_SANITIZER_ADDRESS) | |
Byte *p = cast_val(Byte*, ptr); | |
void *orig_ptr = p - alignof(max_align_t); | |
if (miface.skip_address_sanitizer == false) { | |
ASAN_UNPOISON_MEMORY_REGION(orig_ptr, alignof(max_align_t)); | |
} | |
Size *s = cast_val(Size*, orig_ptr); | |
static_assert(sizeof(*s) <= alignof(max_align_t), "Header is too small to fetch size"); | |
Size size = *s; | |
Size real_size = size + alignof(max_align_t); | |
if (miface.skip_address_sanitizer == false) { | |
ASAN_POISON_MEMORY_REGION(orig_ptr, real_size); | |
} | |
miface.rescind(miface.userdata, orig_ptr); | |
#else | |
miface.rescind(miface.userdata, ptr); | |
#endif | |
} | |
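// With ASAN enabled, memIRequest lays an allocation out as: | |
//   [ Size header + poisoned padding ][ user data (size bytes) ] | |
//   ^ orig_ptr                        ^ ptr = orig_ptr + alignof(max_align_t) | |
// memIRescind then walks back alignof(max_align_t) bytes to recover the stored | |
// size before handing the whole region back to the parent allocator. | |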
/***************************************************************************************** | |
* Memory Push Allocator | |
*/ | |
typedef struct Memory_Push_Block_ { | |
struct Memory_Push_Block_ *next; | |
Size cap; | |
Size len; | |
Byte _pad[8]; | |
Byte data[]; | |
} Memory_Push_Block_; | |
static_assert(offsetof(Memory_Push_Block_, data) % alignof(max_align_t) == 0, "The data member has to be properly aligned"); | |
typedef struct Memory_Push { | |
Memory_Allocator_Interface parent_miface; | |
Memory_Allocator_Interface miface; | |
Memory_Push_Block_ *first, *last; | |
Size size; | |
} Memory_Push; | |
header_function void* memPushAllot (Memory_Push *push, Size size); | |
header_function | |
MEMORY_ALLOCATOR_REQUEST_FUNC(memPush_IfaceAllocate) | |
{ | |
Memory_Push *mp = cast_val(Memory_Push*, userdata); | |
void *ptr = memPushAllot(mp, amount); | |
return ptr; | |
} | |
header_function | |
MEMORY_ALLOCATOR_RESCIND_FUNC(memPush_IfaceDeallocate) | |
{ | |
Memory_Push *mp = cast_val(Memory_Push*, userdata); | |
unused_variable(mp); | |
unused_variable(ptr); | |
} | |
header_function | |
Memory_Allocator_Interface memPushGetInterface (Memory_Push *mp) | |
{ | |
return mp->miface; | |
} | |
header_function | |
Size memPush_PaddingForAlignment (void *ptr, Size offset, Size alignment) | |
{ | |
Uptr old_location = cast_bit(Uptr, ptr) + offset; | |
if (old_location % alignment) { | |
Uptr next_location = old_location + alignment; | |
Uptr align_location = (next_location/alignment)*alignment; | |
Uptr diff = align_location - old_location; | |
return diff; | |
} | |
return 0; | |
} | |
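// For example, if ptr + offset lands on address 5 and alignment is 4, the | |
// next aligned address is 8, so the padding returned is 3; already-aligned | |
// locations return 0. | |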
header_function | |
void* memPush_AcquireMemory (Memory_Allocator_Interface miface, Size size) | |
{ | |
Size size_total = sizeof(Memory_Push_Block_) + size; | |
Memory_Push_Block_ *block = cast_val(Memory_Push_Block_ *, memIRequest(miface, size_total)); | |
memzero(block); | |
block->cap = size; | |
block->len = 0; | |
#if defined(ENV_SANITIZER_ADDRESS) | |
ASAN_POISON_MEMORY_REGION(block->data, block->cap); | |
#endif | |
return block; | |
} | |
header_function | |
void memPushCreate (Memory_Push *mp, Memory_Allocator_Interface miface, Size size) | |
{ | |
memzero(mp); | |
mp->parent_miface = miface; | |
mp->miface = memICreate(mp, memPush_IfaceAllocate, memPush_IfaceDeallocate); | |
mp->size = size; | |
mp->first = cast_val(Memory_Push_Block_ *, memPush_AcquireMemory(miface, size)); | |
mp->last = mp->first; | |
memzero(mp->first); | |
mp->first->cap = size; | |
} | |
header_function | |
void memPushDelete (Memory_Push *push) | |
{ | |
for (Memory_Push_Block_ *mpb = push->first, *mpbn; mpb != nullptr; mpb = mpbn) { | |
mpbn = mpb->next; | |
#if defined(ENV_SANITIZER_ADDRESS) | |
ASAN_UNPOISON_MEMORY_REGION(mpb->data, mpb->cap); | |
#endif | |
memIRescind(push->parent_miface, mpb); | |
} | |
memzero(push); | |
} | |
header_function | |
void* memPushAllotA (Memory_Push *push, Size size, Size alignment) | |
{ | |
Size pad = memPush_PaddingForAlignment(push->last->data, push->last->len, alignment); | |
if ((push->last->len + pad + size) > push->last->cap) { | |
if (push->last->next == nullptr) { | |
Size s = maximum(push->size, size) + alignment; | |
push->last->next = cast_val(Memory_Push_Block_ *, memPush_AcquireMemory(push->parent_miface, s)); | |
push->last->next->cap = s; | |
} | |
push->last = push->last->next; | |
} | |
pad = memPush_PaddingForAlignment(push->last->data, push->last->len, alignment); | |
push->last->len += pad; | |
void *m = push->last->data + push->last->len; | |
push->last->len += size; | |
#if defined(ENV_SANITIZER_ADDRESS) | |
ASAN_UNPOISON_MEMORY_REGION(m, size); | |
#endif | |
return m; | |
} | |
header_function | |
void* memPushAllot (Memory_Push *push, Size size) | |
{ | |
return memPushAllotA(push, size, alignof(max_align_t)); | |
} | |
header_function | |
void memPushRemitAll (Memory_Push *push) | |
{ | |
for (Memory_Push_Block_ *mpb = push->first; mpb != nullptr; mpb = mpb->next) { | |
mpb->len = 0; | |
#if defined(ENV_SANITIZER_ADDRESS) | |
ASAN_POISON_MEMORY_REGION(mpb->data, mpb->cap); | |
#endif | |
} | |
push->last = push->first; | |
} | |
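// Typical arena-style lifetime, as a sketch (hypothetical `heap` interface | |
// and `Thing` type): | |
#if 0 | |
Memory_Push arena; | |
memPushCreate(&arena, heap, 64 * 1024); // one 64 KiB block up front | |
Thing *t = cast_val(Thing*, memPushAllot(&arena, sizeof(Thing))); | |
// ... use t; there are no per-allocation frees ... | |
memPushRemitAll(&arena); // reset and reuse the blocks | |
memPushDelete(&arena); // return every block to `heap` | |
#endif | |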
/************************************************************************************************ | |
* Memory Slab Allocator | |
*/ | |
typedef struct Memory_Slab { | |
Free_Grid *fg; | |
Memory_Allocator_Interface miface; | |
void *buffer; | |
Size memory_size; | |
Size elem_size; | |
Size elem_count; | |
Size elem_allocated; | |
} Memory_Slab; | |
header_function | |
void slabCreate (Memory_Slab *s, Memory_Allocator_Interface miface, Uint32 elem_count, Size elem_size) | |
{ | |
memzero(s); | |
s->miface = miface; | |
s->elem_count = elem_count; | |
s->elem_size = elem_size; | |
s->memory_size = elem_size * elem_count; | |
s->fg = cast_val(Free_Grid*, memIRequest(miface, sizeof(*s->fg))); | |
fgCreate(s->fg, elem_count); | |
s->buffer = memIRequest(miface, s->memory_size); | |
#if defined(ENV_SANITIZER_ADDRESS) | |
ASAN_POISON_MEMORY_REGION(s->buffer, s->memory_size); | |
#endif | |
} | |
header_function | |
void slabDelete (Memory_Slab *s) | |
{ | |
#if defined(ENV_SANITIZER_ADDRESS) | |
ASAN_UNPOISON_MEMORY_REGION(s->buffer, s->memory_size); | |
#endif | |
memIRescind(s->miface, s->buffer); | |
fgDelete(s->fg); | |
} | |
header_function | |
Size slabGetIndexOfPtr (Memory_Slab *s, void *m) | |
{ | |
Uptr offset = cast_bit(Uptr, m) - cast_bit(Uptr, s->buffer); | |
Uptr index = offset / s->elem_size; | |
return index; | |
} | |
header_function | |
void* slabGetPtrOfIndex (Memory_Slab *s, Size index) | |
{ | |
void *result = cast_val(Byte *, s->buffer) + (index * s->elem_size); | |
return result; | |
} | |
header_function | |
void* slabRequest (Memory_Slab *s) | |
{ | |
debugAssert(s->elem_allocated < s->elem_count); | |
Uint32 index; | |
Bool found = fgAllocate(s->fg, &index); // keep the call outside debugAssert so it is not compiled out of release builds | |
debugAssert(found); | |
unused_variable(found); | |
void *result = slabGetPtrOfIndex(s, index); | |
#if defined(ENV_SANITIZER_ADDRESS) | |
ASAN_UNPOISON_MEMORY_REGION(result, s->elem_size); | |
#endif | |
s->elem_allocated++; | |
return result; | |
} | |
header_function | |
void slabRescind (Memory_Slab *s, void *m) | |
{ | |
debugAssert(s->elem_allocated); | |
#if defined(ENV_SANITIZER_ADDRESS) | |
ASAN_POISON_MEMORY_REGION(m, s->elem_size); | |
#endif | |
Uint32 index = cast_val(Uint32, (cast_val(Dptr, (cast_val(Byte *, m) - cast_val(Byte *, s->buffer)))/cast_val(Dptr, s->elem_size))); | |
fgFree(s->fg, index); | |
s->elem_allocated--; | |
} | |
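// Slab usage sketch for fixed-size objects (hypothetical `heap` interface | |
// and `Thing` type): | |
#if 0 | |
Memory_Slab slab; | |
slabCreate(&slab, heap, 1024, aligned_sizeof(Thing)); // 1024 equally sized slots | |
Thing *t = cast_val(Thing*, slabRequest(&slab)); | |
// ... use t ... | |
slabRescind(&slab, t); | |
slabDelete(&slab); | |
#endif | |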
/************************************************************************************************ | |
* TLSF Allocator | |
* -------------- | |
* | |
* The following is a sample table with buffer_size = GiB(4), min_alloc = MiB(1) and increment_step = 8 | |
* Each row is a primary level, where the buffer is divided into units whose sizes are consecutive powers of two | |
* (geometric progression). Each column then breaks a unit into linear sub-units separated by a constant | |
* increment (arithmetic progression). | |
* | |
* ________________________________________________________________________________________________________________________ | |
* | | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | |
* |----|-------|---------------|--------------|--------------|--------------|--------------|--------------|--------------| | |
* | 31 | 2G | 2G+256M | 2G+512M | 2G+768M | 3G | 3G+256M | 3G+512M | 3G+768M | | |
* | 30 | 1G | 1G+128M | 1G+256M | 1G+384M | 1G+512M | 1G+640M | 1G+768M | 1G+896M | | |
* | 29 | 512M | 576M | 640M | 704M | 768M | 832M | 896M | 960M | | |
* | 28 | 256M | 288M | 320M | 352M | 384M | 416M | 448M | 480M | | |
* | 27 | 128M | 144M | 160M | 176M | 192M | 208M | 224M | 240M | | |
* | 26 | 64M | 72M | 80M | 88M | 96M | 104M | 112M | 120M | | |
* | 25 | 32M | 36M | 40M | 44M | 48M | 52M | 56M | 60M | | |
* | 24 | 16M | 18M | 20M | 22M | 24M | 26M | 28M | 30M | | |
* | 23 | 8M | 9M | 10M | 11M | 12M | 13M | 14M | 15M | | |
* | 22 | 4M | 4M+512K | 5M | 5M+512K | 6M | 6M+512K | 7M | 7M+512K | | |
* | 21 | 2M | 2M+256K | 2M+512K | 2M+768K | 3M | 3M+256K | 3M+512K | 3M+768K | | |
* | 20 | 1M | 1M+128K | 1M+256K | 1M+384K | 1M+512K | 1M+640K | 1M+768K | 1M+896K | | |
* |------------|---------------|--------------|--------------|--------------|--------------|--------------|--------------| | |
* | |
* Reference: | |
* [1] : http://www.gii.upv.es/tlsf/files/papers/tlsf_desc.pdf | |
* [2] : http://www.gii.upv.es/tlsf/files/papers/ecrts04_tlsf.pdf | |
* [3] : http://www.gii.upv.es/tlsf/files/papers/jrts2008.pdf | |
*/ | |
// TODO(naman): Right now, this is 64 bytes large. Once I am sure that the allocator is bug-free (i.e., once it has been | |
// tested enough), rename `next_slot_available_for_dispensing` and `next_free` to `next` and `prev_free` to `prev`, since a slot can either | |
// be empty or house a free node but not both. Also, store the `allocated` bool in the top bit of `size`. That will drop the | |
// size of the struct to 48 bytes, reducing the metadata size by close to 25%. | |
typedef struct TLSF_Node { | |
Uint64 offset; | |
Uint64 size; | |
// Contains the pointer to the next slot from which a node can be dispensed in case of out-of-band allocation | |
struct TLSF_Node *next_slot_available_for_dispensing; | |
// For free blocks. The next_free member points to the node for the next free block in the | |
// same size class. The chain's head pointer is stored in the free_table. | |
struct TLSF_Node *next_free, *prev_free; | |
// For allocated and free blocks. The following members point to the nodes associated with the blocks right | |
// after and before them in the address space. | |
struct TLSF_Node *next_mem, *prev_mem; | |
Bool allocated; // false = free (make sure it's not left true for empty slots) | |
Byte _pad[7]; | |
} TLSF_Node; | |
static_assert(sizeof(TLSF_Node) % alignof(max_align_t) == 0, \ | |
"TLSF Nodes' size needs to be a multiple of the maximum alignment, " \ | |
"so that they can be packed in the node_dispenser, etc."); | |
#define TLSF_INCREMENT_COUNT_PER_ROW 16 | |
typedef struct TLSF { | |
Memory_Allocator_Interface miface; | |
Uint64 buffer_size; | |
Uint64 min_alloc; | |
Uint32 rows, columns; | |
// One bit for each row, to show if any bit in the corresponding row_bitmap is set. | |
// Bitmap's LSB->MSB is bottom->top row | |
Uint64 column_bitmap; | |
// One bitmap per row, with each bit standing for one cell. | |
// Index 0->N maps to bottom->top row, bitmap's LSB->MSB is left->right cell | |
Uint64 row_bitmaps[64]; | |
TLSF_Node* free_table[64][TLSF_INCREMENT_COUNT_PER_ROW]; | |
union { // Both point to TLSF->data | |
Byte *memory; // In-place memory allocator | |
TLSF_Node *node_dispenser; // Out-of-band 64-bit address space allocator | |
}; | |
TLSF_Node *first_slot_available_for_dispensing; // If this is NULL, we are dealing with in-place memory allocator | |
Byte data[]; | |
} TLSF; | |
#define TLSF_HEAD_OF_FREE_LIST_(t, r, c) ((t)->free_table[(r)][(c)]) | |
// sl = log2(size) | |
// ml = log2(min_alloc) | |
// il = log2(increment) | |
// | |
// r = sl - ml | |
// c = (size - 2^sl) / ((2*2^sl - 2^sl)/2^il) | |
// = (size - 2^sl) / (2^sl*(2 - 1) / 2^il) | |
// = (size - 2^sl) / (2^sl / 2^il) | |
// = (size - 2^sl) / (2^(sl-il)) | |
// = (size/2^(sl-il)) - (2^sl/(2^(sl-il))) | |
// = (size/2^(sl-il)) - 2^il | |
// = (size >> (sl-il)) - 2^il | |
// = (size >> (sl-il)) - (1 << il) | |
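// Worked example (hypothetical request, using the 8-column table above, i.e. ml = 20, il = 3): | |
// for size = 6M+100K, sl = 22, so r = 22 - 20 = 2 and | |
// c = ((6M+100K) >> (22 - 3)) - (1 << 3) = 12 - 8 = 4, | |
// which is the [6M, 6M+512K) cell in the sl = 22 row of the table. | |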
#define TLSF_ROW_(t, s) cast_val(Uint8, (bitLog2U64(s) - bitLog2U64((t)->min_alloc))) | |
#define TLSF_COL_(t, s) cast_val(Uint8, (((s) >> (bitLog2U64(s) - bitLog2U64(cast_val(Uint64, ((t)->columns))))) - \ | |
(1ULL << bitLog2U64(cast_val(Uint64, ((t)->columns)))))) | |
#define TLSF_OOB_(t) ((t)->first_slot_available_for_dispensing) | |
// NOTE(naman): buffer_size and min_alloc should be powers of two with value >= 2; | |
// buffer_size should be greater than min_alloc | |
#define tlsfCreate(_miface, _bufsz, _minal) tlsfCreateEx(_miface, _bufsz, _minal, true) | |
header_function | |
TLSF* tlsfCreateEx (Memory_Allocator_Interface miface, | |
Uint64 buffer_size, Uint64 min_alloc, | |
Bool out_of_band_metadata) | |
{ | |
Uint8 increment_steps = TLSF_INCREMENT_COUNT_PER_ROW; | |
Uint8 buffer_size_bits = bitLog2U64(buffer_size); | |
Uint8 min_alloc_bits = bitLog2U64(min_alloc); | |
Uint8 row_count = cast_val(Uint8, buffer_size_bits - min_alloc_bits); | |
Size maximum_allocation_count = buffer_size / min_alloc; | |
Size total_size = memoryAlignUpTo(sizeof(TLSF), alignof(max_align_t)); | |
if (out_of_band_metadata) { | |
Size list_size = maximum_allocation_count * sizeof(TLSF_Node); | |
total_size += memoryAlignUpTo(list_size, alignof(max_align_t)); | |
} else { | |
total_size += memoryAlignUpTo(buffer_size, alignof(max_align_t)); | |
} | |
TLSF *tlsf = cast_val(TLSF *, memIRequest(miface, total_size)); | |
memzero(tlsf); | |
tlsf->miface = miface; | |
tlsf->buffer_size = buffer_size; | |
tlsf->min_alloc = min_alloc; | |
tlsf->rows = row_count; | |
tlsf->columns = increment_steps; | |
if (out_of_band_metadata) { | |
pragma_clang("clang diagnostic push"); | |
pragma_clang("clang diagnostic ignored \"-Wcast-align\""); | |
tlsf->node_dispenser = cast_ptr(TLSF_Node *, tlsf->data); | |
pragma_clang("clang diagnostic pop"); | |
tlsf->first_slot_available_for_dispensing = &tlsf->node_dispenser[0]; | |
// Put all available nodes in a list so that we can find the next one in O(1) for real-time | |
for (Size i = 0; i < maximum_allocation_count-1; i++) { | |
memzero(&tlsf->node_dispenser[i]); | |
tlsf->node_dispenser[i].next_slot_available_for_dispensing = &tlsf->node_dispenser[i] + 1; | |
} | |
memzero(&tlsf->node_dispenser[maximum_allocation_count-1]); // Terminate the chain; memzero(tlsf) above doesn't cover the flexible array member | |
} else { | |
tlsf->memory = tlsf->data; | |
tlsf->first_slot_available_for_dispensing = nullptr; | |
tlsf->min_alloc += sizeof(TLSF_Node); // min_alloc is only used for checking if enough memory is available | |
} | |
{ // Adding the full buffer in free_table | |
TLSF_Node node; | |
memzero(&node); | |
node.offset = 0; | |
node.size = buffer_size - 1; | |
Uint8 row = TLSF_ROW_(tlsf, node.size); | |
Uint8 col = TLSF_COL_(tlsf, node.size); | |
TLSF_Node *slot_for_full_buffer = nullptr; | |
if (out_of_band_metadata) { | |
slot_for_full_buffer = tlsf->first_slot_available_for_dispensing; | |
tlsf->first_slot_available_for_dispensing = slot_for_full_buffer->next_slot_available_for_dispensing; | |
} else { | |
pragma_clang("clang diagnostic push"); | |
pragma_clang("clang diagnostic ignored \"-Wcast-align\""); | |
slot_for_full_buffer = cast_ptr(TLSF_Node *, tlsf->memory); | |
pragma_clang("clang diagnostic pop"); | |
} | |
*slot_for_full_buffer = node; | |
TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col) = slot_for_full_buffer; | |
tlsf->column_bitmap |= (1ULL << row); | |
tlsf->row_bitmaps[row] |= (1ULL << col); | |
} | |
return tlsf; | |
} | |
header_function | |
void tlsfDelete (TLSF *tlsf) | |
{ | |
Memory_Allocator_Interface miface = tlsf->miface; | |
memIRescind(miface, tlsf); | |
} | |
header_function | |
void tlsf_AddNodeToFreeList (TLSF *tlsf, TLSF_Node *node) | |
{ | |
Size row = TLSF_ROW_(tlsf, node->size); // fl in [1] | |
Size col = TLSF_COL_(tlsf, node->size); // sl in [1] | |
// Add node to free list | |
node->prev_free = nullptr; | |
node->next_free = TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col); | |
if (TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col)) { | |
TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col)->prev_free = node; | |
} | |
// Add node to table | |
TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col) = node; | |
// Update bitmap | |
tlsf->row_bitmaps[row] |= (1ULL << col); | |
tlsf->column_bitmap |= (1ULL << row); | |
} | |
header_function | |
void tlsf_RemoveNodeFromFreeList (TLSF *tlsf, TLSF_Node *node) | |
{ | |
Size row = TLSF_ROW_(tlsf, node->size); // fl in [1] | |
Size col = TLSF_COL_(tlsf, node->size); // sl in [1] | |
// Remove node from the free list | |
if (node->next_free) node->next_free->prev_free = node->prev_free; | |
if (node->prev_free) node->prev_free->next_free = node->next_free; | |
// Remove node from table | |
if (TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col) == node) { | |
TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col) = node->next_free; | |
} | |
// Update bitmap | |
if (TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col) == nullptr) { // No free blocks left in that size range | |
tlsf->row_bitmaps[row] &= ~(1ULL << col); | |
if (tlsf->row_bitmaps[row] == 0) { | |
tlsf->column_bitmap &= ~(1ULL << row); | |
} | |
} | |
} | |
header_function | |
TLSF_Node* tlsf_AcquireNode (TLSF *tlsf, Size offset) | |
{ | |
TLSF_Node *result; | |
// Find an empty slot in the list and put the split node in it | |
if (TLSF_OOB_(tlsf)) { | |
result = tlsf->first_slot_available_for_dispensing; | |
tlsf->first_slot_available_for_dispensing = result->next_slot_available_for_dispensing; | |
} else { | |
pragma_clang("clang diagnostic push"); | |
pragma_clang("clang diagnostic ignored \"-Wcast-align\""); | |
result = cast_ptr(TLSF_Node *, &tlsf->data[offset]); | |
pragma_clang("clang diagnostic pop"); | |
} | |
memzero(result); | |
return result; | |
} | |
header_function | |
void tlsf_ReleaseNode (TLSF *tlsf, TLSF_Node *node) | |
{ | |
if (TLSF_OOB_(tlsf)) { // Add node into the slot dispenser | |
node->next_slot_available_for_dispensing = tlsf->first_slot_available_for_dispensing; | |
tlsf->first_slot_available_for_dispensing = node; | |
} | |
} | |
// WARN(naman): When using with in-band memory, make sure to add the sizeof(TLSF_Node) to size_requested | |
// before calling this function. | |
header_function | |
TLSF_Node* tlsfAllocate (TLSF *tlsf, Size size_requested) | |
{ | |
if (size_requested > tlsf->buffer_size) return nullptr; | |
Size size = maximum(size_requested, tlsf->min_alloc); | |
// The next slot for size (so that we do a loop-less good fit for real-time instead of a looped best fit) | |
// sl = log2(size) | |
// il = log2(increment) | |
// | |
// gap = size_of_each_column_in_that_row = ((2*2^sl - 2^sl)/2^il) = 2^(sl-il) | |
// size_next = size + 2^(sl-il) | |
// However, in case of size = 2^sl, we don't want to go to the next block. Thus, we should subtract 1 from size_next, | |
// so that we end up back in the same table slot in that particular case. | |
// => size_next = size + 2^(sl-il) - 1 | |
Size size_class = size + (1ULL << (bitLog2U64(size) - bitLog2U64(cast_val(Uint64, tlsf->columns)))) - 1; | |
size_class = minimum(size_class, tlsf->buffer_size-1); | |
Size row = TLSF_ROW_(tlsf, size_class); // fl in [1] | |
Size col = TLSF_COL_(tlsf, size_class); // sl in [1] | |
Uint64 row_bitmap = tlsf->row_bitmaps[row] & (0xFFFFFFFFFFFFFFFFULL << col); | |
Size row_available, col_available; | |
if (row_bitmap) { | |
col_available = bitLSBU64(row_bitmap); | |
row_available = row; | |
} else { | |
Uint64 col_bitmap = tlsf->column_bitmap & (0xFFFFFFFFFFFFFFFFULL << (row + 1)); | |
if (col_bitmap) { | |
row_available = bitLSBU64(col_bitmap); | |
col_available = bitLSBU64(tlsf->row_bitmaps[row_available]); | |
} else { | |
return nullptr; | |
} | |
} | |
// Get the free node of the free list and mark it used | |
TLSF_Node *free_node = TLSF_HEAD_OF_FREE_LIST_(tlsf, row_available, col_available); // This will definitely succeed, so free_node != nullptr | |
// WARN(naman): Don't zero out free_node. Some of its contents are used below and some are carried forward. | |
tlsf_RemoveNodeFromFreeList(tlsf, free_node); | |
TLSF_Node *split_node_slot = nullptr; | |
if (free_node->size >= (size + tlsf->min_alloc)) { // If the acquired block is too big | |
Size split_node_size = free_node->size - size; | |
Size split_offset = free_node->offset + size; | |
split_node_slot = tlsf_AcquireNode(tlsf, split_offset); | |
split_node_slot->offset = split_offset; | |
split_node_slot->size = split_node_size; | |
split_node_slot->prev_mem = free_node; | |
split_node_slot->next_mem = free_node->next_mem; | |
if (split_node_slot->next_mem) split_node_slot->next_mem->prev_mem = split_node_slot; // Keep the following block's back-link consistent | |
tlsf_AddNodeToFreeList(tlsf, split_node_slot); | |
} | |
{ // Create a node for the allocated block | |
// free_node->offset = offset; // Already set | |
free_node->size = size; | |
free_node->next_slot_available_for_dispensing = nullptr; | |
free_node->next_free = nullptr; | |
free_node->prev_free = nullptr; | |
free_node->next_mem = split_node_slot; | |
// free_node->prev_mem = prev_mem; // Already set | |
free_node->allocated = true; | |
} | |
return free_node; | |
} | |
header_function | |
TLSF_Node* tlsf_MergeLeft (TLSF *tlsf, TLSF_Node *node) | |
{ | |
TLSF_Node *left = node->prev_mem; | |
if (left && !left->allocated) { | |
Size node_size_old = node->size; | |
Size left_size_old = left->size; | |
Size left_size_new = node_size_old + left_size_old; | |
tlsf_RemoveNodeFromFreeList(tlsf, left); | |
{ // Remove node from the memory list | |
left->next_mem = node->next_mem; | |
if (left->next_mem) left->next_mem->prev_mem = left; | |
} | |
tlsf_ReleaseNode(tlsf, node); | |
if (!node->allocated) { // If the node to be expanded is free (e.g., during deallocation) | |
tlsf_RemoveNodeFromFreeList(tlsf, node); | |
left->size = left_size_new; // Updated here, so that tlsf_AddNodeToFreeList below works | |
tlsf_AddNodeToFreeList(tlsf, left); | |
} else { | |
left->allocated = true; | |
node->allocated = false; | |
left->size = left_size_new; // Still has to be updated if node->allocated == true | |
} | |
return left; | |
} else { | |
return node; | |
} | |
} | |
header_function | |
TLSF_Node* tlsf_MergeRight (TLSF *tlsf, TLSF_Node *node) | |
{ | |
TLSF_Node *right = node->next_mem; | |
if (right && !right->allocated) { | |
Size node_size_old = node->size; | |
Size right_size_old = right->size; | |
Size node_size_new = node_size_old + right_size_old; | |
tlsf_RemoveNodeFromFreeList(tlsf, right); | |
{ // Remove right from the memory list | |
node->next_mem = right->next_mem; | |
if (node->next_mem) node->next_mem->prev_mem = node; | |
} | |
tlsf_ReleaseNode(tlsf, right); | |
if (!node->allocated) { // If the node to be expanded is free (e.g., during deallocation) | |
tlsf_RemoveNodeFromFreeList(tlsf, node); | |
node->size = node_size_new; // Updated here, so that tlsf_AddNodeToFreeList below works | |
tlsf_AddNodeToFreeList(tlsf, node); | |
} else { | |
node->size = node_size_new; // Still has to be updated if node->allocated == true | |
} | |
} | |
return node; | |
} | |
// WARN(naman): When using with in-band memory, make sure to add the sizeof(TLSF_Node) to size_requested | |
// before calling this function. | |
header_function | |
Bool tlsfAdjustLeft (TLSF *tlsf, TLSF_Node **node, Size new_size) | |
{ | |
if (!(*node)->allocated) return false; | |
*node = tlsf_MergeLeft(tlsf, *node); | |
new_size = maximum(new_size, tlsf->min_alloc); | |
if ((*node)->size < new_size) return false; | |
Size remnant_offset = (*node)->offset; | |
Size remnant_size = (*node)->size - new_size; | |
if (remnant_size == 0) return true; | |
if (remnant_size < tlsf->min_alloc) return true; | |
{ // Add a remnant node | |
TLSF_Node *remnant = tlsf_AcquireNode(tlsf, remnant_offset); | |
// Size it properly | |
remnant->offset = remnant_offset; | |
remnant->size = remnant_size; | |
(*node)->offset += remnant->size; | |
(*node)->size = new_size; | |
// Add to memory list | |
remnant->next_mem = *node; | |
remnant->prev_mem = (*node)->prev_mem; | |
if ((*node)->prev_mem) (*node)->prev_mem->next_mem = remnant; | |
(*node)->prev_mem = remnant; | |
tlsf_AddNodeToFreeList(tlsf, remnant); | |
} | |
return true; | |
} | |
// WARN(naman): When using with in-band memory, make sure to add the sizeof(TLSF_Node) to size_requested | |
// before calling this function. | |
header_function | |
Bool tlsfAdjustRight (TLSF *tlsf, TLSF_Node **node, Size new_size) | |
{ | |
if (!(*node)->allocated) return false; | |
*node = tlsf_MergeRight(tlsf, *node); | |
new_size = maximum(new_size, tlsf->min_alloc); | |
if ((*node)->size < new_size) return false; | |
Size remnant_offset = (*node)->offset + new_size; | |
Size remnant_size = (*node)->size - new_size; | |
if (remnant_size == 0) return true; | |
if (remnant_size < tlsf->min_alloc) return true; | |
{ // Add a remnant node | |
TLSF_Node *remnant = tlsf_AcquireNode(tlsf, remnant_offset); | |
// Size it properly | |
remnant->offset = remnant_offset; | |
remnant->size = remnant_size; | |
// node->offset doesn't change | |
(*node)->size = new_size; | |
// Add to memory list | |
remnant->prev_mem = *node; | |
remnant->next_mem = (*node)->next_mem; | |
if ((*node)->next_mem) (*node)->next_mem->prev_mem = remnant; | |
(*node)->next_mem = remnant; | |
tlsf_AddNodeToFreeList(tlsf, remnant); | |
} | |
return true; | |
} | |
header_function | |
Bool tlsfFree (TLSF *tlsf, TLSF_Node *node) | |
{ | |
if (!node->allocated) return false; | |
node = tlsf_MergeLeft(tlsf, node); | |
node = tlsf_MergeRight(tlsf, node); | |
tlsf_AddNodeToFreeList(tlsf, node); | |
node->allocated = false; | |
return true; | |
} | |
header_function | |
Size tlsfGetSize (TLSF *tlsf, TLSF_Node *node) | |
{ | |
unused_variable(tlsf); | |
if (!node->allocated) return 0; | |
return node->size; | |
} | |
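/* | |
* Usage sketch (illustrative only): `mi` is assumed to be a valid Memory_Allocator_Interface. | |
* With out-of-band metadata (the tlsfCreate default), the TLSF manages an abstract range and | |
* returns TLSF_Node handles; node->offset is the block's position within that range. | |
* | |
* TLSF *t = tlsfCreate(mi, MiB(512), MiB(1)); // Powers of two, buffer > min_alloc | |
* TLSF_Node *n = tlsfAllocate(t, MiB(3)); // nullptr if no fitting block exists | |
* Uint64 offset = n->offset; // Where the block lives in the managed range | |
* tlsfFree(t, n); | |
* tlsfDelete(t); | |
*/ | |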
/************************************************************************************************ | |
* TLSF-based General Purpose Memory Allocator | |
*/ | |
typedef struct Memory { | |
TLSF *tlsf; | |
Memory_Allocator_Interface parent_miface; | |
Size max_mem; | |
} Memory; | |
// NOTE(naman): With (512 MiB, 32 KiB, 16), it will need slightly more than 1 MiB of metadata. | |
header_function | |
Memory memCreate (Memory_Allocator_Interface parent_miface, Size memory_size) | |
{ | |
Size max_mem = memory_size; | |
Size min_mem = bitNextPowerOf2(sizeof(TLSF_Node)); // To get at least 50% load | |
debugAssert(max_mem > min_mem); | |
debugAssert(bitIsPowerOf2(max_mem)); | |
debugAssert(bitIsPowerOf2(min_mem)); | |
TLSF *tlsf = tlsfCreateEx(parent_miface, max_mem, min_mem, false); | |
Memory memory = { | |
.tlsf = tlsf, | |
.parent_miface = parent_miface, | |
.max_mem = max_mem, | |
}; | |
return memory; | |
} | |
header_function | |
void memRemove (Memory *mt) | |
{ | |
tlsfDelete(mt->tlsf); | |
} | |
header_function | |
void* memAllocate (Memory *mt, Size size) | |
{ | |
Size real_size = size + sizeof(TLSF_Node); | |
#if defined(ENV_SANITIZER_ADDRESS) | |
real_size += (2 * alignof(max_align_t)); | |
#endif | |
TLSF_Node *node = tlsfAllocate(mt->tlsf, real_size); | |
if (node == nullptr) return nullptr; // Out of memory | |
Byte *result = cast_ptr(Byte *, node + 1); | |
#if defined(ENV_SANITIZER_ADDRESS) | |
Byte *san_header = result; | |
ASAN_POISON_MEMORY_REGION(san_header, alignof(max_align_t)); | |
result += alignof(max_align_t); | |
Byte *san_footer = result + size; | |
ASAN_POISON_MEMORY_REGION(san_footer, alignof(max_align_t)); | |
#endif | |
return result; | |
} | |
// For Memory_Allocator_Interface | |
header_function | |
void* mem_Allocate (void *mt, Size size) { | |
return memAllocate(cast_val(Memory*, mt), size); | |
} | |
// Return nullptr if extension failed, so that the application knows that more memory is not available | |
header_function | |
void* memExtend (Memory *mt, void *ptr, Size size) | |
{ | |
#if defined(ENV_SANITIZER_ADDRESS) | |
Byte *san_header_old = cast_ptr(Byte *, ptr) - alignof(max_align_t); | |
ASAN_UNPOISON_MEMORY_REGION(san_header_old, alignof(max_align_t)); | |
TLSF_Node *node = cast_ptr(TLSF_Node *, san_header_old) - 1; | |
Byte *san_footer_old = cast_ptr(Byte *, node) + tlsfGetSize(mt->tlsf, node) - alignof(max_align_t); | |
ASAN_UNPOISON_MEMORY_REGION(san_footer_old, alignof(max_align_t)); | |
#else | |
TLSF_Node *node = cast_ptr(TLSF_Node *, ptr) - 1; | |
#endif | |
Size real_size = size + sizeof(TLSF_Node); | |
#if defined(ENV_SANITIZER_ADDRESS) | |
real_size += (2 * alignof(max_align_t)); | |
#endif | |
Bool adjusted = tlsfAdjustLeft(mt->tlsf, &node, real_size); | |
// NOTE(naman): If adjust succeeded, we need to poison the header and footer on newly allocated memory. | |
// If it failed, we need to poison for old one, since we unpoisoned it above. | |
Byte *result = cast_ptr(Byte *, node + 1); | |
#if defined(ENV_SANITIZER_ADDRESS) | |
Byte *san_header = result; | |
ASAN_POISON_MEMORY_REGION(san_header, alignof(max_align_t)); | |
result += alignof(max_align_t); | |
Byte *san_footer = result + size; | |
ASAN_POISON_MEMORY_REGION(san_footer, alignof(max_align_t)); | |
#endif | |
return adjusted ? result : nullptr; | |
} | |
header_function | |
void memFree (Memory *mt, void *ptr) | |
{ | |
#if defined(ENV_SANITIZER_ADDRESS) | |
Byte *san_header_old = cast_ptr(Byte *, ptr) - alignof(max_align_t); | |
ASAN_UNPOISON_MEMORY_REGION(san_header_old, alignof(max_align_t)); | |
TLSF_Node *node = cast_ptr(TLSF_Node *, san_header_old) - 1; | |
Byte *san_footer_old = cast_ptr(Byte *, node) + tlsfGetSize(mt->tlsf, node) - alignof(max_align_t); | |
ASAN_UNPOISON_MEMORY_REGION(san_footer_old, alignof(max_align_t)); | |
#else | |
TLSF_Node *node = cast_ptr(TLSF_Node *, ptr) - 1; | |
#endif | |
tlsfFree(mt->tlsf, node); | |
} | |
// For Memory_Allocator_Interface | |
header_function | |
void mem_Free (void *mt, void *ptr) | |
{ | |
memFree(cast_val(Memory*, mt), ptr); | |
} | |
header_function | |
Size memGetSize (Memory *mt, void *ptr) | |
{ | |
#if defined(ENV_SANITIZER_ADDRESS) | |
Byte *san_header_old = cast_ptr(Byte *, ptr) - alignof(max_align_t); | |
TLSF_Node *node = cast_ptr(TLSF_Node *, san_header_old) - 1; | |
#else | |
TLSF_Node *node = cast_ptr(TLSF_Node *, ptr) - 1; | |
#endif | |
return tlsfGetSize(mt->tlsf, node); | |
} | |
header_function | |
Memory_Allocator_Interface memGetInterface (Memory *mt) | |
{ | |
Memory_Allocator_Interface miface = memICreate(mt, mem_Allocate, mem_Free); | |
return miface; | |
} | |
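/* | |
* Usage sketch (illustrative only): builds a general-purpose heap on top of a parent | |
* allocator `mi`, then re-exposes it through the same interface type so that it can in | |
* turn back the other containers in this file. | |
* | |
* Memory mem = memCreate(mi, MiB(512)); // memory_size must be a power of two | |
* void *p = memAllocate(&mem, 1000); | |
* Size sz = memGetSize(&mem, p); // >= 1000; reflects internal rounding and header overhead | |
* memFree(&mem, p); | |
* Memory_Allocator_Interface mmi = memGetInterface(&mem); | |
* memRemove(&mem); | |
*/ | |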
/************************************************************************************************ | |
* Hashing | |
* ******* | |
*/ | |
// https://en.wikipedia.org/wiki/Pearson_hashing | |
header_function | |
Uint8 hash8 (const void *data, Size len, Uint8 seed) | |
{ | |
const Byte *d = cast_val(const Byte*, data); | |
Uint8 H = seed; | |
for (Size i = 0; i < len; i++) { | |
const Uint8 A = cast_val(Uint8, H ^ d[i]); | |
// Permutation function generated with https://programming.sirrida.de/calcperm.php using input "4 0 3 7 5 2 1 6" | |
const Uint8 B = cast_val(Uint8, (((A & 0x41) << 1) | | |
((A & 0x04) << 3) | | |
((A & 0x02) << 5) | | |
((A & 0x90) >> 4) | | |
((A & 0x28) >> 1))); | |
H = B; | |
} | |
return H; | |
} | |
// Reference: Fast CRCs by Gam D. Nguyen (https://arxiv.org/pdf/1009.5949) [Fig. 8, Page 14] | |
header_function | |
Uint16 hash16 (const void *data, Size len, Uint16 seed) | |
{ | |
const Byte *d = cast_val(const Byte*, data); | |
const Uint16 f = 0x7; | |
const Uint16 k = 0x8000; | |
Uint16 CRC = seed; | |
Size i; | |
// Run until zero or one bytes are left | |
for (i = 0; (i + 2) <= len; i = i + 2) { | |
const Uint16 hword = srlzRead16BE(d + i); | |
Uint16 a = cast_val(Uint16, CRC ^ hword); | |
Uint16 c; | |
if (a & k) { | |
c = cast_val(Uint16, (a << 1) ^ f); | |
} else { | |
c = cast_val(Uint16, a << 1); | |
} | |
if (c & k) { | |
CRC = cast_val(Uint16, (c << 1) ^ f); | |
} else { | |
CRC = cast_val(Uint16, c << 1); | |
} | |
CRC = cast_val(Uint16, CRC ^ c ^ a); | |
} | |
// Finish up the last byte | |
if ((i + 1) == len) { | |
const Uint16 hword = cast_val(Uint16, cast_val(Uint16, srlzRead8(d + i)) << 8); | |
Uint16 a = cast_val(Uint16, CRC ^ hword); | |
Uint16 c; | |
if (a & k) { | |
c = cast_val(Uint16, (a << 1) ^ f); | |
} else { | |
c = cast_val(Uint16, a << 1); | |
} | |
if (c & k) { | |
CRC = cast_val(Uint16, (c << 1) ^ f); | |
} else { | |
CRC = cast_val(Uint16, c << 1); | |
} | |
CRC = cast_val(Uint16, CRC ^ c ^ a); | |
} | |
return CRC; | |
} | |
// https://github.com/aappleby/smhasher/blob/0ff96f7835817a27d0487325b6c16033e2992eb5/src/MurmurHash3.cpp#L94-L146 | |
header_function | |
Uint32 hash32 (const void *data, Size len, Uint32 seed) | |
{ | |
const Byte *d = cast_val(const Byte*, data); | |
Uint32 H = seed; | |
const Uint32 c1 = 0xcc9e2d51; | |
const Uint32 c2 = 0x1b873593; | |
#if defined(ENV_COMPILER_MSVC) | |
# define ROTL32(x,y) _rotl(x,y) | |
#elif defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG) | |
# define ROTL32(x,y) ((x << y) | (x >> (32 - y))) | |
#endif | |
Size i; | |
// Run until fewer than four bytes are left | |
for (i = 0; (i + 4) <= len; i = i + 4) { | |
Uint32 word = srlzRead32BE(d + i); | |
word *= c1; | |
word = ROTL32(word, 15); | |
word *= c2; | |
H ^= word; | |
H = ROTL32(H, 13); | |
H = H * 5 + 0xe6546b64; | |
} | |
{ // Finish up the remaining bytes | |
Size remaining = len - i; | |
Uint32 k = 0; | |
switch (remaining) { | |
case 3: k ^= cast_val(Uint32, d[i+2]) << 16; switch_fallthrough; | |
case 2: k ^= cast_val(Uint32, d[i+1]) << 8; switch_fallthrough; | |
case 1: k ^= cast_val(Uint32, d[i]); | |
k *= c1; | |
k = ROTL32(k, 15); | |
k *= c2; | |
H ^= k; | |
switch_fallthrough; | |
default: break; | |
} | |
} | |
#undef ROTL32 | |
H ^= len; | |
H ^= H >> 16; | |
H *= 0x85ebca6b; | |
H ^= H >> 13; | |
H *= 0xc2b2ae35; | |
H ^= H >> 16; | |
return H; | |
} | |
// https://github.com/aappleby/smhasher/blob/0ff96f7835817a27d0487325b6c16033e2992eb5/src/MurmurHash2.cpp#L88-L137 | |
header_function | |
Uint64 hash64 (const void *data, Size len, Uint64 seed) | |
{ | |
const Byte *d = cast_val(const Byte*, data); | |
const Uint64 m = 0xc6a4a7935bd1e995ULL; | |
const Sint r = 47; | |
Uint64 H = seed ^ (len * m); | |
Size i; | |
// Run until less than eight bytes are left | |
for (i = 0; (i + 8) <= len; i = i + 8) { | |
Uint64 dword = srlzRead64BE(d + i); | |
dword *= m; | |
dword ^= dword >> r; | |
dword *= m; | |
H ^= dword; | |
H *= m; | |
} | |
{ // Finish up the remaining bytes | |
Size remaining = len - i; | |
switch (remaining) { | |
case 7: H ^= cast_val(Uint64, d[i+6]) << 48; switch_fallthrough; | |
case 6: H ^= cast_val(Uint64, d[i+5]) << 40; switch_fallthrough; | |
case 5: H ^= cast_val(Uint64, d[i+4]) << 32; switch_fallthrough; | |
case 4: H ^= cast_val(Uint64, d[i+3]) << 24; switch_fallthrough; | |
case 3: H ^= cast_val(Uint64, d[i+2]) << 16; switch_fallthrough; | |
case 2: H ^= cast_val(Uint64, d[i+1]) << 8; switch_fallthrough; | |
case 1: H ^= cast_val(Uint64, d[i]); | |
H *= m; | |
switch_fallthrough; | |
default: break; | |
} | |
} | |
H ^= H >> r; | |
H *= m; | |
H ^= H >> r; | |
return H; | |
} | |
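/* | |
* Illustrative example (values are not test vectors): the seed parameter can be used to | |
* chain hashes over a multi-part key without concatenating the parts first. | |
* | |
* Uint64 h = hash64("hello", 5, 0); | |
* h = hash64("world", 5, h); // Fold a second buffer into the running hash | |
*/ | |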
/************************************************************************************************ | |
* Tag and Label (Small immutable stack-allocated strings) | |
* ******************************************************* | |
*/ | |
// We put the string at the end since most reads will be about comparing the hash and length; | |
// and even in cases when one has to read the string, it will likely be much shorter than the 247-character | |
// capacity, meaning that the trailing cache lines will never actually be loaded. | |
typedef struct Label { | |
union { struct { Uint32 h32; Uint16 h16; Uint8 h8; Uint8 len; }; Uint64 _id; }; | |
Char str[247]; Char _zero; // = 0 | |
} Label; | |
static_assert(sizeof(Label) == 256, "Label incorrectly sized"); | |
#define labelStrCap() (elemin(valzero(Label).str)) | |
typedef struct Tag { | |
union { struct { Uint16 h16; Uint8 h8; Uint8 len; }; Uint32 _id; }; | |
Char str[27]; Char _zero; // = 0 | |
} Tag; | |
static_assert(sizeof(Tag) == 32, "Tag incorrectly sized"); | |
#define tagStrCap() (elemin(valzero(Tag).str)) | |
header_function | |
Label labelMakeL (const Char *str, Uint8 len) | |
{ | |
debugAssert(len <= labelStrCap()); | |
Label l = { | |
.len = len, | |
}; | |
memcpy(l.str, str, l.len); | |
l.h32 = hash32(l.str, l.len, 0); | |
l.h16 = hash16(l.str, l.len, 0); | |
l.h8 = hash8(l.str, l.len, 0); | |
return l; | |
} | |
header_function | |
Tag tagMakeL (const Char *str, Uint8 len) | |
{ | |
debugAssert(len <= tagStrCap()); | |
Tag l = { | |
.len = len, | |
}; | |
memcpy(l.str, str, l.len); | |
l.h16 = hash16(l.str, l.len, 0); | |
l.h8 = hash8(l.str, l.len, 0); | |
return l; | |
} | |
header_function Label labelMake (const Char *str) { return labelMakeL(str, cast_val(Uint8, strlen(str))); } | |
header_function Tag tagMake (const Char *str) { return tagMakeL (str, cast_val(Uint8, strlen(str))); } | |
header_function Uint8 labelLen (Label l) { return l.len; } | |
header_function Uint8 tagLen (Tag l) { return l.len; } | |
header_function Bool labelIsEqual (Label a, Label b) { return (a._id == b._id) && (memcmp(a.str, b.str, minimum(a.len, b.len)) == 0); } | |
header_function Bool tagIsEqual (Tag a, Tag b) { return (a._id == b._id) && (memcmp(a.str, b.str, minimum(a.len, b.len)) == 0); } | |
header_function | |
Bool labelIsEqualStr (Label a, const Char *b) | |
{ | |
Bool result = strleq(a.str, a.len, b); | |
return result; | |
} | |
header_function | |
Bool tagIsEqualStr (Tag a, const Char *b) | |
{ | |
Bool result = strleq(a.str, a.len, b); | |
return result; | |
} | |
header_function Bool labelIsEqualStrL (Label a, const Char *b, Size blen) { return (a.len == blen) && (memcmp(a.str, b, blen) == 0); } | |
header_function Bool tagIsEqualStrL (Tag a, const Char *b, Size blen) { return (a.len == blen) && (memcmp(a.str, b, blen) == 0); } | |
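/* | |
* Usage sketch (illustrative only): Labels and Tags are plain value types, so they need no | |
* allocator and compare cheaply (the packed hash+length `_id` is checked before the bytes). | |
* | |
* Label a = labelMake("position"); | |
* Label b = labelMake("position"); | |
* Bool eq = labelIsEqual(a, b); // true | |
* Bool eqs = labelIsEqualStr(a, "position"); // true | |
*/ | |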
header_function | |
Bool marshalLabel (Label *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
if (!marshalUint8(&datum->len, func, funcdata, write, big_endian)) return false; | |
if (!func(datum->str, datum->len, funcdata)) return false; | |
if (!write) *datum = labelMakeL(datum->str, datum->len); | |
return true; | |
} | |
header_function | |
Bool marshalTag (Tag *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) { | |
if (!marshalUint8(&datum->len, func, funcdata, write, big_endian)) return false; | |
if (!func(datum->str, datum->len, funcdata)) return false; | |
if (!write) *datum = tagMakeL(datum->str, datum->len); | |
return true; | |
} | |
/************************************************************************************************ | |
* Txt (String Library) | |
* ******************** | |
*/ | |
typedef struct Txt_Arena { | |
Memory_Push mp; | |
} Txt_Arena; | |
header_function | |
Txt_Arena txtArenaCreate (Memory_Allocator_Interface mai) | |
{ | |
Txt_Arena ta = {}; | |
memPushCreate(&ta.mp, mai, MiB(1)); | |
return ta; | |
} | |
header_function | |
void txtArenaDelete (Txt_Arena *ta) | |
{ | |
memPushDelete(&ta->mp); | |
} | |
header_function | |
void txtArenaReset (Txt_Arena *ta) | |
{ | |
memPushRemitAll(&ta->mp); | |
} | |
typedef struct Txt { Char *str; } Txt; | |
typedef struct Txt_Header_ { | |
Uint32 len; | |
Char str[]; | |
} Txt_Header_; | |
header_function | |
Txt txt_Allocate (Txt_Arena *ta, Size len) | |
{ | |
Size size = len + 1 + sizeof(Txt_Header_); | |
Txt_Header_ *hdr = cast_val(Txt_Header_*, memPushAllotA(&ta->mp, size, alignof(Txt_Header_))); | |
debugAssert(len < UINT32_MAX); | |
hdr->len = cast_val(Uint32, len); | |
hdr->str[len] = '\0'; | |
Char *s = hdr->str; | |
Txt t = {.str = s}; | |
return t; | |
} | |
header_function | |
Txt txtNewL (Txt_Arena *ta, Char *s, Size len) | |
{ | |
Txt t = txt_Allocate(ta, len); | |
memcpy(t.str, s, len); | |
return t; | |
} | |
header_function | |
Txt txtNew (Txt_Arena *ta, Char *s) | |
{ | |
Size length = strlen(s); | |
Txt result = txtNewL(ta, s, length); | |
return result; | |
} | |
header_function | |
Char* txtStr (Txt t) | |
{ | |
Char *s = t.str; | |
return s; | |
} | |
header_function | |
Size txtLen (Txt t) | |
{ | |
Txt_Header_ *hdr = containerof(t.str, Txt_Header_, str); | |
return hdr->len; | |
} | |
header_function | |
Bool txt_EqN (Txt t1, Txt t2, Size len) | |
{ | |
Bool result = memcmp(t1.str, t2.str, len) == 0; | |
return result; | |
} | |
header_function | |
Bool txtEqTT (Txt t1, Txt t2) | |
{ | |
Size t1l = txtLen(t1); | |
Size t2l = txtLen(t2); | |
if (t1l != t2l) return false; | |
Bool result = txt_EqN(t1, t2, t1l); | |
return result; | |
} | |
header_function | |
Bool txtEqTC (Txt t1, Char *s2) | |
{ | |
Size t1l = txtLen(t1); | |
Size s2l = strlen(s2); | |
if (t1l != s2l) return false; | |
Bool result = memcmp(txtStr(t1), s2, t1l) == 0; | |
return result; | |
} | |
header_function | |
Bool txtEqTL (Txt t1, Label l2) | |
{ | |
Size t1l = txtLen(t1); | |
Size l2l = labelLen(l2); | |
if (t1l != l2l) return false; | |
Bool result = memcmp(txtStr(t1), l2.str, t1l) == 0; | |
return result; | |
} | |
#define txtEq(_a, _b) \ | |
_Generic((_b) \ | |
, Txt : txtEqTT \ | |
, Char* : txtEqTC \ | |
, Label : txtEqTL)(_a, _b) | |
header_function | |
Char* txtChr (Txt t, Char c) | |
{ | |
Size len = txtLen(t); | |
Char *str = txtStr(t); | |
for (Size i = 0; i < len; i++) { | |
if (str[i] == c) return &str[i]; | |
} | |
return nullptr; | |
} | |
header_function | |
Char* txtChrR (Txt t, Char c) | |
{ | |
Size len = txtLen(t); | |
Char *str = txtStr(t); | |
for (Size i = len - 1; i < len; i--) { // Walks backwards; unsigned wrap-around below zero ends the loop | |
if (str[i] == c) return &str[i]; | |
} | |
return nullptr; | |
} | |
typedef struct Txt_Span { | |
Txt t; | |
Uint32 begin, end; // begin inclusive, end exclusive { [begin, end) } | |
Uint64 hash; | |
} Txt_Span; | |
header_function | |
Txt_Span txtGetSpan (Txt t, Sint64 begin, Sint64 end) | |
{ | |
if (begin < 0) begin = cast_val(Uint32, cast_val(Sint32, txtLen(t)) - (-begin)); | |
if (end < 0) end = cast_val(Uint32, cast_val(Sint32, txtLen(t)) - (-end)); | |
if (end <= begin) { | |
end = begin + 1; // So that we don't end up with unnecessary errors | |
} | |
if (end > cast_val(Sint, txtLen(t))) { | |
end = cast_val(Sint, txtLen(t)); | |
} | |
Txt_Span ts = { | |
.t = t, | |
.begin = cast_val(Uint32, begin), | |
.end = cast_val(Uint32, end), | |
.hash = hash64(txtStr(t) + begin, cast_val(Size, end - begin), 0), | |
}; | |
return ts; | |
} | |
header_function | |
Char* txtspanStr(Txt_Span ts) | |
{ | |
Char *str = txtStr(ts.t) + ts.begin; | |
return str; | |
} | |
header_function | |
Size txtspanLen (Txt_Span ts) | |
{ | |
Size len = ts.end - ts.begin; | |
return len; | |
} | |
header_function | |
Txt txtspanExtract (Txt_Arena *ta, Txt_Span ts) | |
{ | |
return txtNewL(ta, txtspanStr(ts), txtspanLen(ts)); | |
} | |
header_function | |
Bool txtspanIsEqualStr (Txt_Span ts, Char *str) | |
{ | |
Size strs = strlen(str); | |
Size tss = ts.end - ts.begin; | |
return (strs == tss) && (memcmp(str, ts.t.str + ts.begin, tss) == 0); | |
} | |
header_function | |
Bool txtspanIsEqualLabel (Txt_Span ts, Label l) | |
{ | |
return labelIsEqualStrL(l, ts.t.str + ts.begin, ts.end-ts.begin); | |
} | |
header_function | |
Label labelMakeTS (Txt_Span ts) | |
{ | |
return labelMakeL(ts.t.str + ts.begin, cast_val(Uint8, ts.end - ts.begin)); | |
} | |
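/* | |
* Usage sketch (illustrative only): spans take Python-style negative indices relative to | |
* the end of the Txt, with begin inclusive and end exclusive. | |
* | |
* Txt_Arena ta = txtArenaCreate(mi); // `mi` is a Memory_Allocator_Interface | |
* Char src[] = "hello world"; | |
* Txt t = txtNew(&ta, src); | |
* Txt_Span ts = txtGetSpan(t, -5, 11); // "world" | |
* Txt w = txtspanExtract(&ta, ts); // Copy the span out as its own Txt | |
* txtArenaDelete(&ta); | |
*/ | |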
typedef struct Txt_Fmt_Chunk Txt_Fmt_Chunk; | |
typedef struct Txt_Fmt_Pool { | |
Memory_Allocator_Interface miface; | |
Memory_Slab chunk_allocator; | |
Size chunk_count; | |
} Txt_Fmt_Pool; | |
typedef struct Txt_Fmt { | |
Txt_Fmt_Pool *pool; | |
Size length; | |
Txt_Fmt_Chunk *first_chunk, *last_chunk; | |
} Txt_Fmt; | |
struct Txt_Fmt_Chunk { | |
Txt_Fmt_Chunk *next_chunk; | |
Char data[55]; | |
Uint8 filled; | |
}; | |
static_assert(sizeof(Txt_Fmt_Chunk) == 64, "Txt fmt chunk won't fit in a single cache line"); | |
#define TXT_FMT_CHARS_PER_CHUNK_ elemin(compound_init(Txt_Fmt_Chunk, {}).data) | |
static_assert(TXT_FMT_CHARS_PER_CHUNK_ == 55, "Number of bytes in each Txt fmt chunk is wrong"); | |
header_function | |
void txtfmtPoolCreate (Txt_Fmt_Pool *tp, Memory_Allocator_Interface mem, Uint32 chunk_count) | |
{ | |
// 512 chunks in 32 KiB (same size as the free-grid metadata) | |
chunk_count = maximum(chunk_count, 512u); | |
// Initialized with memset instead of a compound initializer to avoid placing a large temporary on the stack. | |
memset(tp, 0, sizeof(*tp)); | |
tp->chunk_count = chunk_count; | |
slabCreate(&tp->chunk_allocator, mem, chunk_count, sizeof(Txt_Fmt_Chunk)); | |
} | |
header_function | |
void txtfmtPoolDelete (Txt_Fmt_Pool *tp) | |
{ | |
slabDelete(&tp->chunk_allocator); | |
} | |
header_function | |
Txt_Fmt txtfmtCreate (Txt_Fmt_Pool *tp) | |
{ | |
Txt_Fmt txt = { | |
.pool = tp, | |
}; | |
txt.first_chunk = cast_val(Txt_Fmt_Chunk*, slabRequest(&tp->chunk_allocator)); | |
memzero(txt.first_chunk); | |
txt.last_chunk = txt.first_chunk; | |
return txt; | |
} | |
header_function | |
void txtfmtRemove (Txt_Fmt *tf) | |
{ | |
for (Txt_Fmt_Chunk *tc = tf->first_chunk, *tcc; tc != nullptr; tc = tcc) { | |
tcc = tc->next_chunk; | |
slabRescind(&tf->pool->chunk_allocator, tc); | |
} | |
} | |
header_function | |
Txt_Fmt_Chunk* txtfmt_GetLastChunk (Txt_Fmt *tf) | |
{ | |
if (tf->last_chunk->filled == TXT_FMT_CHARS_PER_CHUNK_) { | |
tf->last_chunk->next_chunk = cast_val(Txt_Fmt_Chunk*, slabRequest(&tf->pool->chunk_allocator)); | |
memzero(tf->last_chunk->next_chunk); | |
tf->last_chunk = tf->last_chunk->next_chunk; | |
} | |
return tf->last_chunk; | |
} | |
header_function | |
Size txtfmtAffix (Txt_Fmt *tf, Char c) | |
{ | |
Txt_Fmt_Chunk *s = txtfmt_GetLastChunk(tf); | |
s->data[s->filled] = c; | |
s->filled++; | |
tf->length++; | |
return 1; | |
} | |
header_function | |
PRINT_CALLBACK_FUNC(txtfmt_PrintCallback) | |
{ | |
unused_variable(buffer_cap); | |
Txt_Fmt *tf = cast_val(Txt_Fmt*, userdata); | |
Txt_Fmt_Chunk *s = txtfmt_GetLastChunk(tf); | |
debugAssert(s); | |
Size length = cast_val(Size, buffer_len); | |
Size empty = TXT_FMT_CHARS_PER_CHUNK_ - s->filled; | |
if (length > empty) { | |
memcpy(s->data + s->filled, filled_buffer, empty); | |
s->filled += cast_val(Uint8, empty); | |
if (length - empty) { | |
s = txtfmt_GetLastChunk(tf); | |
memcpy(s->data, filled_buffer + empty, length - empty); | |
s->filled += cast_val(Uint8, length - empty); | |
} | |
} else if (length != 0) { | |
memcpy(s->data + s->filled, filled_buffer, length); | |
s->filled += cast_val(Uint8, length); | |
} | |
tf->length += length; | |
} | |
with_clang_gcc(__attribute__(( format( __printf__, 2, 0 )))) | |
header_function | |
Size txtfmtAppendFV (Txt_Fmt *tf, const Char *fmt, va_list va) | |
{ | |
Char buf[TXT_FMT_CHARS_PER_CHUNK_] = {}; | |
Size size = printStringVarArg(buf, elemin(buf), fmt, va, txtfmt_PrintCallback, tf); | |
return size; | |
} | |
with_clang_gcc(__attribute__ (( format( __printf__, 2, 3 )))) | |
header_function | |
Size txtfmtAppendF (Txt_Fmt *tf, const Char *fmt, ...) | |
{ | |
va_list args; | |
va_start(args, fmt); | |
Size result = txtfmtAppendFV(tf, fmt, args); | |
va_end(args); | |
return result; | |
} | |
header_function | |
Size txtfmtAppendCL (Txt_Fmt *tf, const Char *tc, Size length) | |
{ | |
Size copied = 0; | |
Size len = length; | |
while (len) { | |
Txt_Fmt_Chunk *s = txtfmt_GetLastChunk(tf); | |
Size l = minimum(len, TXT_FMT_CHARS_PER_CHUNK_); | |
l = minimum(l, TXT_FMT_CHARS_PER_CHUNK_ - s->filled); | |
memcpy(s->data + s->filled, tc + copied, l); | |
copied += l; | |
len -= l; | |
s->filled += cast_val(Uint8, l); | |
} | |
tf->length += length; | |
return copied; | |
} | |
header_function | |
Size txtfmtAppendC (Txt_Fmt *tf, const Char *tc) | |
{ | |
Size len = strlen(tc); | |
Size result = txtfmtAppendCL(tf, tc, len); | |
return result; | |
} | |
header_function | |
Size txtfmtAppendT (Txt_Fmt *tf, Txt txt) | |
{ | |
Size result = txtfmtAppendCL(tf, txtStr(txt), txtLen(txt)); | |
return result; | |
} | |
header_function | |
Size txtfmtAppendTs (Txt_Fmt *tf, Txt_Span ts) | |
{ | |
Size result = txtfmtAppendCL(tf, txtStr(ts.t) + ts.begin, cast_val(Size, ts.end - ts.begin)); | |
return result; | |
} | |
header_function | |
Size txtfmtAppendLbl (Txt_Fmt *tf, Label lbl) | |
{ | |
Size result = txtfmtAppendCL(tf, lbl.str, lbl.len); | |
return result; | |
} | |
header_function | |
Txt txtfmtFinish (Txt_Fmt *tf, Txt_Arena *ta) | |
{ | |
Txt t = txt_Allocate(ta, tf->length); | |
Size filled = 0; | |
for (Txt_Fmt_Chunk *s = tf->first_chunk; s != nullptr; s = s->next_chunk) { | |
debugAssert((filled + s->filled) < UINT32_MAX); | |
memcpy(txtStr(t) + filled, s->data, s->filled); | |
filled += s->filled; | |
} | |
(txtStr(t))[filled] = '\0'; | |
txtfmtRemove(tf); | |
return t; | |
} | |
#undef TXT_FMT_CHARS_PER_CHUNK_ | |
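/* | |
* Usage sketch (illustrative only): a Txt_Fmt accumulates pieces in pooled 64-byte chunks | |
* and touches the arena only once, inside txtfmtFinish. `mi` and `ta` are assumed to be a | |
* valid Memory_Allocator_Interface and Txt_Arena. | |
* | |
* Txt_Fmt_Pool pool; | |
* txtfmtPoolCreate(&pool, mi, 512); | |
* Txt_Fmt tf = txtfmtCreate(&pool); | |
* txtfmtAppendC(&tf, "x = "); | |
* txtfmtAppendF(&tf, "%d", 42); | |
* Txt t = txtfmtFinish(&tf, &ta); // "x = 42"; also releases tf's chunks | |
* txtfmtPoolDelete(&pool); | |
*/ | |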
/************************************************************************************************ | |
* Ravel (Print to string) | |
* | |
* What can change the nature of a man? | |
*/ | |
header_function | |
Size ravelSint8 (Txt_Fmt *fmt, Sint8 val) | |
{ | |
Size s = txtfmtAppendF(fmt, "%d", cast_val(Sint, val)); | |
return s; | |
} | |
header_function | |
Size ravelUint8 (Txt_Fmt *fmt, Uint8 val) | |
{ | |
Size s = txtfmtAppendF(fmt, "%u", cast_val(Uint, val)); | |
return s; | |
} | |
header_function | |
Size ravelSint16 (Txt_Fmt *fmt, Sint16 val) | |
{ | |
Size s = txtfmtAppendF(fmt, "%d", cast_val(Sint, val)); | |
return s; | |
} | |
header_function | |
Size ravelUint16 (Txt_Fmt *fmt, Uint16 val) | |
{ | |
Size s = txtfmtAppendF(fmt, "%u", cast_val(Uint, val)); | |
return s; | |
} | |
header_function | |
Size ravelSint32 (Txt_Fmt *fmt, Sint32 val) | |
{ | |
Size s = txtfmtAppendF(fmt, "%d", val); | |
return s; | |
} | |
header_function | |
Size ravelUint32 (Txt_Fmt *fmt, Uint32 val) | |
{ | |
Size s = txtfmtAppendF(fmt, "%u", val); | |
return s; | |
} | |
header_function | |
Size ravelFloat32 (Txt_Fmt *fmt, Float32 val) | |
{ | |
Size s = txtfmtAppendF(fmt, "%f", cast_val(Float64, val)); | |
return s; | |
} | |
header_function | |
Size ravelSint64 (Txt_Fmt *fmt, Sint64 val) | |
{ | |
Size s = txtfmtAppendF(fmt, "%lld", val); | |
return s; | |
} | |
header_function | |
Size ravelUint64 (Txt_Fmt *fmt, Uint64 val) | |
{ | |
Size s = txtfmtAppendF(fmt, "%llu", val); | |
return s; | |
} | |
header_function | |
Size ravelFloat64 (Txt_Fmt *fmt, Float64 val) | |
{ | |
Size s = txtfmtAppendF(fmt, "%f", val); | |
return s; | |
} | |
header_function | |
Size ravelStr (Txt_Fmt *fmt, Char *str) | |
{ | |
Size s = txtfmtAppendF(fmt, "\"%s\"", str); | |
return s; | |
} | |
header_function | |
Size ravelLabel (Txt_Fmt *fmt, Label lbl) | |
{ | |
Size s = txtfmtAppendF(fmt, "\"%.*s\"", cast_val(Sint, lbl.len), lbl.str); | |
return s; | |
} | |
header_function | |
Size ravelTxt (Txt_Fmt *fmt, Txt t) | |
{ | |
Size s = txtfmtAppendF(fmt, "\"%.*s\"", cast_val(Sint, txtLen(t)), txtStr(t)); | |
return s; | |
} | |
header_function | |
Size ravelTxtSpan (Txt_Fmt *fmt, Txt_Span ts) | |
{ | |
Size s = txtfmtAppendF(fmt, "\"%.*s\"", cast_val(Sint, txtspanLen(ts)), txtspanStr(ts)); | |
return s; | |
} | |
#define ravel(_fmt, _val) \ | |
_Generic((_val) \ | |
, Sint8 : ravelSint8 \ | |
, Uint8 : ravelUint8 \ | |
, Sint16 : ravelSint16 \ | |
, Uint16 : ravelUint16 \ | |
, Sint32 : ravelSint32 \ | |
, Uint32 : ravelUint32 \ | |
, Float32 : ravelFloat32 \ | |
, Sint64 : ravelSint64 \ | |
, Uint64 : ravelUint64 \ | |
, Float64 : ravelFloat64 \ | |
, Char* : ravelStr \ | |
, Label : ravelLabel \ | |
, Txt : ravelTxt \ | |
, Txt_Span : ravelTxtSpan \ | |
)(_fmt, _val) | |
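/* | |
* Illustrative example: ravel picks the serializer from the static type of the value, so | |
* mixed fields can be written with one macro (pool as in the Txt_Fmt sketch above). | |
* | |
* Txt_Fmt tf = txtfmtCreate(&pool); | |
* ravel(&tf, cast_val(Uint32, 7)); // Appends "7" | |
* ravel(&tf, labelMake("id")); // Appends "\"id\"" | |
*/ | |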
/************************************************************************************************ | |
* B-Tree | |
*/ | |
// B-Tree implementation | |
// Reference: Chapter 18 of Introduction to Algorithms - Fourth Edition by Cormen, et al. | |
// Some changes have been made to make the implementation more amenable to future optimizations | |
// (SIMD or otherwise). | |
// First, the idea of "minimum degree" has been discarded in favour of having a maximum count. | |
// This means that we store an even number of keys instead of an odd number. | |
// Second, there is no leaf boolean; the leaf property is known by comparing the first child to nullptr. | |
// WARN(naman): This BTree doesn't rebalance itself when keys are dropped. This means that the size | |
// of the tree only ever goes up. Thus, don't use this in places where a lot of deletions will | |
// be taking place, since an unnecessarily large tree will slow down the lookups. | |
// NOTE(naman): All dynamic allocations are of the same size (sizeof(BTree_Node *)), so a slab | |
// allocator can be used. Alternatively, since not too many deletions would be performed (see above), | |
// a simple stack allocator will also suffice. | |
// The value of M is chosen so that all the keys can be compared using five 256-bit wide SIMD instructions. | |
// In addition, this also makes the BTree_Node align with cache line sizes of 64, 128, 256 and 512 bytes. | |
#define M 20 | |
typedef struct BTree_Node { | |
// In the keys and data array, [M/2 - 1, M] elements have to be present | |
// NOTE(naman): More often used members are on top together for cache friendliness. | |
Uint64 keys[M]; // Called k in Cormen | |
Size count; // Called n in Cormen | |
struct BTree_Node *children[M + 1]; // Called c in Cormen | |
void *data[M]; | |
struct BTree_Node *next, *prev; // Unused for now | |
} BTree_Node; | |
static_assert(sizeof(BTree_Node) == 512, "BTree_Node's size is not expected"); | |
typedef struct BTree { | |
Memory_Allocator_Interface miface; | |
BTree_Node *root; | |
} BTree; | |
header_function | |
void btreeCreate (BTree *b, Memory_Allocator_Interface miface) | |
{ | |
memzero(b); | |
b->miface = miface; | |
b->root = cast_val(BTree_Node *, memIRequest(b->miface, sizeof(*b->root))); | |
memzero(b->root); // memIRequest is not guaranteed to return zeroed memory; the root must start empty | |
} | |
header_function | |
void btreeDelete (BTree *b) | |
{ | |
memIRescind(b->miface, b->root); | |
} | |
header_function | |
void* btree_FindReplace (BTree *b, Uint64 key, Bool replace_value, void *replacement) | |
{ | |
BTree_Node *n = b->root; | |
while (n != nullptr) { | |
Size i; | |
Bool found_equal = false; | |
for (i = 0; i < n->count; i++) { | |
if (n->keys[i] == key) { | |
found_equal = true; | |
break; | |
} else if (n->keys[i] > key) { | |
break; | |
} | |
} | |
if (found_equal) { | |
void *old = n->data[i]; | |
if (replace_value) n->data[i] = replacement; | |
return old; | |
} else if (n->children[0] == nullptr) { // Leaf Node | |
return nullptr; | |
} else { | |
// If greater found, i is pointing to the key just greater, meaning the i-th child is | |
// between the just smaller and just greater keys. If greater was not found, then i is | |
// equal to n->count, which also points to the last child. | |
n = n->children[i]; | |
} | |
} | |
return nullptr; | |
} | |
header_function | |
void* btreeSearch (BTree *b, Uint64 key) { | |
void *result = btree_FindReplace (b, key, false, nullptr); | |
return result; | |
} | |
header_function | |
void* btreeReplace (BTree *b, Uint64 key, void *new_value) { | |
void *result = btree_FindReplace (b, key, true, new_value); | |
return result; | |
} | |
header_function | |
void* btreeRemove (BTree *b, Uint64 key) { | |
void *result = btree_FindReplace (b, key, true, nullptr); | |
return result; | |
} | |
header_function | |
void btree_SplitChild (BTree *b, BTree_Node *parent, Size i) | |
{ | |
// All indices in this function's comments (I, T, etc.) are zero indexed. Pay close attention. | |
// To mark them against regular numbers, they are written in capital. `i` and `I` are both the same, | |
// `i` is used when talking about numbers (count, etc.), while `I` is used when talking about indices. | |
// This function gets called if the I-th child of the node has become full (i.e., has m keys) | |
// and need to be split. | |
BTree_Node *full = parent->children[i]; | |
BTree_Node *newnode = cast_val(BTree_Node *, memIRequest(b->miface, sizeof(*newnode))); | |
memzero(newnode); // Keeps children null for leaves; memIRequest may hand back dirty memory | |
// We will move the last m/2-1 keys from `full` into `newnode`. Since `full` had m keys, this means that | |
// `full` will be left with m/2+1 keys from [0...M/2], and the m/2-1 keys from [M/2+1...M-1] will become `newnode`'s keys | |
// from [0..M/2-1]. | |
newnode->count = M/2 - 1; | |
memcpy(&newnode->keys[0], &full->keys[M/2 + 1], newnode->count * sizeof(newnode->keys[0])); | |
memcpy(&newnode->data[0], &full->data[M/2 + 1], newnode->count * sizeof(newnode->data[0])); | |
// If the `full` node is not a leaf, it has a bunch of children. Since the trailing keys got moved, | |
// the children whose positions fall between those keys also have to be moved. Because we moved m/2-1 keys, | |
// there would be m/2 children associated with them that also need to be moved. This means that the | |
// M/2-th key in `full` will have no right-child anymore. But that's okay, since the M/2-th | |
// key will get moved to `parent` anyway. So, we will move m/2 children from `full`'s [M/2+1...M] | |
// to `newnode`'s [0...M/2-1]. | |
if (full->children[0] != nullptr) { // Not a leaf | |
memcpy(&newnode->children[0], &full->children[M/2+1], (newnode->count + 1) * sizeof(newnode->children[0])); | |
} | |
// Finally, we set the `full`'s count to m/2. Remember that `full` actually still has a total of m/2+1 elements. | |
// However, the element on index M/2 will soon get moved to the `parent`. | |
full->count = M/2; | |
// But before we can move `full`'s M/2 element and attach `newnode` as a new child, we have to make space | |
// for it in `parent`. First, let's move the children. Let parent->count be n. If the total number | |
// of keys that the parent has is n from 0 to N-1, the children would be n+1 from 0 to N. Since the | |
// `full` was attached at index I, we need to move all children from [I+1...N] to [I+2...N+1], so that | |
// we can add `newnode` as a child at index I+1. | |
for (Size j = (parent->count + 1); j >= (i + 2); j--) { | |
parent->children[j] = parent->children[j-1]; | |
} | |
parent->children[i+1] = newnode; | |
// Now, the turn for keys. Since `full`'s M/2 element has to be moved to `parent`'s I element (right between | |
// the I-th and (I+1)-th child), we need to make space for it by moving elements from [I...N-1] to [I+1...N]. | |
for (Size j = parent->count; j >= (i + 1); j--) { | |
parent->keys[j] = parent->keys[j-1]; | |
parent->data[j] = parent->data[j-1]; | |
} | |
// Now that there is a space in `parent`'s key array, let's put `full`'s M/2 element there. | |
parent->keys[i] = full->keys[M/2]; | |
parent->data[i] = full->data[M/2]; | |
parent->count++; | |
} | |
pragma_msvc("warning ( push )"); | |
pragma_msvc("warning ( disable: 6385 )"); // Analyze: READ_OVERRUN (Reading invalid data from 'n->keys' and 'n->children') | |
// NOTE(naman): Returns the previous value associated with the key, if any. | |
header_function | |
void* btreeInsert (BTree *b, Uint64 key, void *datum) | |
{ | |
if (b->root->count == M) { | |
// This is a clever hack. Cormen wanted to use the existing splitting function (btree_SplitChild) to split the root too. | |
// But splitting requires transferring one item to the parent, and the root has no parent. So, they temporarily create | |
// an invalid tree by creating a new node with zero elements and setting the root as its child. But the splitting process | |
// adds one element and one more child (the newly split node) to the new root, thus making the tree valid again. | |
BTree_Node *s = cast_val(BTree_Node *, memIRequest(b->miface, sizeof(*s))); | |
memzero(s); // The new root must start with zero keys and null children | |
s->children[0] = b->root; | |
b->root = s; | |
btree_SplitChild(b, b->root, 0); | |
} | |
BTree_Node *n = b->root; | |
while (n != nullptr) { | |
Size i = n->count; | |
if (n->children[0] == nullptr) { // Leaf | |
if ((i > 0) && (key >= n->keys[0]) && (key <= n->keys[i-1])) { // If the key is in the space of this (non-empty) node | |
for (Size j = 0; j < i; j++) { // Check if the key has already been inserted | |
if (key == n->keys[j]) { | |
void *result = n->data[j]; | |
n->data[j] = datum; | |
return result; | |
} | |
} | |
} | |
while ((i >= 1) && (key < n->keys[i-1])) { // Else move the data | |
n->keys[i] = n->keys[i-1]; | |
n->data[i] = n->data[i-1]; | |
i--; | |
} | |
n->keys[i] = key; // And copy the key | |
n->data[i] = datum; | |
n->count++; | |
return nullptr; | |
} else { | |
while ((i >= 1) && (key < n->keys[i-1])) { | |
i--; | |
} | |
if (n->children[i]->count == M) { | |
btree_SplitChild(b, n, i); | |
if (key > n->keys[i]) { | |
i++; | |
} | |
} | |
n = n->children[i]; | |
} | |
} | |
return nullptr; | |
} | |
pragma_msvc("warning ( pop )"); | |
header_function | |
void* btreeSwitch (BTree *b, Uint64 key, void *oldval, void *newval) | |
{ | |
return oldval ? btreeReplace(b, key, newval) : btreeInsert(b, key, newval); | |
} | |
// Return false to prematurely stop iteration | |
#define BTREE_ITERATOR_FUNC(name) Bool name (void *funcdata, Uint64 key, void* value) | |
header_function | |
Bool btree_IterateNode (BTree_Node *bn, BTREE_ITERATOR_FUNC((*func)), void *funcdata) | |
{ | |
if (bn == nullptr) return true; | |
Size i; | |
for (i = 0; i < bn->count; i++) { | |
if (btree_IterateNode(bn->children[i], func, funcdata) == false) return false; | |
if (func(funcdata, bn->keys[i], bn->data[i]) == false) return false; | |
} | |
if (btree_IterateNode(bn->children[i], func, funcdata) == false) return false; | |
return true; | |
} | |
header_function | |
Bool btreeIterate (BTree *b, BTREE_ITERATOR_FUNC((*func)), void *funcdata) | |
{ | |
Bool result = btree_IterateNode(b->root, func, funcdata); | |
return result; | |
} | |
#undef M | |
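/* | |
* Usage sketch (illustrative only): keys are Uint64 and values are opaque pointers; | |
* insert/replace return the previous value when a key already exists. `datum` stands in | |
* for any pointer value. | |
* | |
* BTree bt; | |
* btreeCreate(&bt, mi); | |
* btreeInsert(&bt, 42, datum); // Returns nullptr for a fresh key | |
* void *v = btreeSearch(&bt, 42); // == datum | |
* btreeRemove(&bt, 42); // Value dropped; the key's slot stays in the tree | |
* btreeDelete(&bt); | |
*/ | |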
/************************************************************************************************ | |
* Hashmap | |
*/ | |
// WARN(naman): Same performance characteristics as the BTree. See its documentation for more details | |
typedef struct Hashmap_Entry { | |
struct Hashmap_Entry *next; | |
Uint64 hash; | |
Uint64 key; | |
void *value; | |
} Hashmap_Entry; | |
typedef struct Hashmap { | |
BTree btree; | |
COMPARE_FUNC((*compare_func)); | |
Memory_Allocator_Interface entry_alloc; // Allocates sizeof(Hashmap_Entry) = 32 bytes at a time | |
} Hashmap; | |
header_function | |
void hashmapCreate (Hashmap *hm, COMPARE_FUNC((*compare_func)), | |
Memory_Allocator_Interface entry_alloc, Memory_Allocator_Interface node_alloc) | |
{ | |
*hm = compound_init(Hashmap, { | |
.compare_func = compare_func, | |
.entry_alloc = entry_alloc, | |
}); | |
btreeCreate(&hm->btree, node_alloc); | |
} | |
// Return false to prematurely stop iteration | |
#define HASHMAP_ITERATOR_FUNC(name) Bool name (void *funcdata, Hashmap_Entry *entry) | |
struct Hashmap_Iterator_Data { | |
HASHMAP_ITERATOR_FUNC((*func)); | |
void *funcdata; | |
}; | |
header_function | |
BTREE_ITERATOR_FUNC(hashmap_IterateNode) | |
{ | |
unused_variable(key); | |
struct Hashmap_Iterator_Data *data = cast_val(struct Hashmap_Iterator_Data *, funcdata); | |
Hashmap_Entry *list = cast_val(Hashmap_Entry *, value); | |
for (Hashmap_Entry *e = list, *en; e != nullptr; e = en) { | |
en = e->next; | |
if ((data->func)(data->funcdata, e) == false) return false; | |
} | |
return true; | |
} | |
header_function | |
Bool hashmapIterate (Hashmap *hm, HASHMAP_ITERATOR_FUNC((*func)), void *funcdata) | |
{ | |
struct Hashmap_Iterator_Data data = { | |
.func = func, | |
.funcdata = funcdata, | |
}; | |
Bool result = btree_IterateNode(hm->btree.root, hashmap_IterateNode, &data); | |
return result; | |
} | |
header_function | |
HASHMAP_ITERATOR_FUNC(hashmap_DeleteNodeList) | |
{ | |
Hashmap *hm = cast_val(Hashmap *, funcdata); | |
memIRescind(hm->entry_alloc, entry); | |
return true; | |
} | |
header_function | |
void hashmapDelete (Hashmap *hm) | |
{ | |
hashmapIterate(hm, hashmap_DeleteNodeList, hm); | |
btreeDelete(&hm->btree); | |
memzero(hm); | |
} | |
// Returns the existing data from BTree and saves out matched entry into matched pointer | |
header_function | |
Hashmap_Entry* hashmap_FindEntry (Hashmap *hm, Uint64 hash, Uint64 key, Hashmap_Entry **matched) | |
{ | |
Hashmap_Entry *list = cast_val(Hashmap_Entry *, btreeSearch(&hm->btree, hash)); | |
for (Hashmap_Entry *ei = list; ei != nullptr; ei = ei->next) { | |
if (hm->compare_func(cast_bit(void *, ei->key), cast_bit(void *, key)) == 0) { | |
if (matched) *matched = ei; | |
return list; | |
} | |
} | |
return list; | |
} | |
header_function | |
void hashmapInsert (Hashmap *hm, Uint64 hash, Uint64 key, void *value) | |
{ | |
Hashmap_Entry *matched = nullptr; | |
Hashmap_Entry *list = hashmap_FindEntry(hm, hash, key, &matched); | |
if (matched) { | |
matched->value = value; | |
return; | |
} | |
Hashmap_Entry *e = cast_val(Hashmap_Entry *, memIRequest(hm->entry_alloc, sizeof(*e))); | |
*e = compound_init(Hashmap_Entry, { | |
.next = list, | |
.hash = hash, | |
.key = key, | |
.value = value, | |
}); | |
btreeSwitch(&hm->btree, hash, list, e); | |
} | |
header_function | |
void* hashmapSearch (Hashmap *hm, Uint64 hash, Uint64 key) | |
{ | |
Hashmap_Entry *matched = nullptr; | |
hashmap_FindEntry(hm, hash, key, &matched); | |
return matched ? matched->value : nullptr; | |
} | |
header_function | |
void hashmapRemove (Hashmap *hm, Uint64 hash, Uint64 key) | |
{ | |
Hashmap_Entry *matched = nullptr; | |
Hashmap_Entry *list = hashmap_FindEntry(hm, hash, key, &matched); | |
if (matched) { | |
if (list == matched) { | |
btreeSwitch(&hm->btree, hash, list, list->next); | |
memIRescind(hm->entry_alloc, matched); // Don't leak the unlinked head entry | |
} else { | |
for (Hashmap_Entry *e = list; e != nullptr; e = e->next) { | |
if (e->next == matched) { | |
Hashmap_Entry *old = e->next; | |
e->next = e->next->next; | |
memIRescind(hm->entry_alloc, old); | |
return; | |
} | |
} | |
} | |
} | |
} | |
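/* | |
* Usage sketch (illustrative only): keys travel as Uint64 (pointers can be passed through | |
* cast_bit), the caller supplies the hash, and the comparator must return 0 for equal keys. | |
* The hypothetical comparator below assumes COMPARE_FUNC declares a (void *, void *) | |
* signature (including the parameter names), matching how compare_func is invoked in | |
* hashmap_FindEntry above. | |
* | |
* COMPARE_FUNC(compareU64) { return (a == b) ? 0 : 1; } // Parameter names are assumed | |
* | |
* Hashmap hm; | |
* hashmapCreate(&hm, compareU64, mi, mi); | |
* Uint64 key = 42; | |
* hashmapInsert(&hm, hash64(&key, sizeof(key), 0), key, datum); | |
* void *v = hashmapSearch(&hm, hash64(&key, sizeof(key), 0), key); // == datum | |
* hashmapDelete(&hm); | |
*/ | |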
/****************************************************************************************** | |
* Intrusive Circular Doubly Linked List | |
* Inspired by github.com/torvalds/linux/blob/master/include/linux/list.h | |
*/ | |
typedef struct List_Node { | |
struct List_Node *next, *prev; | |
} List_Node; | |
/* To get the node (container struct) holding the linked list node */ | |
#define listThisNode(nodeptr, type, member) containerof((nodeptr), type, member) | |
#define listNextNode(nodeptr, type, member) containerof((nodeptr)->next, type, member) | |
#define listPrevNode(nodeptr, type, member) containerof((nodeptr)->prev, type, member) | |
/* | |
* Initialize the list like this: | |
* ***************************** | |
* typedef struct N { | |
* List_Node l; | |
* } N; | |
* | |
* N n; | |
* n.l = listInit(n.l); | |
*/ | |
#define listInit(name) (List_Node) {&(name), &(name)} | |
#define listReset(ptr) do{(ptr)->next = (ptr); (ptr)->prev = (ptr);}while(0) | |
header_function | |
void list_Add (List_Node *newnode, List_Node *prev, List_Node *next) | |
{ | |
next->prev = newnode; | |
newnode->next = next; | |
newnode->prev = prev; | |
prev->next = newnode; | |
} | |
header_function | |
void listAddAfter (List_Node *newnode, List_Node *after_this) | |
{ | |
list_Add(newnode, after_this, after_this->next); | |
} | |
header_function | |
void listAddBefore (List_Node *newnode, List_Node *before_this) | |
{ | |
list_Add(newnode, before_this->prev, before_this); | |
} | |
header_function | |
void list_RemoveNodeBetween (List_Node *prev, List_Node *next) | |
{ | |
next->prev = prev; | |
prev->next = next; | |
} | |
header_function | |
void listRemove (List_Node *entry) | |
{ | |
list_RemoveNodeBetween(entry->prev, entry->next); | |
entry->next = nullptr; | |
entry->prev = nullptr; | |
} | |
header_function | |
void listRemoveAndInit (List_Node *entry) | |
{ | |
list_RemoveNodeBetween(entry->prev, entry->next); | |
listReset(entry); | |
} | |
header_function | |
void listReplace(List_Node *old, List_Node *newnode) | |
{ | |
newnode->next = old->next; | |
newnode->next->prev = newnode; | |
newnode->prev = old->prev; | |
newnode->prev->next = newnode; | |
} | |
header_function | |
void listReplaceAndInit(List_Node *old, List_Node *newnode) | |
{ | |
listReplace(old, newnode); | |
listReset(old); | |
} | |
header_function | |
void listSwap(List_Node *entry1, List_Node *entry2) | |
{ | |
List_Node *pos = entry2->prev; | |
listRemove(entry2); | |
listReplace(entry1, entry2); | |
if (pos == entry1) pos = entry2; | |
listAddAfter(entry1, pos); | |
} | |
header_function | |
void listMoveAfter (List_Node *list, List_Node *after_this) | |
{ | |
list_RemoveNodeBetween(list->prev, list->next); | |
listAddAfter(list, after_this); | |
} | |
header_function | |
void listMoveBefore (List_Node *list, List_Node *before_this) | |
{ | |
list_RemoveNodeBetween(list->prev, list->next); | |
listAddBefore(list, before_this); | |
} | |
header_function | |
Bool listIsEmpty (List_Node *node) | |
{ | |
Bool result = (node->next == node); | |
return result; | |
} | |
// Splice in a List `list`, between the Nodes `node` and `node->next` | |
header_function | |
void list_Splice (List_Node *list, List_Node *node) | |
{ | |
List_Node *first = list->next; | |
List_Node *last = list->prev; | |
List_Node *at = node->next; | |
first->prev = node; | |
node->next = first; | |
last->next = at; | |
at->prev = last; | |
} | |
// Same as list_Splice above, but does nothing when `list` is empty | |
header_function | |
void listSplice (List_Node *list, List_Node *node) | |
{ | |
if (!listIsEmpty(list)) list_Splice(list, node); | |
} | |
header_function | |
void listSpliceInit (List_Node *list, List_Node *node) | |
{ | |
if (!listIsEmpty(list)) { | |
list_Splice(list, node); | |
listReset(list); | |
} | |
} | |
// NOTE(naman): The list head should be a placeholder terminal node with no associated metadata. Not doing this makes | |
// safe iteration very difficult and/or tedious. Trust me, I've wasted too much time trying various schemes out. | |
# define LIST_FOR_EACH(_idx, _head) \ | |
for (List_Node \ | |
* _idx = (_head)->next, \ | |
* _idx##_2_ = (_head)->next->next; \ | |
_idx != (_head); \ | |
_idx = _idx##_2_, _idx##_2_ = _idx##_2_ -> next) | |
# define LIST_FOR_EACH_REVERSE(_idx, _head) \ | |
for (List_Node \ | |
* _idx = (_head)->prev, \ | |
* _idx##_2_ = (_head)->prev->prev; \ | |
_idx != (_head); \ | |
_idx = _idx##_2_, _idx##_2_ = _idx##_2_ -> prev) | |
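/* | |
 * Iteration sketch (illustrative): because each step caches the successor | |
 * pointer, the current node can be unlinked safely mid-loop. N and l are the | |
 * names from the listInit example above; should_remove() is a placeholder. | |
 * | |
 *     LIST_FOR_EACH(it, &head) { | |
 *         N *n = listThisNode(it, N, l); | |
 *         if (should_remove(n)) listRemove(it); // safe: the successor of `it` is already cached | |
 *     } | |
 */ | |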
/****************************************************************************************** | |
* Vinyas (.viny) | |
* Text Serialization Format | |
* | |
* Viny* vinyCreate (Memory_Allocator_Interface miface) | |
* void vinyDelete (Viny *v) | |
* Bool vinyParseTxt (Viny *v, Txt t) | |
* void vinyPrint (Viny *v, PRINT_FUNC((*print_func)), void *print_userdata) | |
* | |
*/ | |
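/* | |
 * Format sketch (illustrative; inferred from the tokenizer and parser below, | |
 * not a normative grammar): | |
 * | |
 *     # line comment (a trailing backslash continues it onto the next line) | |
 *     #{ block comment #{ which nests #} #} | |
 *     name: "Naman"       # key : quoted string value | |
 *     title: Untitled     # unquoted strings end at whitespace, ':', '{' or '}' | |
 *     flags: { verbose }  # key : table; a bare key inside carries an empty value | |
 *     window: { | |
 *         width:  "1280" | |
 *         height: "720" | |
 *     } | |
 */ | |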
typedef struct Viny_Table { | |
BTree btree; | |
List_Node sequential; | |
} Viny_Table; | |
typedef struct Viny_Entry { | |
List_Node bucket_list; | |
List_Node sequential; | |
Txt_Span key; | |
struct { | |
Viny_Table *table; | |
Txt_Span string; | |
} value; | |
} Viny_Entry; | |
typedef struct Viny { | |
Memory_Allocator_Interface system_miface; | |
Memory_Allocator_Interface push_miface; | |
Memory_Push mp; | |
Viny_Table global_table; | |
struct { | |
Char *msg; | |
Uint32 line, column; | |
} err; | |
Viny_Entry global_entry; | |
} Viny; | |
header_function | |
MEMORY_ALLOCATOR_REQUEST_FUNC(vinyMemoryAllocate) | |
{ | |
Memory_Push *mp = cast_val(Memory_Push*, userdata); | |
void *ptr = memPushAllot(mp, amount); | |
return ptr; | |
} | |
header_function | |
MEMORY_ALLOCATOR_RESCIND_FUNC(vinyMemoryDeallocate) | |
{ | |
Memory_Push *mp = cast_val(Memory_Push*, userdata); | |
unused_variable(mp); | |
unused_variable(ptr); | |
} | |
header_function | |
Viny* vinyCreate (Memory_Allocator_Interface miface) | |
{ | |
Viny *v = cast_val(Viny*, memIRequest(miface, sizeof(*v))); | |
v->system_miface = miface; | |
memPushCreate(&v->mp, miface, MiB(1)); | |
v->push_miface = memICreate(&v->mp, vinyMemoryAllocate, vinyMemoryDeallocate); | |
btreeCreate(&v->global_table.btree, v->push_miface); | |
v->global_entry.value.table = &v->global_table; | |
return v; | |
} | |
header_function | |
void vinyDelete (Viny *v) | |
{ | |
btreeDelete(&v->global_table.btree); | |
memPushDelete(&v->mp); | |
Memory_Allocator_Interface miface = v->system_miface; | |
memIRescind(miface, v); | |
} | |
typedef enum Viny_Token_Kind_ { | |
Viny_Token_Kind_EOF_ = 0, | |
Viny_Token_Kind_STRING_, | |
Viny_Token_Kind_COLON_, | |
Viny_Token_Kind_BRACE_OPEN_, | |
Viny_Token_Kind_BRACE_CLOSE_, | |
} Viny_Token_Kind_; | |
typedef struct Viny_Token_ { | |
Txt_Span ts; | |
Viny_Token_Kind_ kind; | |
Byte _pad[4]; | |
} Viny_Token_; | |
typedef struct Viny_Tokenizer_ { | |
Txt t; | |
Viny_Token_ token; | |
Sint64 cursor; | |
Uint32 line, column; | |
Label err_msg; | |
} Viny_Tokenizer_; | |
header_function | |
Bool viny_TokerError (Viny_Tokenizer_ *toker, Label msg) | |
{ | |
toker->err_msg = msg; | |
return false; | |
} | |
header_function | |
void viny_TokenizerAdvance (Viny_Tokenizer_ *toker, Uint32 count) | |
{ | |
for (Uint32 i = 0; (i < count) && txtStr(toker->t)[toker->cursor]; i++) { | |
if (txtStr(toker->t)[toker->cursor] == '\n') { | |
toker->line++; | |
toker->column = 1; | |
} else { | |
toker->column++; | |
} | |
toker->cursor++; | |
} | |
} | |
header_function | |
void viny_TokenizerEatWhitespace (Viny_Tokenizer_ *toker) | |
{ | |
Bool ate_something_this_loop = true; | |
while (ate_something_this_loop) { | |
ate_something_this_loop = false; | |
while ((txtStr(toker->t)[toker->cursor] == ' ') || | |
(txtStr(toker->t)[toker->cursor] == '\t') || | |
(txtStr(toker->t)[toker->cursor] == '\r') || | |
(txtStr(toker->t)[toker->cursor] == '\n')) { | |
ate_something_this_loop = true; | |
viny_TokenizerAdvance(toker, 1); | |
} | |
if (txtStr(toker->t)[toker->cursor] == '#') { | |
viny_TokenizerAdvance(toker, 1); | |
if (txtStr(toker->t)[toker->cursor] == '{') { // `#{` opens a block comment, `#}` closes it, and they nest | |
ate_something_this_loop = true; | |
viny_TokenizerAdvance(toker, 1); // step past '{' only; advancing by two would swallow the next character | |
Uint comment_nesting = 1; | |
while (txtStr(toker->t)[toker->cursor]) { | |
if (txtStr(toker->t)[toker->cursor] == '#') { | |
viny_TokenizerAdvance(toker, 1); | |
if (txtStr(toker->t)[toker->cursor] == '{') { | |
comment_nesting++; | |
viny_TokenizerAdvance(toker, 1); | |
} else if (txtStr(toker->t)[toker->cursor] == '}') { | |
comment_nesting--; | |
viny_TokenizerAdvance(toker, 1); | |
} | |
if (comment_nesting == 0) break; | |
} else { | |
viny_TokenizerAdvance(toker, 1); | |
} | |
} | |
} else { | |
ate_something_this_loop = true; | |
while (txtStr(toker->t)[toker->cursor]) { | |
if (txtStr(toker->t)[toker->cursor] == '\n') { | |
if ((toker->cursor) > 0 && txtStr(toker->t)[toker->cursor-1] == '\\') { | |
// do nothing | |
} else { | |
break; | |
} | |
} | |
viny_TokenizerAdvance(toker, 1); | |
} | |
viny_TokenizerAdvance(toker, 1); | |
} | |
} | |
} | |
} | |
header_function | |
Txt_Span viny_TokenizeString (Viny_Tokenizer_ *toker, Char ending_quote) | |
{ | |
Sint64 begin = toker->cursor; | |
if (ending_quote) { | |
while (txtStr(toker->t)[toker->cursor] && | |
(txtStr(toker->t)[toker->cursor] != ending_quote)) { | |
viny_TokenizerAdvance(toker, 1); | |
if (txtStr(toker->t)[toker->cursor-1] == '\\') { | |
viny_TokenizerAdvance(toker, 1); | |
} | |
} | |
} else { | |
while (txtStr(toker->t)[toker->cursor] && | |
(txtStr(toker->t)[toker->cursor] != ' ') && | |
(txtStr(toker->t)[toker->cursor] != '\t') && | |
(txtStr(toker->t)[toker->cursor] != '\n') && | |
(txtStr(toker->t)[toker->cursor] != '\r') && | |
(txtStr(toker->t)[toker->cursor] != ':') && | |
(txtStr(toker->t)[toker->cursor] != '{') && | |
(txtStr(toker->t)[toker->cursor] != '}')) { | |
viny_TokenizerAdvance(toker, 1); | |
if (txtStr(toker->t)[toker->cursor-1] == '\\') { | |
viny_TokenizerAdvance(toker, 1); | |
} | |
} | |
} | |
Sint64 end = toker->cursor; | |
Txt_Span ts = txtGetSpan(toker->t, begin, end); | |
return ts; | |
} | |
header_function | |
Bool viny_Tokenize (Viny_Tokenizer_ *toker) | |
{ | |
viny_TokenizerEatWhitespace(toker); | |
Viny_Token_Kind_ result; | |
switch (txtStr(toker->t)[toker->cursor]) { | |
case '\0': result = Viny_Token_Kind_EOF_; break; | |
case ':': viny_TokenizerAdvance(toker, 1); result = Viny_Token_Kind_COLON_; break; | |
case '{': viny_TokenizerAdvance(toker, 1); result = Viny_Token_Kind_BRACE_OPEN_; break; | |
case '}': viny_TokenizerAdvance(toker, 1); result = Viny_Token_Kind_BRACE_CLOSE_; break; | |
case '"': case '\'': { | |
Char quote = txtStr(toker->t)[toker->cursor]; | |
viny_TokenizerAdvance(toker, 1); | |
toker->token.ts = viny_TokenizeString(toker, quote); | |
if (txtStr(toker->t)[toker->cursor] != quote) return viny_TokerError(toker, labelMake("No closing quote")); | |
result = Viny_Token_Kind_STRING_; | |
viny_TokenizerAdvance(toker, 1); | |
} break; | |
default: { | |
toker->token.ts = viny_TokenizeString(toker, 0); | |
result = Viny_Token_Kind_STRING_; | |
} break; | |
} | |
toker->token.kind = result; | |
return true; | |
} | |
header_function | |
Viny_Tokenizer_ viny_TokenizerCreate (Txt t) | |
{ | |
Viny_Tokenizer_ tok = {.t = t, .line = 1, .column = 1}; | |
viny_Tokenize(&tok); | |
return tok; | |
} | |
header_function | |
Bool viny_TokenMatch (Viny_Tokenizer_ *toker, Viny_Token_Kind_ kind) | |
{ | |
if (toker->token.kind == kind) return true; | |
return false; | |
} | |
header_function | |
Bool viny_TokenConfirm (Viny_Tokenizer_ *toker, Viny_Token_Kind_ kind) | |
{ | |
if (viny_TokenMatch(toker, kind)) { | |
viny_Tokenize(toker); | |
return true; | |
} | |
return false; | |
} | |
header_function Bool viny_ParseTable (Viny *v, Viny_Tokenizer_ *toker, Viny_Table *tbl); | |
header_function | |
Bool viny_ParseTable (Viny *v, Viny_Tokenizer_ *toker, Viny_Table *tbl) | |
{ | |
listReset(&tbl->sequential); | |
while (viny_TokenMatch(toker, Viny_Token_Kind_STRING_)) { | |
Viny_Entry *entry = cast_val(Viny_Entry*, memPushAllot(&v->mp, sizeof(*entry))); | |
memzero(entry); | |
listAddBefore(&entry->sequential, &tbl->sequential); | |
listReset(&entry->bucket_list); | |
entry->key = toker->token.ts; | |
viny_Tokenize(toker); | |
if (viny_TokenConfirm(toker, Viny_Token_Kind_COLON_)) { | |
if (viny_TokenConfirm(toker, Viny_Token_Kind_BRACE_OPEN_)) { | |
Viny_Table *valtbl = cast_val(Viny_Table*, memPushAllot(&v->mp, sizeof(*valtbl))); | |
btreeCreate(&valtbl->btree, v->push_miface); | |
if (!viny_ParseTable(v, toker, valtbl)) return false; | |
entry->value.table = valtbl; | |
if (!viny_TokenConfirm(toker, Viny_Token_Kind_BRACE_CLOSE_)) return viny_TokerError(toker, labelMake("Closing brace expected when parsing table")); | |
} else if (viny_TokenMatch(toker, Viny_Token_Kind_STRING_)) { | |
entry->value.string = toker->token.ts; | |
viny_Tokenize(toker); | |
} else { | |
return viny_TokerError(toker, labelMake("No valid token found while parsing table")); | |
} | |
} else { | |
memzero(&entry->value); | |
} | |
// TODO(naman): First check if the key being inserted has already not been added. | |
// If it has, replace it instead of adding a new one. | |
List_Node *list_node = cast_val(List_Node*, btreeSearch(&tbl->btree, entry->key.hash)); | |
if (list_node == nullptr) { | |
list_node = cast_val(List_Node*, memPushAllot(&v->mp, sizeof(*list_node))); | |
listReset(list_node); | |
btreeInsert(&tbl->btree, entry->key.hash, list_node); | |
} | |
listAddBefore(&entry->bucket_list, list_node); | |
} | |
return true; | |
} | |
header_function | |
Bool vinyParseTxt (Viny *v, Txt t) | |
{ | |
Viny_Tokenizer_ toker = viny_TokenizerCreate(t); | |
Bool result = viny_ParseTable(v, &toker, &v->global_table); | |
return result && viny_TokenMatch(&toker, Viny_Token_Kind_EOF_); | |
} | |
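/* | |
 * End-to-end sketch (illustrative): assumes a Memory_Allocator_Interface | |
 * `miface` and a populated Txt `t`, both built with facilities from earlier in | |
 * this header; my_print_func/my_userdata are PRINT_FUNC-shaped placeholders. | |
 * | |
 *     Viny *v = vinyCreate(miface); | |
 *     if (vinyParseTxt(v, t)) { | |
 *         vinyPrint(v, my_print_func, my_userdata); | |
 *     } | |
 *     vinyDelete(v); | |
 */ | |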
typedef struct Viny_Printer_ { | |
PRINT_FUNC((*print_func)); | |
void *print_userdata; | |
Uint indent; | |
Byte _pad[4]; | |
} Viny_Printer_; | |
header_function | |
void viny_PrintRecursive (Viny_Printer_ *vpr, Viny_Table *tbl) | |
{ | |
Uint indent = cast_val(Uint, vpr->indent); | |
vpr->indent++; | |
LIST_FOR_EACH(l, &tbl->sequential) { | |
Viny_Entry *entry = cast_val(Viny_Entry *, containerof(l, Viny_Entry, sequential)); | |
if (entry->value.table) { | |
for (Uint i = 0; i < indent; i++) vpr->print_func(vpr->print_userdata, " "); | |
vpr->print_func(vpr->print_userdata, "\"%.*s\": {\n", | |
cast_val(Sint, txtspanLen(entry->key)), txtspanStr(entry->key)); | |
viny_PrintRecursive(vpr, entry->value.table); | |
for (Uint i = 0; i < indent; i++) vpr->print_func(vpr->print_userdata, " "); | |
vpr->print_func(vpr->print_userdata, "}\n"); | |
} else if (txtspanLen(entry->value.string)) { | |
for (Uint i = 0; i < indent; i++) vpr->print_func(vpr->print_userdata, " "); | |
vpr->print_func(vpr->print_userdata, "\"%.*s\": \"%.*s\"\n", | |
cast_val(Sint, txtspanLen(entry->key)), txtspanStr(entry->key), | |
cast_val(Sint, txtspanLen(entry->value.string)), txtspanStr(entry->value.string)); | |
} else { | |
for (Uint i = 0; i < indent; i++) vpr->print_func(vpr->print_userdata, " "); | |
vpr->print_func(vpr->print_userdata, "\"%.*s\"\n", | |
cast_val(Sint, txtspanLen(entry->key)), txtspanStr(entry->key)); | |
} | |
} | |
vpr->indent--; | |
} | |
header_function | |
void vinyPrint (Viny *v, PRINT_FUNC((*print_func)), void *print_userdata) | |
{ | |
Viny_Printer_ vpr = {.print_func = print_func, .print_userdata = print_userdata}; | |
viny_PrintRecursive(&vpr, &v->global_table); | |
} | |
header_function | |
Viny_Entry* viny_QueryRecursive (Viny_Entry *entry, va_list args) | |
{ | |
if (entry == nullptr) return nullptr; | |
Char *arg = va_arg(args, Char*); | |
if (arg == nullptr) return entry; | |
Size argsize = strlen(arg); | |
Uint64 arghash = hash64(arg, argsize, 0); | |
List_Node *list = cast_val(List_Node*, btreeSearch(&entry->value.table->btree, arghash)); | |
if (list == nullptr) return nullptr; | |
LIST_FOR_EACH(l, list) { | |
Viny_Entry *e = cast_val(Viny_Entry *, containerof(l, Viny_Entry, bucket_list)); | |
if ((argsize == txtspanLen(e->key)) && | |
(memcmp(txtspanStr(e->key), arg, argsize) == 0)) { | |
if (e->value.table) { | |
return viny_QueryRecursive(e, args); | |
} else { | |
Char *nextarg = va_arg(args, Char*); | |
if (nextarg) { | |
return nullptr; | |
} else { | |
return e; | |
} | |
} | |
} | |
} | |
return nullptr; | |
} | |
header_function | |
Viny_Entry* viny_QueryEntry (Viny_Entry *ve, ...) | |
{ | |
if (ve == nullptr) return nullptr; | |
va_list args; | |
va_start(args, ve); | |
Viny_Entry *result = viny_QueryRecursive(ve, args); | |
va_end(args); | |
return result; | |
} | |
#define vinyQuery(_v, ...) viny_QueryEntry(&(_v)->global_entry, __VA_ARGS__, nullptr) | |
#define vinyQueryEntry(_ve, ...) viny_QueryEntry((_ve), __VA_ARGS__, nullptr) | |
header_function | |
Viny_Entry* viny_EntryHasTable (Viny_Entry *ve) | |
{ | |
if (ve == nullptr) return nullptr; | |
if (ve->value.table) return ve; | |
return nullptr; | |
} | |
header_function | |
Viny_Entry* viny_EntryHasString (Viny_Entry *ve) | |
{ | |
if (ve == nullptr) return nullptr; | |
if (txtspanLen(ve->value.string)) return ve; | |
return nullptr; | |
} | |
header_function | |
Viny_Entry* viny_EntryHasEmpty (Viny_Entry *ve) | |
{ | |
if (ve == nullptr) return nullptr; | |
if (!viny_EntryHasTable(ve) && !viny_EntryHasString(ve)) return ve; | |
return nullptr; | |
} | |
#define vinyQueryForTable(_v, ...) viny_EntryHasTable (vinyQuery(_v, __VA_ARGS__)) | |
#define vinyQueryForString(_v, ...) viny_EntryHasString(vinyQuery(_v, __VA_ARGS__)) | |
#define vinyQueryForEmpty(_v, ...) viny_EntryHasEmpty (vinyQuery(_v, __VA_ARGS__)) | |
#define vinyQueryEntryForTable(_ve, ...) viny_EntryHasTable (vinyQueryEntry(_ve, __VA_ARGS__)) | |
#define vinyQueryEntryForString(_ve, ...) viny_EntryHasString(vinyQueryEntry(_ve, __VA_ARGS__)) | |
#define vinyQueryEntryForEmpty(_ve, ...) viny_EntryHasEmpty (vinyQueryEntry(_ve, __VA_ARGS__)) | |
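/* | |
 * Query sketch (illustrative; the key names follow the format sketch above): | |
 * | |
 *     Viny_Entry *w = vinyQueryForString(v, "window", "width"); | |
 *     if (w) { | |
 *         printf("%.*s\n", cast_val(Sint, txtspanLen(w->value.string)), | |
 *                txtspanStr(w->value.string)); | |
 *     } | |
 */ | |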
/****************************************************************************************** | |
* C++ Data Structures | |
*****************************************************************************************/ | |
#if defined(ENV_LANG_CXX) | |
# if defined(ENV_LANG_CXX) && (ENV_LANG_CXX < 2011) | |
template<class T> | |
T* addressof(T& arg) noexcept | |
{ | |
return reinterpret_cast<T*>(&const_cast<Byte&>(reinterpret_cast<const volatile Byte&>(arg))); | |
} | |
# endif | |
template<typename T> | |
struct Vector { | |
Memory_Allocator_Interface miface; | |
Size cap = 0; // NOTE(naman): Maximum number of elements that can be stored | |
Size len = 0; // NOTE(naman): Count of elements actually stored | |
T *buf = nullptr; | |
void init_ (Memory_Allocator_Interface MIface) { | |
cap = 0; | |
len = 0; | |
buf = nullptr; | |
miface = MIface; | |
} | |
Vector(Memory_Allocator_Interface MIface) { | |
init_(MIface); | |
} | |
Vector(Memory_Allocator_Interface MIface, Size elems) { | |
init_(MIface); | |
GrowToCap(elems); | |
} | |
Vector(Vector &t) { // NOTE: shallow copy; both vectors share `buf` afterwards, so only one of them should be destroyed | |
cap = t.cap; | |
len = t.len; | |
buf = t.buf; | |
miface = t.miface; | |
} | |
~Vector () { | |
for (Size i = 0; i < cap; i++) { | |
(addressof(buf[i]))->~T(); | |
} | |
memIRescind(miface, buf); | |
cap = 0; | |
len = 0; | |
} | |
T& operator[] (Size index) const { | |
return buf[index]; | |
} | |
void AddInPlace_ (Size index, T elem) { | |
buf[index] = elem; | |
} | |
Bool IsFull_ () { | |
return len + 1 > cap; | |
} | |
void Grow () { | |
GrowToCap(2 * cap); | |
} | |
void Add (T elem) { | |
if (unlikely(IsFull_())) Grow(); | |
buf[len] = elem; | |
len++; | |
} | |
void RemoveUnsorted (Size index) { | |
buf[index] = buf[len - 1]; | |
len--; | |
} | |
void Clear () { | |
memset(buf, 0, len * sizeof(T)); | |
len = 0; | |
} | |
void Resize (Size new_count) { | |
if (new_count > cap) { | |
GrowToCap(new_count); | |
} | |
} | |
Size SizeOf () { return len * sizeof(T); } | |
Size ElemIn () { return len; } | |
Size MaxSizeOf () { return cap * sizeof(T); } | |
Size MaxElemIn () { return cap; } | |
Bool IsEmpty () { return len == 0; } | |
Bool IsNotEmpty () { return !IsEmpty(); } | |
Bool IsNull () { return buf == nullptr; } | |
T* PtrOnePastLast () { | |
return buf + len; | |
} | |
T* PtrLast () { | |
return buf + (len - 1); | |
} | |
T* PtrFirst () { | |
return buf; | |
} | |
void GrowToSize_ (Size new_size) { | |
if (IsNull()) { | |
buf = static_cast<T*>(memIRequest(miface, new_size)); | |
} else { | |
T *new_buf = static_cast<T*>(memIRequest(miface, new_size)); | |
memcpy(new_buf, buf, cap * sizeof(T)); | |
memIRescind(miface, buf); | |
buf = new_buf; | |
} | |
} | |
void GrowToCap (Size elem_count) { | |
Size new_cap = maximum(elem_count, 16ULL); | |
GrowToSize_(new_cap * sizeof(T)); | |
for (Size i = cap; i < new_cap; i++) { | |
new (addressof(buf[i])) T(); | |
} | |
cap = new_cap; | |
} | |
void IncreaseCapBy (Size elem_count) { | |
GrowToCap(maximum(elem_count + len, 16ULL)); // GrowToCap already updates `cap` | |
} | |
}; | |
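// Usage sketch (illustrative; `miface` is a Memory_Allocator_Interface from | |
// earlier in this header): | |
// | |
//     Vector<Uint32> vec{miface}; | |
//     vec.Add(42); | |
//     for (Size i = 0; i < vec.ElemIn(); i++) { /* use vec[i] */ } | |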
template<typename T> | |
struct Stack { | |
Vector<T> data; | |
Stack (Memory_Allocator_Interface MIface) : data{MIface} {} | |
Stack (Memory_Allocator_Interface MIface, Size elems) : data{MIface, elems} {} | |
void Push (T elem) { | |
data.Add(elem); | |
} | |
T Peek () { // NOTE(naman): Check if stack is empty before calling | |
T elem = *data.PtrLast(); | |
return elem; | |
} | |
T Pop () { // NOTE(naman): Check if stack is empty before calling | |
T elem = Peek(); | |
data.len--; | |
return elem; | |
} | |
Bool IsEmpty () { | |
return data.IsEmpty(); | |
} | |
Bool IsNotEmpty () { | |
return !IsEmpty(); | |
} | |
Size ElemIn () { | |
return data.ElemIn(); | |
} | |
}; | |
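// Usage sketch (illustrative): LIFO order, so pushes of 1, 2, 3 pop as 3, 2, 1. | |
// | |
//     Stack<Uint32> stk{miface}; | |
//     stk.Push(1); stk.Push(2); stk.Push(3); | |
//     while (stk.IsNotEmpty()) { Uint32 top = stk.Pop(); /* 3, then 2, then 1 */ } | |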
// TODO(naman): Convert this from two-stack approach to some better (more performant) one. | |
// Right now, the amortized time is constant, but it's not real-time. | |
template<typename T> | |
struct Queue { // Double Ended | |
Stack<T> head, tail; | |
Queue (Memory_Allocator_Interface MIface) : head{MIface}, tail{MIface} {} | |
Queue (Memory_Allocator_Interface MIface, Size elems) : head{MIface, elems}, tail{MIface, elems} {} | |
void PushFront (T elem) { | |
head.Push(elem); | |
} | |
T PeekFront () { // Check if queue is empty before calling | |
if (head.IsEmpty()) { | |
debugAssert(tail.IsEmpty() == false); | |
// Move every element across so that the oldest one ends up on top of `head`; | |
// a partial transfer would break FIFO ordering. | |
Size iter = tail.ElemIn(); | |
for (Size i = 0; i < iter; i++) { | |
head.Push(tail.Pop()); | |
} | |
} | |
return head.Peek(); | |
} | |
T PopFront () { // Check if queue is empty before calling | |
PeekFront(); | |
return head.Pop(); | |
} | |
void PushBack (T elem) { | |
tail.Push(elem); | |
} | |
T PeekBack () { // Check if queue is empty before calling | |
if (tail.IsEmpty()) { | |
debugAssert(head.IsEmpty() == false); | |
// Move every element across so that the backmost one ends up on top of `tail`; | |
// a partial transfer would break the queue's ordering. | |
Size iter = head.ElemIn(); | |
for (Size i = 0; i < iter; i++) { | |
tail.Push(head.Pop()); | |
} | |
} | |
return tail.Peek(); | |
} | |
T PopBack () { // Check if queue is empty before calling | |
PeekBack(); | |
return tail.Pop(); | |
} | |
Bool IsEmpty () { | |
return head.IsEmpty() && tail.IsEmpty(); | |
} | |
Bool IsNotEmpty () { | |
return !IsEmpty(); | |
} | |
Size ElemIn () { | |
return head.ElemIn() + tail.ElemIn(); | |
} | |
}; | |
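// Usage sketch (illustrative): FIFO across both ends. | |
// | |
//     Queue<Uint32> q{miface}; | |
//     q.PushBack(1); q.PushBack(2); q.PushFront(0); | |
//     while (q.IsNotEmpty()) { Uint32 front = q.PopFront(); /* 0, then 1, then 2 */ } | |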
#endif | |
#define STD_H_INCLUDE_GUARD | |
#endif |