Created
March 29, 2026 20:49
-
-
Save NickTomlin/d3b8eedd261fd0537f6ac6d8f577affb to your computer and use it in GitHub Desktop.
NaN boxing explainer — how clox packs type + value into 8 bytes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // NaN Boxing Explainer | |
| // Build: cc -Wall -Wextra -O2 -o nan_explainer nan_boxing_explainer.c && ./nan_explainer | |
| // No dependencies beyond libc. | |
| #include <assert.h> | |
| #include <stdint.h> | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Helpers | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Print 64 bits grouped as: S EEEEEEEEEE EMMMM MMMM MMMM MMMM MMMM MMMM MMMM MMMM MMMM MMMM MMMM MMMM | |
| // (sign | 11-bit exponent | 52-bit mantissa, in groups of 4) | |
| static void print_bits64(uint64_t v) { | |
| // sign | |
| printf("%llu ", (unsigned long long)((v >> 63) & 1)); | |
| // exponent (11 bits: 62..52) | |
| for (int i = 62; i >= 52; i--) printf("%llu", (unsigned long long)((v >> i) & 1)); | |
| printf(" "); | |
| // mantissa (52 bits: 51..0), in groups of 4 | |
| for (int i = 51; i >= 0; i--) { | |
| printf("%llu", (unsigned long long)((v >> i) & 1)); | |
| if (i > 0 && i % 4 == 0) printf(" "); | |
| } | |
| } | |
| static uint64_t double_bits(double d) { | |
| uint64_t u; | |
| memcpy(&u, &d, sizeof(u)); | |
| return u; | |
| } | |
| static void ruler(void) { | |
| printf("────────────────────────────────────────────────────────────────────────────────\n"); | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Section 1: Bit Manipulation Basics | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| static void section_bit_ops(void) { | |
| ruler(); | |
| printf("SECTION 1: Bit Manipulation — the only four operations NaN boxing needs\n"); | |
| ruler(); | |
| printf("\nNaN boxing is pure bit manipulation. No arithmetic, no loops. Just four\n"); | |
| printf("bitwise operations on 64-bit integers. Let's get comfortable with them.\n\n"); | |
| // ── AND ────────────────────────────────────────────────────────────────── | |
| printf("── AND (&): masking — zero out bits you don't care about ──────────────────\n\n"); | |
| printf(" Rule: 1 & 1 = 1, 1 & 0 = 0, 0 & 1 = 0, 0 & 0 = 0\n"); | |
| printf(" A bit survives only if BOTH sides are 1. Use a mask of 1s to \"keep\" bits.\n\n"); | |
| uint64_t val = (uint64_t)0xDEADBEEFCAFEBABEULL; | |
| uint64_t mask = (uint64_t)0xFF00000000000000ULL; | |
| uint64_t res = val & mask; | |
| printf(" value = 0x%016llX\n", (unsigned long long)val); | |
| printf(" mask = 0x%016llX (only top byte is 1s)\n", (unsigned long long)mask); | |
| printf(" result = 0x%016llX (only top byte survives)\n\n", (unsigned long long)res); | |
| printf(" In binary (S EEEEEEEEEEE MMMM…):\n"); | |
| printf(" value = "); print_bits64(val); printf("\n"); | |
| printf(" mask = "); print_bits64(mask); printf("\n"); | |
| printf(" result = "); print_bits64(res); printf("\n\n"); | |
| // ── OR ─────────────────────────────────────────────────────────────────── | |
| printf("── OR (|): setting — force specific bits to 1 ─────────────────────────────\n\n"); | |
| printf(" Rule: 1 | 1 = 1, 1 | 0 = 1, 0 | 1 = 1, 0 | 0 = 0\n"); | |
| printf(" A bit becomes 1 if EITHER side is 1. Use a mask to \"stamp\" bits in.\n\n"); | |
| uint64_t base = (uint64_t)0x0000000000000042ULL; | |
| uint64_t stamp = (uint64_t)0x7FFC000000000000ULL; // QNAN (preview) | |
| uint64_t boxed = base | stamp; | |
| printf(" base = 0x%016llX\n", (unsigned long long)base); | |
| printf(" stamp = 0x%016llX (the QNAN pattern — details in Section 3)\n", (unsigned long long)stamp); | |
| printf(" result = 0x%016llX (base with QNAN bits forced on)\n\n", (unsigned long long)boxed); | |
| // ── NOT ────────────────────────────────────────────────────────────────── | |
| printf("── NOT (~): complement — flip every single bit ────────────────────────────\n\n"); | |
| printf(" Rule: ~1 = 0, ~0 = 1\n"); | |
| printf(" Used once in NaN boxing: to build the \"erase these bits\" mask for AS_OBJ.\n\n"); | |
| uint64_t qnan_sign = (uint64_t)0xFFFC000000000000ULL; | |
| uint64_t eraser = ~qnan_sign; | |
| printf(" bits_to_erase = 0x%016llX\n", (unsigned long long)qnan_sign); | |
| printf(" ~bits_to_erase= 0x%016llX (everything else is 1 → AND keeps the rest)\n\n", | |
| (unsigned long long)eraser); | |
| // ── Masking + equality = type check ────────────────────────────────────── | |
| printf("── Masking + equality: the pattern for every type check ────────────────────\n\n"); | |
| printf(" \"Is this value a quiet NaN?\"\n"); | |
| printf(" Answer: mask out the QNAN bits, then compare the result to QNAN.\n\n"); | |
| uint64_t qnan = (uint64_t)0x7FFC000000000000ULL; | |
| uint64_t number = double_bits(3.14); | |
| uint64_t nan_val = qnan | 1; // a NaN-boxed nil | |
| printf(" QNAN = 0x%016llX\n", (unsigned long long)qnan); | |
| printf(" 3.14 bits = 0x%016llX → (bits & QNAN) = 0x%016llX != QNAN → number ✓\n", | |
| (unsigned long long)number, (unsigned long long)(number & qnan)); | |
| printf(" nil bits = 0x%016llX → (bits & QNAN) = 0x%016llX == QNAN → not a number ✓\n\n", | |
| (unsigned long long)nan_val, (unsigned long long)(nan_val & qnan)); | |
| printf(" This one-liner — (value & QNAN) != QNAN — is IS_NUMBER in clox.\n\n"); | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Section 2: IEEE 754 Double Format | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| static void print_double_breakdown(const char *label, double d) { | |
| uint64_t bits = double_bits(d); | |
| uint64_t sign = (bits >> 63) & 1; | |
| uint64_t exp = (bits >> 52) & 0x7FF; | |
| uint64_t mant = bits & (uint64_t)0x000FFFFFFFFFFFFFULL; | |
| printf(" %-12s hex=0x%016llX\n", label, (unsigned long long)bits); | |
| printf(" bits: "); print_bits64(bits); printf("\n"); | |
| printf(" sign=%llu exp=0x%03llX(%llu) mantissa=0x%013llX\n", | |
| (unsigned long long)sign, | |
| (unsigned long long)exp, (unsigned long long)exp, | |
| (unsigned long long)mant); | |
| // Classify | |
| if (exp == 0x7FF) { | |
| if (mant == 0) { | |
| printf(" → INFINITY (exp=all-ones, mantissa=0)\n"); | |
| } else if (mant & (uint64_t)0x0008000000000000ULL) { | |
| printf(" → QUIET NaN (exp=all-ones, mantissa bit 51=1)\n"); | |
| } else { | |
| printf(" → SIGNALING NaN (exp=all-ones, mantissa bit 51=0)\n"); | |
| } | |
| } else { | |
| printf(" → normal number (exp not all-ones)\n"); | |
| } | |
| printf("\n"); | |
| } | |
| static void section_ieee754(void) { | |
| ruler(); | |
| printf("SECTION 2: IEEE 754 Double-Precision — the bit layout every double uses\n"); | |
| ruler(); | |
| printf("\nEvery 64-bit double is laid out like this:\n\n"); | |
| printf(" Bit: 63 62────────52 51──────────────────────────────────0\n"); | |
| printf(" S EEEEEEEEEEE MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n"); | |
| printf(" sign exponent(11) mantissa / significand (52 bits)\n\n"); | |
| printf(" sign: 0 = positive, 1 = negative\n"); | |
| printf(" exponent: biased by 1023 (so stored 1023 means 'times 2^0')\n"); | |
| printf(" mantissa: the fractional digits of the number in binary\n\n"); | |
| printf(" The special case we care about: when ALL 11 exponent bits are 1\n"); | |
| printf(" (i.e. exp == 0x7FF), the value is no longer a normal number:\n"); | |
| printf(" mantissa == 0 → Infinity\n"); | |
| printf(" mantissa != 0,\n"); | |
| printf(" bit 51 == 0 → Signaling NaN (may trap on some CPUs)\n"); | |
| printf(" bit 51 == 1 → Quiet NaN (safe to pass around)\n\n"); | |
| printf(" Let's see real examples:\n\n"); | |
| print_double_breakdown("1.0", 1.0); | |
| print_double_breakdown("-1.0", -1.0); | |
| print_double_breakdown("0.0", 0.0); | |
| print_double_breakdown("1e300", 1e300); | |
| // Infinity via division | |
| double inf = 1.0 / 0.0; | |
| print_double_breakdown("+Inf", inf); | |
| // NaN via 0/0 — use volatile to prevent compile-time folding | |
| volatile double zero = 0.0; | |
| double nan_d = zero / zero; | |
| print_double_breakdown("0.0/0.0", nan_d); | |
| printf(" Notice: the CPU-generated NaN has bit 51 = 1 (quiet NaN). That's the\n"); | |
| printf(" bit pattern territory we want to claim for our own use.\n\n"); | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Section 3: The QNAN Constant | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| static void section_qnan(void) { | |
| ruler(); | |
| printf("SECTION 3: The QNAN Constant — which quiet NaN bits clox claims\n"); | |
| ruler(); | |
| printf("\nThere are 2^51 = 2,251,799,813,685,248 distinct quiet NaN bit patterns.\n"); | |
| printf("The CPU only ever produces a tiny fraction of them for real arithmetic.\n"); | |
| printf("clox claims a few of those unused patterns to represent nil, true, false,\n"); | |
| printf("and object pointers.\n\n"); | |
| printf(" clox defines:\n"); | |
| printf(" QNAN = 0x7FFC000000000000\n"); | |
| printf(" SIGN_BIT = 0x8000000000000000\n\n"); | |
| uint64_t qnan = 0x7FFC000000000000ULL; | |
| uint64_t sign_bit = 0x8000000000000000ULL; | |
| printf(" QNAN in binary:\n"); | |
| printf(" "); print_bits64(qnan); printf("\n"); | |
| printf(" S EEEEEEEEEEE MMMM…\n"); | |
| printf(" 0 11111111111 1100 0000 … 0000\n"); | |
| printf(" ^^^^\n"); | |
| printf(" ||└─ bit 50: extra 1 to avoid Intel's 'QNaN Indefinite' value\n"); | |
| printf(" |└── bit 51: the 'quiet NaN' marker bit\n"); | |
| printf(" └─── bits 62-52: all-ones exponent = NaN territory\n\n"); | |
| printf(" Why not just 0x7FF8000000000000 (minimum quiet NaN)?\n"); | |
| printf(" Intel reserves 0x7FF8000000000000 as 'QNaN Floating-Point Indefinite'.\n"); | |
| printf(" By also setting bit 50 we step safely clear of that value.\n\n"); | |
| printf(" SIGN_BIT in binary:\n"); | |
| printf(" "); print_bits64(sign_bit); printf("\n"); | |
| printf(" └── bit 63: the IEEE 754 sign bit (1 = negative for normal doubles)\n"); | |
| printf(" For NaN-boxed values this bit has nothing to do with the sign of a number.\n"); | |
| printf(" clox repurposes it as the 'this value is an Obj pointer' tag.\n\n"); | |
| printf(" Together, SIGN_BIT | QNAN sets these bits:\n"); | |
| printf(" "); print_bits64(sign_bit | qnan); printf("\n\n"); | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Section 4: Numbers | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| typedef uint64_t Value; | |
| #define QNAN ((uint64_t)0x7FFC000000000000ULL) | |
| #define SIGN_BIT ((uint64_t)0x8000000000000000ULL) | |
| static inline Value numToValue(double num) { | |
| Value v; | |
| memcpy(&v, &num, sizeof(double)); | |
| return v; | |
| } | |
| static inline double valueToNum(Value v) { | |
| double num; | |
| memcpy(&num, &v, sizeof(Value)); | |
| return num; | |
| } | |
| #define IS_NUMBER(v) (((v) & QNAN) != QNAN) | |
| #define NUMBER_VAL(n) numToValue(n) | |
| #define AS_NUMBER(v) valueToNum(v) | |
| static void section_numbers(void) { | |
| ruler(); | |
| printf("SECTION 4: Numbers — the easiest case, zero conversion needed\n"); | |
| ruler(); | |
| printf("\nA Lox number is just a normal IEEE 754 double. No bit transformation.\n"); | |
| printf("The only trick is convincing the C compiler to treat those same bits as\n"); | |
| printf("a uint64_t — that's what the memcpy() dance does.\n\n"); | |
| printf(" numToValue / valueToNum use memcpy() for 'type punning':\n"); | |
| printf(" memcpy(&value, &num, 8) → same 64 bits, now typed as uint64_t\n"); | |
| printf(" memcpy(&num, &value, 8) → same 64 bits, now typed as double\n"); | |
| printf(" Modern compilers optimise this to zero instructions.\n\n"); | |
| printf(" IS_NUMBER(v) = ((v & QNAN) != QNAN)\n"); | |
| printf(" Normal doubles never have all 11 exponent bits set,\n"); | |
| printf(" so (bits & QNAN) will never equal QNAN for a real number.\n\n"); | |
| double nums[] = { 3.14, 0.0, -42.5, 1e300 }; | |
| const char *labels[] = { "3.14", "0.0", "-42.5", "1e300" }; | |
| for (int i = 0; i < 4; i++) { | |
| Value v = NUMBER_VAL(nums[i]); | |
| double back = AS_NUMBER(v); | |
| assert(back == nums[i]); | |
| printf(" %-6s bits=0x%016llX IS_NUMBER=%d AS_NUMBER=%.6g\n", | |
| labels[i], | |
| (unsigned long long)v, | |
| IS_NUMBER(v) ? 1 : 0, | |
| back); | |
| } | |
| printf("\n All IS_NUMBER checks pass: none of these doubles have QNAN bits set.\n\n"); | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Section 5: Nil, True, False | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| #define TAG_NIL 1 | |
| #define TAG_FALSE 2 | |
| #define TAG_TRUE 3 | |
| #define NIL_VAL ((Value)(QNAN | TAG_NIL)) | |
| #define FALSE_VAL ((Value)(QNAN | TAG_FALSE)) | |
| #define TRUE_VAL ((Value)(QNAN | TAG_TRUE)) | |
| #define IS_NIL(v) ((v) == NIL_VAL) | |
| #define IS_BOOL(v) (((v) | 1) == TRUE_VAL) | |
| #define BOOL_VAL(b) ((b) ? TRUE_VAL : FALSE_VAL) | |
| #define AS_BOOL(v) ((v) == TRUE_VAL) | |
| static void section_singletons(void) { | |
| ruler(); | |
| printf("SECTION 5: Nil, True, False — three sentinel values in the mantissa\n"); | |
| ruler(); | |
| printf("\nWe have 51 spare mantissa bits in our QNAN values. We use the lowest 2\n"); | |
| printf("as a 'type tag' to distinguish the three singleton values:\n\n"); | |
| printf(" TAG_NIL = %d (binary: 01)\n", TAG_NIL); | |
| printf(" TAG_FALSE = %d (binary: 10)\n", TAG_FALSE); | |
| printf(" TAG_TRUE = %d (binary: 11)\n\n", TAG_TRUE); | |
| printf(" NIL_VAL = QNAN | 01 = 0x%016llX\n", (unsigned long long)NIL_VAL); | |
| printf(" FALSE_VAL = QNAN | 10 = 0x%016llX\n", (unsigned long long)FALSE_VAL); | |
| printf(" TRUE_VAL = QNAN | 11 = 0x%016llX\n\n", (unsigned long long)TRUE_VAL); | |
| printf(" In binary (notice only the last two mantissa bits differ):\n"); | |
| printf(" NIL "); print_bits64(NIL_VAL); printf("\n"); | |
| printf(" FALSE "); print_bits64(FALSE_VAL); printf("\n"); | |
| printf(" TRUE "); print_bits64(TRUE_VAL); printf("\n\n"); | |
| printf("── IS_NIL: simple equality ─────────────────────────────────────────────────\n\n"); | |
| printf(" IS_NIL(v) = (v == NIL_VAL)\n"); | |
| printf(" There is only one nil, so exact equality is fine.\n\n"); | |
| printf(" IS_NIL(NIL_VAL) = %d ✓\n", IS_NIL(NIL_VAL) ? 1 : 0); | |
| printf(" IS_NIL(FALSE_VAL) = %d ✓\n\n", IS_NIL(FALSE_VAL) ? 1 : 0); | |
| printf("── IS_BOOL: the clever OR trick ────────────────────────────────────────────\n\n"); | |
| printf(" Naive version: (v == TRUE_VAL || v == FALSE_VAL)\n"); | |
| printf(" Problem: macros expand their argument every time they mention it.\n"); | |
| printf(" If 'v' is an expression with side effects, it would run twice.\n\n"); | |
| printf(" Safe version: ((v | 1) == TRUE_VAL)\n\n"); | |
| printf(" Why this works:\n"); | |
| printf(" FALSE_VAL ends in ...10\n"); | |
| printf(" TRUE_VAL ends in ...11\n"); | |
| printf(" (FALSE_VAL | 1) sets the low bit → ...11 = TRUE_VAL ✓\n"); | |
| printf(" (TRUE_VAL | 1) sets the low bit → ...11 = TRUE_VAL ✓\n"); | |
| printf(" Any other value ORed with 1 will not equal TRUE_VAL.\n\n"); | |
| printf(" FALSE_VAL | 1 = 0x%016llX == TRUE_VAL? %d ✓\n", | |
| (unsigned long long)(FALSE_VAL | 1), (FALSE_VAL | 1) == TRUE_VAL ? 1 : 0); | |
| printf(" TRUE_VAL | 1 = 0x%016llX == TRUE_VAL? %d ✓\n", | |
| (unsigned long long)(TRUE_VAL | 1), (TRUE_VAL | 1) == TRUE_VAL ? 1 : 0); | |
| printf(" NIL_VAL | 1 = 0x%016llX == TRUE_VAL? %d ✓\n\n", | |
| (unsigned long long)(NIL_VAL | 1), (NIL_VAL | 1) == TRUE_VAL ? 1 : 0); | |
| printf("── AS_BOOL & BOOL_VAL ──────────────────────────────────────────────────────\n\n"); | |
| printf(" AS_BOOL(v) = (v == TRUE_VAL)\n"); | |
| printf(" If it's not true, it must be false — no other Booleans exist in Lox.\n\n"); | |
| printf(" BOOL_VAL(true) = 0x%016llX (%s)\n", | |
| (unsigned long long)BOOL_VAL(1), AS_BOOL(BOOL_VAL(1)) ? "true" : "false"); | |
| printf(" BOOL_VAL(false) = 0x%016llX (%s)\n\n", | |
| (unsigned long long)BOOL_VAL(0), AS_BOOL(BOOL_VAL(0)) ? "true" : "false"); | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Section 6: Object Pointers | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| #define OBJ_VAL(ptr) ((Value)(SIGN_BIT | QNAN | (uint64_t)(uintptr_t)(ptr))) | |
| #define AS_OBJ(v) ((void*)(uintptr_t)((v) & ~(SIGN_BIT | QNAN))) | |
| #define IS_OBJ(v) (((v) & (QNAN | SIGN_BIT)) == (QNAN | SIGN_BIT)) | |
| static void section_objects(void) { | |
| ruler(); | |
| printf("SECTION 6: Object Pointers — sign bit as the 'this is a pointer' tag\n"); | |
| ruler(); | |
| printf("\nWe need to store a 64-bit pointer inside a NaN. Here's why we can:\n\n"); | |
| printf(" On x86-64, pointers are technically 64-bit but only 48 bits are used.\n"); | |
| printf(" Bits 63-48 are always 0 in user-space addresses.\n"); | |
| printf(" malloc() / heap allocations respect this — let's verify:\n\n"); | |
| // Allocate something on the heap and show its address | |
| void *dummy = malloc(64); | |
| assert(dummy != NULL); | |
| uint64_t ptr_bits = (uint64_t)(uintptr_t)dummy; | |
| printf(" malloc() returned: %p\n", dummy); | |
| printf(" as uint64_t: 0x%016llX\n", (unsigned long long)ptr_bits); | |
| printf(" in binary: "); print_bits64(ptr_bits); printf("\n"); | |
| printf(" top 16 bits are: 0x%04llX (always 0 on x86-64 user-space)\n\n", | |
| (unsigned long long)(ptr_bits >> 48)); | |
| printf(" We have 51 spare bits in our QNAN values.\n"); | |
| printf(" A 48-bit pointer fits inside those bits with 3 bits left over.\n\n"); | |
| printf(" But which bit do we use as the tag to say 'this is a pointer'?\n"); | |
| printf(" We use the sign bit (bit 63). When set along with QNAN bits, it can't\n"); | |
| printf(" be a real double (negative NaN values aren't produced by normal arithmetic\n"); | |
| printf(" in a way that would collide), so it's safe to claim.\n\n"); | |
| printf("── OBJ_VAL: encoding ───────────────────────────────────────────────────────\n\n"); | |
| printf(" OBJ_VAL(ptr) = SIGN_BIT | QNAN | (uint64_t)ptr\n\n"); | |
| Value encoded = OBJ_VAL(dummy); | |
| printf(" ptr = 0x%016llX\n", (unsigned long long)ptr_bits); | |
| printf(" QNAN = 0x%016llX\n", (unsigned long long)QNAN); | |
| printf(" SIGN = 0x%016llX\n", (unsigned long long)SIGN_BIT); | |
| printf(" encoded = 0x%016llX\n\n", (unsigned long long)encoded); | |
| printf(" Encoded in binary:\n"); | |
| printf(" ptr "); print_bits64(ptr_bits); printf("\n"); | |
| printf(" encoded "); print_bits64(encoded); printf("\n"); | |
| printf(" ^──────── sign bit now 1 (pointer tag)\n"); | |
| printf(" ^───────── exponent bits all 1 (QNAN territory)\n"); | |
| printf(" ^─ bit 51 & 50 set (our QNAN pattern)\n\n"); | |
| printf("── AS_OBJ: decoding ────────────────────────────────────────────────────────\n\n"); | |
| printf(" AS_OBJ(v) = (void*)(v & ~(SIGN_BIT | QNAN))\n\n"); | |
| printf(" ~(SIGN_BIT | QNAN) = 0x%016llX\n", (unsigned long long)~(SIGN_BIT | QNAN)); | |
| printf(" This mask has 0s exactly where the tag bits are, 1s everywhere else.\n"); | |
| printf(" AND-ing with it strips the tag, leaving the raw pointer.\n\n"); | |
| void *recovered = AS_OBJ(encoded); | |
| printf(" Recovered ptr = %p (== original? %s)\n\n", | |
| recovered, recovered == dummy ? "yes ✓" : "NO — bug!"); | |
| printf("── IS_OBJ: detection ───────────────────────────────────────────────────────\n\n"); | |
| printf(" IS_OBJ(v) = ((v & (QNAN | SIGN_BIT)) == (QNAN | SIGN_BIT))\n\n"); | |
| printf(" Why not just check the sign bit?\n"); | |
| printf(" Because negative doubles ALSO have the sign bit set, but they are\n"); | |
| printf(" NOT in NaN territory (their exponent isn't all-ones).\n"); | |
| printf(" We need BOTH QNAN bits AND the sign bit to be set.\n\n"); | |
| double neg = -3.14; | |
| uint64_t neg_bits = double_bits(neg); | |
| printf(" -3.14 bits = 0x%016llX IS_OBJ=%d (sign set, but no QNAN → not an obj) ✓\n", | |
| (unsigned long long)neg_bits, IS_OBJ(neg_bits) ? 1 : 0); | |
| printf(" encoded = 0x%016llX IS_OBJ=%d ✓\n\n", | |
| (unsigned long long)encoded, IS_OBJ(encoded) ? 1 : 0); | |
| free(dummy); | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Section 7: Decision Tree | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| static const char *classify(Value v) { | |
| if (IS_NUMBER(v)) return "number"; | |
| if (IS_OBJ(v)) return "Obj pointer"; | |
| if (IS_NIL(v)) return "nil"; | |
| if (IS_BOOL(v)) return "bool"; | |
| return "unknown"; | |
| } | |
| static void section_dispatch(void) { | |
| ruler(); | |
| printf("SECTION 7: The Full Decision Tree\n"); | |
| ruler(); | |
| printf("\n"); | |
| printf(" Given a 64-bit Value v, the checks happen in this order:\n\n"); | |
| printf(" 1. (v & QNAN) != QNAN → number (no QNAN bits → normal double)\n"); | |
| printf(" 2. (v & (QNAN|SIGN_BIT))\n"); | |
| printf(" == (QNAN|SIGN_BIT) → Obj ptr (QNAN + sign bit set)\n"); | |
| printf(" 3. v == NIL_VAL → nil\n"); | |
| printf(" 4. (v | 1) == TRUE_VAL → bool\n\n"); | |
| printf(" Order matters: check IS_OBJ before IS_NIL/IS_BOOL, because the sign bit\n"); | |
| printf(" overlaps with the singleton values' tag space.\n\n"); | |
| void *fake_ptr = malloc(8); | |
| assert(fake_ptr != NULL); | |
| Value examples[] = { | |
| NUMBER_VAL(2.718), | |
| NUMBER_VAL(-99.0), | |
| NIL_VAL, | |
| TRUE_VAL, | |
| FALSE_VAL, | |
| OBJ_VAL(fake_ptr), | |
| }; | |
| const char *desc[] = { | |
| "NUMBER_VAL(2.718)", | |
| "NUMBER_VAL(-99.0)", | |
| "NIL_VAL", | |
| "TRUE_VAL", | |
| "FALSE_VAL", | |
| "OBJ_VAL(ptr)", | |
| }; | |
| printf(" %-22s 0x%-16s → %s\n", "Value", "bits", "type"); | |
| printf(" %-22s %-18s %s\n", "─────", "────", "────"); | |
| for (int i = 0; i < 6; i++) { | |
| printf(" %-22s 0x%016llX → %s\n", | |
| desc[i], | |
| (unsigned long long)examples[i], | |
| classify(examples[i])); | |
| } | |
| printf("\n"); | |
| free(fake_ptr); | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Section 8: Connection to clox's value.h | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| static void section_clox_macros(void) { | |
| ruler(); | |
| printf("SECTION 8: clox's value.h macros — annotated\n"); | |
| ruler(); | |
| printf("\n"); | |
| printf(" // Constants\n"); | |
| printf(" #define SIGN_BIT 0x8000000000000000 // bit 63: Obj pointer tag\n"); | |
| printf(" #define QNAN 0x7FFC000000000000 // bits 63-50: quiet NaN sentinel\n"); | |
| printf(" #define TAG_NIL 1 // 01 in lowest mantissa bits\n"); | |
| printf(" #define TAG_FALSE 2 // 10\n"); | |
| printf(" #define TAG_TRUE 3 // 11\n\n"); | |
| printf(" // Type checks\n"); | |
| printf(" #define IS_NUMBER(v) (((v) & QNAN) != QNAN) // §4: no QNAN → double\n"); | |
| printf(" #define IS_OBJ(v) (((v) & (QNAN|SIGN_BIT)) // §6: both tag bits set\n"); | |
| printf(" == (QNAN|SIGN_BIT))\n"); | |
| printf(" #define IS_NIL(v) ((v) == NIL_VAL) // §5: exact match\n"); | |
| printf(" #define IS_BOOL(v) (((v) | 1) == TRUE_VAL) // §5: OR trick\n\n"); | |
| printf(" // Sentinel values\n"); | |
| printf(" #define NIL_VAL (QNAN | TAG_NIL) = 0x%016llX\n", (unsigned long long)NIL_VAL); | |
| printf(" #define FALSE_VAL (QNAN | TAG_FALSE) = 0x%016llX\n", (unsigned long long)FALSE_VAL); | |
| printf(" #define TRUE_VAL (QNAN | TAG_TRUE) = 0x%016llX\n\n", (unsigned long long)TRUE_VAL); | |
| printf(" // Wrap C values into clox Values\n"); | |
| printf(" #define NUMBER_VAL(n) numToValue(n) // §4: memcpy bits double→uint64_t\n"); | |
| printf(" #define BOOL_VAL(b) ((b) ? TRUE_VAL : FALSE_VAL) // §5\n"); | |
| printf(" #define OBJ_VAL(ptr) (SIGN_BIT | QNAN | (uint64_t)ptr) // §6\n\n"); | |
| printf(" // Unwrap clox Values back to C types\n"); | |
| printf(" #define AS_NUMBER(v) valueToNum(v) // §4: memcpy bits uint64_t→double\n"); | |
| printf(" #define AS_BOOL(v) ((v) == TRUE_VAL) // §5\n"); | |
| printf(" #define AS_OBJ(v) ((Obj*)(v & ~(SIGN_BIT|QNAN))) // §6: strip tag bits\n\n"); | |
| printf(" // Memory layout comparison:\n"); | |
| printf(" // Tagged union Value: 16 bytes (8 byte tag + 8 byte payload + padding)\n"); | |
| printf(" // NaN-boxed Value: 8 bytes (one uint64_t)\n"); | |
| printf(" // Savings: 50%% → more Values fit in a cache line → fewer cache misses\n\n"); | |
| printf(" sizeof(Value) = %zu bytes\n\n", sizeof(Value)); | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Main | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| int main(void) { | |
| printf("\n"); | |
| printf("████████████████████████████████████████████████████████████████████████████████\n"); | |
| printf(" NaN Boxing Explainer — how clox packs type + value into 8 bytes\n"); | |
| printf("████████████████████████████████████████████████████████████████████████████████\n"); | |
| printf("\n"); | |
| printf(" Legend for bit strings throughout this file:\n"); | |
| printf(" S EEEEEEEEEEE MMMM MMMM MMMM MMMM MMMM MMMM MMMM MMMM MMMM MMMM MMMM MMMM\n"); | |
| printf(" │ ─────┬───── ──────────────────────────────┬─────────────────────────────\n"); | |
| printf(" │ │ │\n"); | |
| printf(" sign exponent (11 bits) mantissa (52 bits, groups of 4)\n\n"); | |
| section_bit_ops(); | |
| section_ieee754(); | |
| section_qnan(); | |
| section_numbers(); | |
| section_singletons(); | |
| section_objects(); | |
| section_dispatch(); | |
| section_clox_macros(); | |
| ruler(); | |
| printf("Done. All assertions passed.\n"); | |
| ruler(); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment