Last active
October 4, 2025 13:30
-
-
Save qntm/e8e7591960ed8b26b7b2eaec5ebce709 to your computer and use it in GitHub Desktop.
How many valid JSON strings are there?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| Code for enumerating valid JSON strings. | |
| Bounds checking is the responsibility of the caller. | |
| The case N = 0 is intentionally not handled. | |
| This code attempts to be somewhat readable without significantly impacting performance. | |
| https://qntm.org/jsoncount | |
| */ | |
| import assert from 'node:assert/strict' | |
| // First some constants. | |
| // Use BigInts throughout because we're going to be exceeding `Number.MAX_SAFE_INTEGER` at N = 5 | |
| const NUM_ES = BigInt(['E', 'e'].length) | |
| const NUM_SIGNS = BigInt(['-', '+'].length) | |
| const NUM_DIGITS = BigInt(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'].length) | |
| const MIN_CHAR = 0x0020n | |
| const MAX_CHAR = 0x10FFFFn | |
| const NUM_REGULAR_CHARS = (MAX_CHAR + 1n) - MIN_CHAR - BigInt(['\\', '"'].length) // 1,114,078 | |
| const NUM_ESCAPES = BigInt(['"', '\\', '/', 'b', 'f', 'n', 'r', 't'].length) // 8 | |
| const NUM_HEX_CHARS = BigInt([ | |
| '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', | |
| 'A', 'B', 'C', 'D', 'E', 'F', | |
| 'a', 'b', 'c', 'd', 'e', 'f' | |
| ].length) // 22, not 16 | |
| const NUM_HEX_ESCAPES = NUM_HEX_CHARS ** 4n // 234,256, not 65,536 | |
| const NUM_WS = BigInt(['\u0020', '\u000A', '\u000D', '\u0009'].length) // 4 | |
| // Memoization is incredibly important as the numbers get larger, | |
| // there are many combinations | |
| const memoize = f => { | |
| const known = [] | |
| return n => known[n] ??= f(n) | |
| } | |
| // Now for some enumeration. | |
| // Start with the numeric literals. | |
| // E.g. "123" | |
| // [1-9][0-9]* | |
| const countPositiveIntegers = n => | |
| (NUM_DIGITS - 1n) * NUM_DIGITS ** BigInt(n - 1) | |
| assert.equal(countPositiveIntegers(1), 9n) | |
| assert.equal(countPositiveIntegers(2), 90n) | |
| assert.equal(countPositiveIntegers(3), 900n) | |
| assert.equal(countPositiveIntegers(4), 9000n) | |
| // E.g. "123.456" | |
| // 0.[0-9]{3} | |
| // [1-9].[0-9][0-9][0-9], [1-9][0-9].[0-9][0-9], [1-9][0-9][0-9].[0-9] | |
| const countDecimals = n => | |
| NUM_DIGITS ** BigInt(n - 2) + | |
| ((NUM_DIGITS - 1n) * NUM_DIGITS ** BigInt(n - 2)) * BigInt(n - 2) | |
| assert.equal(countDecimals(3), 100n) | |
| assert.equal(countDecimals(4), 1900n) | |
| assert.equal(countDecimals(5), 28000n) | |
| assert.equal(countDecimals(6), 370000n) | |
| // E.g. "123e456" | |
| // 0[eE][0-9][0-9][0-9] | |
| // [1-9][eE][0-9][0-9][0-9], [1-9][0-9][eE][0-9][0-9], [1-9][0-9][0-9][eE][0-9] | |
| const countExponents = n => | |
| NUM_ES * NUM_DIGITS ** BigInt(n - 2) + | |
| ((NUM_DIGITS - 1n) * NUM_ES * NUM_DIGITS ** BigInt(n - 2)) * BigInt(n - 2) | |
| assert.equal(countExponents(3), 200n) | |
| assert.equal(countExponents(4), 3800n) | |
| assert.equal(countExponents(5), 56000n) | |
| assert.equal(countExponents(6), 740000n) | |
| // E.g. "123e+456" | |
| // 0[eE][+-][0-9][0-9][0-9] | |
| // [1-9][eE][+-][0-9][0-9][0-9], [1-9][0-9][eE][+-][0-9][0-9], [1-9][0-9][0-9][eE][+-][0-9] | |
| const countSignedExponents = n => | |
| NUM_ES * NUM_SIGNS * NUM_DIGITS ** BigInt(n - 3) + | |
| ((NUM_DIGITS - 1n) * NUM_ES * NUM_SIGNS * NUM_DIGITS ** BigInt(n - 3)) * BigInt(n - 3) | |
| assert.equal(countSignedExponents(4), 400n) | |
| assert.equal(countSignedExponents(5), 7600n) | |
| assert.equal(countSignedExponents(6), 112000n) | |
| assert.equal(countSignedExponents(7), 1480000n) | |
| // E.g. "123.456e789" | |
| // 0.[0-9][eE][0-9], 1 combination | |
| // [1-9].[0-9][eE][0-9], 1 combination | |
| // 0.[0-9][eE][0-9][0-9], 0.[0-9][0-9][eE][0-9], 2 combinations | |
| // [1-9].[0-9][eE][0-9][0-9], [1-9].[0-9][0-9][eE][0-9], [1-9][0-9].[0-9][eE][0-9], 3 combinations | |
| const countDecimalExponents = n => | |
| (NUM_ES * NUM_DIGITS ** BigInt(n - 3)) * BigInt(n - 4) + | |
| ((NUM_DIGITS - 1n) * NUM_ES * NUM_DIGITS ** BigInt(n - 3)) * (BigInt(n - 4) * BigInt(n - 3) / 2n) | |
| assert.equal(countDecimalExponents(5), 2000n) | |
| assert.equal(countDecimalExponents(6), 58000n) | |
| assert.equal(countDecimalExponents(7), 1140000n) | |
| assert.equal(countDecimalExponents(8), 18800000n) | |
| // E.g. "123.456E-789" | |
| // 0.[0-9][eE][+-][0-9] | |
| // [1-9].[0-9][eE][+-][0-9] | |
| const countDecimalSignedExponents = n => | |
| (NUM_ES * NUM_SIGNS * NUM_DIGITS ** BigInt(n - 4)) * BigInt(n - 5) + | |
| ((NUM_DIGITS - 1n) * NUM_ES * NUM_SIGNS * NUM_DIGITS ** BigInt(n - 4)) * (BigInt(n - 5) * BigInt(n - 4) / 2n) | |
| assert.equal(countDecimalSignedExponents(6), 4000n) | |
| assert.equal(countDecimalSignedExponents(7), 116000n) | |
| assert.equal(countDecimalSignedExponents(8), 2280000n) | |
| assert.equal(countDecimalSignedExponents(9), 37600000n) | |
| const countNonNegativeNumbers = n => { | |
| let l = 0n | |
| if (n === 1) { | |
| l += 1n // "0" | |
| } | |
| l += countPositiveIntegers(n) | |
| if (n >= 3) { | |
| l += countDecimals(n) | |
| l += countExponents(n) | |
| } | |
| if (n >= 4) { | |
| l += countSignedExponents(n) | |
| } | |
| if (n >= 5) { | |
| l += countDecimalExponents(n) | |
| } | |
| if (n >= 6) { | |
| l += countDecimalSignedExponents(n) | |
| } | |
| return l | |
| } | |
| assert.equal(countNonNegativeNumbers(4), 15100n) | |
| const countNegativeNumbers = n => | |
| countNonNegativeNumbers(n - 1) | |
| const countNumbers = n => { | |
| let l = 0n | |
| l += countNonNegativeNumbers(n) | |
| if (n >= 2) { | |
| l += countNegativeNumbers(n) | |
| } | |
| return l | |
| } | |
| assert.equal(countNumbers(1), 10n) | |
| assert.equal(countNumbers(2), 100n) | |
| assert.equal(countNumbers(3), 1290n) | |
| assert.equal(countNumbers(4), 16300n) | |
| assert.equal(countNumbers(5), 198700n) | |
| assert.equal(countNumbers(6), 2367600n) | |
| // Now for the strings... | |
| // Highly recursive, memoization is crucial here | |
| const countStringInteriors = memoize(n => { | |
| if (n === 0) { | |
| return 1n | |
| } | |
| let l = 0n | |
| if (n >= 1) { | |
| l += countStringInteriors(n - 1) * NUM_REGULAR_CHARS | |
| } | |
| if (n >= 2) { | |
| l += countStringInteriors(n - 2) * NUM_ESCAPES | |
| } | |
| if (n >= 6) { | |
| l += countStringInteriors(n - 6) * NUM_HEX_ESCAPES | |
| } | |
| return l | |
| }) | |
| assert.equal(countStringInteriors(0), 1n) | |
| assert.equal(countStringInteriors(1), 1114078n) | |
| assert.equal(countStringInteriors(2), 1241169790092n) | |
| assert.equal(countStringInteriors(3), 1382759957415027800n) | |
| const countStrings = n => | |
| countStringInteriors(n - 2) | |
| assert.equal(countStrings(2), 1n) | |
| assert.equal(countStrings(3), 1114078n) | |
| assert.equal(countStrings(4), 1241169790092n) | |
| assert.equal(countStrings(4), 1114078n * 1114078n + 8n) | |
| // Arrays | |
| const countArrayInteriorsK = memoize(n => memoize(k => { | |
| if (k === 0) { | |
| return NUM_WS ** BigInt(n) | |
| } | |
| if (k === 1) { | |
| return countElements(n) | |
| } | |
| // Try every possible location for the comma after the first element | |
| let l = 0n | |
| for (let i = 1; i + (k - 1) * 2 <= n; i++) { | |
| l += countElements(i) * countArrayInteriorsK(n - (i + 1))(k - 1) | |
| } | |
| return l | |
| })) | |
| const countArrayInteriors = memoize(n => { | |
| // Try every possible number of array elements | |
| let l = 0n | |
| l += countArrayInteriorsK(n)(0) | |
| for (let k = 1; k * 2 - 1 <= n; k++) { | |
| l += countArrayInteriorsK(n)(k) | |
| } | |
| return l | |
| }) | |
| const countArrays = n => | |
| countArrayInteriors(n - 2) | |
| // And lastly, objects | |
| // A key is a whitespaced string | |
| const countKeys = memoize(n => { | |
| // Try every possible size of string | |
| let l = 0n | |
| for (let i = 2; i <= n; i++) { | |
| l += countStrings(i) * NUM_WS ** BigInt(n - i) * BigInt(n - i + 1) | |
| } | |
| return l | |
| }) | |
| const countMembers = memoize(n => { | |
| // Try every possible location for the colon between the key and value | |
| let l = 0n | |
| for (let i = 2; i + 2 <= n; i++) { | |
| l += countKeys(i) * countElements(n - (i + 1)) | |
| } | |
| return l | |
| }) | |
| const countObjectInteriorsK = memoize(n => memoize(k => { | |
| if (k === 0) { | |
| return NUM_WS ** BigInt(n) | |
| } | |
| if (k === 1) { | |
| return countMembers(n) | |
| } | |
| // Try every possible location for the comma after the first key and value | |
| let l = 0n | |
| for (let i = 4; i + (k - 1) * 5 <= n; i++) { | |
| l += countMembers(i) * countObjectInteriorsK(n - (i + 1))(k - 1) | |
| } | |
| return l | |
| })) | |
| const countObjectInteriors = memoize(n => { | |
| // Try every possible number of key-value pairs | |
| let l = 0n | |
| l += countObjectInteriorsK(n)(0) | |
| for (let k = 1; 5 * k - 1 <= n; k++) { | |
| l += countObjectInteriorsK(n)(k) | |
| } | |
| return l | |
| }) | |
| const countObjects = n => | |
| countObjectInteriors(n - 2) | |
| const countValues = memoize(n => { | |
| let l = 0n | |
| if (n === 4) { | |
| l += 2n // "null", "true" | |
| } | |
| if (n === 5) { | |
| l += 1n // "false" | |
| } | |
| if (n >= 1) { | |
| l += countNumbers(n) | |
| } | |
| if (n >= 2) { | |
| l += countStrings(n) | |
| l += countArrays(n) | |
| l += countObjects(n) | |
| } | |
| return l | |
| }) | |
| const countElements = memoize(n => { | |
| // Try every possible size of value | |
| let l = 0n | |
| for (let i = 1; i <= n; i++) { | |
| l += countValues(i) * NUM_WS ** BigInt(n - i) * BigInt(n - i + 1) | |
| } | |
| return l | |
| }) | |
| assert.equal(countArrayInteriorsK(0)(0), 1n) | |
| assert.equal(countArrayInteriorsK(1)(0), 4n) | |
| assert.equal(countArrayInteriorsK(1)(1), 10n) | |
| assert.equal(countArrayInteriorsK(2)(0), 16n) | |
| assert.equal(countArrayInteriorsK(2)(1), 183n) | |
| assert.equal(countArrayInteriorsK(3)(0), 64n) | |
| assert.equal(countArrayInteriorsK(3)(1), 1116690n) | |
| assert.equal(countArrayInteriorsK(3)(2), 100n) | |
| assert.equal(countArrayInteriorsK(4)(0), 256n) | |
| assert.equal(countArrayInteriorsK(4)(1), 1241178737201n) | |
| assert.equal(countArrayInteriorsK(4)(2), 3660n) // 183 * 10 + 10 * 183 | |
| assert.equal(countArrayInteriorsK(5)(0), 1024n) | |
| assert.equal(countArrayInteriorsK(5)(1), 1382769886828373987n) | |
| assert.equal(countArrayInteriorsK(5)(2), 22367289n) | |
| assert.equal(countArrayInteriorsK(5)(3), 1000n) | |
| assert.equal(countArrayInteriorsK(39)(20), 100000000000000000000n) | |
| assert.equal(countArrayInteriors(0), 1n) | |
| assert.equal(countArrayInteriors(1), 14n) | |
| assert.equal(countArrayInteriors(2), 199n) | |
| assert.equal(countArrayInteriors(3), 1116854n) | |
| assert.equal(countArrayInteriors(4), 1241178741117n) | |
| assert.equal(countArrayInteriors(5), 1382769886850743300n) | |
| assert.equal(countArrays(2), 1n) | |
| assert.equal(countArrays(3), 14n) | |
| assert.equal(countArrays(4), 199n) | |
| assert.equal(countArrays(5), 1116854n) | |
| assert.equal(countArrays(6), 1241178741117n) | |
| assert.equal(countArrays(7), 1382769886850743300n) | |
| assert.equal(countKeys(2), 1n) | |
| assert.equal(countKeys(3), 1114086n) | |
| assert.equal(countKeys(4), 1241178702764n) | |
| assert.equal(countObjectInteriorsK(0)(0), 1n) | |
| assert.equal(countObjectInteriorsK(1)(0), 4n) | |
| assert.equal(countObjectInteriorsK(2)(0), 16n) | |
| assert.equal(countObjectInteriorsK(3)(0), 64n) | |
| assert.equal(countObjectInteriorsK(4)(0), 256n) | |
| assert.equal(countObjectInteriorsK(4)(1), 10n) // '{"":0}' through '{"":9}' | |
| assert.equal(countObjectInteriorsK(5)(0), 1024n) | |
| assert.equal(countObjectInteriorsK(5)(1), 11141043n) // 1114086 * 10 + 1 * 183 | |
| assert.equal(countObjectInteriors(0), 1n) | |
| assert.equal(countObjectInteriors(1), 4n) | |
| assert.equal(countObjectInteriors(2), 16n) | |
| assert.equal(countObjectInteriors(3), 64n) | |
| assert.equal(countObjectInteriors(4), 266n) | |
| assert.equal(countObjectInteriors(5), 11142067n) | |
| assert.equal(countObjects(2), 1n) | |
| assert.equal(countObjects(3), 4n) | |
| assert.equal(countObjects(4), 16n) | |
| assert.equal(countObjects(5), 64n) | |
| assert.equal(countObjects(6), 266n) | |
| assert.equal(countObjects(7), 11142067n) | |
| assert.equal(countValues(1), 10n) // 10 numbers | |
| assert.equal(countValues(2), 103n) // 100 numbers, 1 string, 1 array, 1 object | |
| assert.equal(countValues(3), 1115386n) // 1290 numbers, 1114078 strings, 14 arrays, 4 objects | |
| assert.equal(countValues(4), 1241169806609n) // 1 null, 1 true, 16300 numbers, 1241169790092 strings, 199 arrays, 16 objects | |
| assert.equal(countValues(5), 1382759957416343419n) // 1 false, 198700 numbers, 1382759957415027800 strings, 1116854 arrays, 64 objects | |
| assert.equal(countElements(1), 10n) | |
| assert.equal(countElements(2), 183n) | |
| assert.equal(countElements(3), 1116690n) | |
| assert.equal(countElements(4), 1241178737201n) | |
| assert.equal(countElements(5), 1382769886828373987n) | |
| for (let n = 1; n <= 100; n++) { | |
| console.log(n, countElements(n)) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment