Skip to content

Instantly share code, notes, and snippets.

@qntm
Last active October 4, 2025 13:30
Show Gist options
  • Save qntm/e8e7591960ed8b26b7b2eaec5ebce709 to your computer and use it in GitHub Desktop.
Save qntm/e8e7591960ed8b26b7b2eaec5ebce709 to your computer and use it in GitHub Desktop.
How many valid JSON strings are there?
/**
Code for enumerating valid JSON strings.
Bounds checking is the responsibility of the caller.
The case N = 0 is intentionally not handled.
This code attempts to be somewhat readable without significantly impacting performance.
https://qntm.org/jsoncount
*/
import assert from 'node:assert/strict'
// First some constants.
// Use BigInts throughout because we're going to be exceeding `Number.MAX_SAFE_INTEGER` at N = 5
const NUM_ES = BigInt(['E', 'e'].length)
const NUM_SIGNS = BigInt(['-', '+'].length)
const NUM_DIGITS = BigInt(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'].length)
const MIN_CHAR = 0x0020n
const MAX_CHAR = 0x10FFFFn
const NUM_REGULAR_CHARS = (MAX_CHAR + 1n) - MIN_CHAR - BigInt(['\\', '"'].length) // 1,114,078
const NUM_ESCAPES = BigInt(['"', '\\', '/', 'b', 'f', 'n', 'r', 't'].length) // 8
const NUM_HEX_CHARS = BigInt([
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F',
'a', 'b', 'c', 'd', 'e', 'f'
].length) // 22, not 16
const NUM_HEX_ESCAPES = NUM_HEX_CHARS ** 4n // 234,256, not 65,536
const NUM_WS = BigInt(['\u0020', '\u000A', '\u000D', '\u0009'].length) // 4
// Memoization is incredibly important as the numbers get larger,
// there are many combinations
const memoize = f => {
const known = []
return n => known[n] ??= f(n)
}
// Now for some enumeration.
// Start with the numeric literals.
// E.g. "123"
// [1-9][0-9]*
const countPositiveIntegers = n =>
(NUM_DIGITS - 1n) * NUM_DIGITS ** BigInt(n - 1)
assert.equal(countPositiveIntegers(1), 9n)
assert.equal(countPositiveIntegers(2), 90n)
assert.equal(countPositiveIntegers(3), 900n)
assert.equal(countPositiveIntegers(4), 9000n)
// E.g. "123.456"
// 0.[0-9]{3}
// [1-9].[0-9][0-9][0-9], [1-9][0-9].[0-9][0-9], [1-9][0-9][0-9].[0-9]
const countDecimals = n =>
NUM_DIGITS ** BigInt(n - 2) +
((NUM_DIGITS - 1n) * NUM_DIGITS ** BigInt(n - 2)) * BigInt(n - 2)
assert.equal(countDecimals(3), 100n)
assert.equal(countDecimals(4), 1900n)
assert.equal(countDecimals(5), 28000n)
assert.equal(countDecimals(6), 370000n)
// E.g. "123e456"
// 0[eE][0-9][0-9][0-9]
// [1-9][eE][0-9][0-9][0-9], [1-9][0-9][eE][0-9][0-9], [1-9][0-9][0-9][eE][0-9]
const countExponents = n =>
NUM_ES * NUM_DIGITS ** BigInt(n - 2) +
((NUM_DIGITS - 1n) * NUM_ES * NUM_DIGITS ** BigInt(n - 2)) * BigInt(n - 2)
assert.equal(countExponents(3), 200n)
assert.equal(countExponents(4), 3800n)
assert.equal(countExponents(5), 56000n)
assert.equal(countExponents(6), 740000n)
// E.g. "123e+456"
// 0[eE][+-][0-9][0-9][0-9]
// [1-9][eE][+-][0-9][0-9][0-9], [1-9][0-9][eE][+-][0-9][0-9], [1-9][0-9][0-9][eE][+-][0-9]
const countSignedExponents = n =>
NUM_ES * NUM_SIGNS * NUM_DIGITS ** BigInt(n - 3) +
((NUM_DIGITS - 1n) * NUM_ES * NUM_SIGNS * NUM_DIGITS ** BigInt(n - 3)) * BigInt(n - 3)
assert.equal(countSignedExponents(4), 400n)
assert.equal(countSignedExponents(5), 7600n)
assert.equal(countSignedExponents(6), 112000n)
assert.equal(countSignedExponents(7), 1480000n)
// E.g. "123.456e789"
// 0.[0-9][eE][0-9], 1 combination
// [1-9].[0-9][eE][0-9], 1 combination
// 0.[0-9][eE][0-9][0-9], 0.[0-9][0-9][eE][0-9], 2 combinations
// [1-9].[0-9][eE][0-9][0-9], [1-9].[0-9][0-9][eE][0-9], [1-9][0-9].[0-9][eE][0-9], 3 combinations
const countDecimalExponents = n =>
(NUM_ES * NUM_DIGITS ** BigInt(n - 3)) * BigInt(n - 4) +
((NUM_DIGITS - 1n) * NUM_ES * NUM_DIGITS ** BigInt(n - 3)) * (BigInt(n - 4) * BigInt(n - 3) / 2n)
assert.equal(countDecimalExponents(5), 2000n)
assert.equal(countDecimalExponents(6), 58000n)
assert.equal(countDecimalExponents(7), 1140000n)
assert.equal(countDecimalExponents(8), 18800000n)
// E.g. "123.456E-789"
// 0.[0-9][eE][+-][0-9]
// [1-9].[0-9][eE][+-][0-9]
const countDecimalSignedExponents = n =>
(NUM_ES * NUM_SIGNS * NUM_DIGITS ** BigInt(n - 4)) * BigInt(n - 5) +
((NUM_DIGITS - 1n) * NUM_ES * NUM_SIGNS * NUM_DIGITS ** BigInt(n - 4)) * (BigInt(n - 5) * BigInt(n - 4) / 2n)
assert.equal(countDecimalSignedExponents(6), 4000n)
assert.equal(countDecimalSignedExponents(7), 116000n)
assert.equal(countDecimalSignedExponents(8), 2280000n)
assert.equal(countDecimalSignedExponents(9), 37600000n)
const countNonNegativeNumbers = n => {
let l = 0n
if (n === 1) {
l += 1n // "0"
}
l += countPositiveIntegers(n)
if (n >= 3) {
l += countDecimals(n)
l += countExponents(n)
}
if (n >= 4) {
l += countSignedExponents(n)
}
if (n >= 5) {
l += countDecimalExponents(n)
}
if (n >= 6) {
l += countDecimalSignedExponents(n)
}
return l
}
assert.equal(countNonNegativeNumbers(4), 15100n)
const countNegativeNumbers = n =>
countNonNegativeNumbers(n - 1)
const countNumbers = n => {
let l = 0n
l += countNonNegativeNumbers(n)
if (n >= 2) {
l += countNegativeNumbers(n)
}
return l
}
assert.equal(countNumbers(1), 10n)
assert.equal(countNumbers(2), 100n)
assert.equal(countNumbers(3), 1290n)
assert.equal(countNumbers(4), 16300n)
assert.equal(countNumbers(5), 198700n)
assert.equal(countNumbers(6), 2367600n)
// Now for the strings...
// Highly recursive, memoization is crucial here
const countStringInteriors = memoize(n => {
if (n === 0) {
return 1n
}
let l = 0n
if (n >= 1) {
l += countStringInteriors(n - 1) * NUM_REGULAR_CHARS
}
if (n >= 2) {
l += countStringInteriors(n - 2) * NUM_ESCAPES
}
if (n >= 6) {
l += countStringInteriors(n - 6) * NUM_HEX_ESCAPES
}
return l
})
assert.equal(countStringInteriors(0), 1n)
assert.equal(countStringInteriors(1), 1114078n)
assert.equal(countStringInteriors(2), 1241169790092n)
assert.equal(countStringInteriors(3), 1382759957415027800n)
const countStrings = n =>
countStringInteriors(n - 2)
assert.equal(countStrings(2), 1n)
assert.equal(countStrings(3), 1114078n)
assert.equal(countStrings(4), 1241169790092n)
assert.equal(countStrings(4), 1114078n * 1114078n + 8n)
// Arrays
const countArrayInteriorsK = memoize(n => memoize(k => {
if (k === 0) {
return NUM_WS ** BigInt(n)
}
if (k === 1) {
return countElements(n)
}
// Try every possible location for the comma after the first element
let l = 0n
for (let i = 1; i + (k - 1) * 2 <= n; i++) {
l += countElements(i) * countArrayInteriorsK(n - (i + 1))(k - 1)
}
return l
}))
const countArrayInteriors = memoize(n => {
// Try every possible number of array elements
let l = 0n
l += countArrayInteriorsK(n)(0)
for (let k = 1; k * 2 - 1 <= n; k++) {
l += countArrayInteriorsK(n)(k)
}
return l
})
const countArrays = n =>
countArrayInteriors(n - 2)
// And lastly, objects
// A key is a whitespaced string
const countKeys = memoize(n => {
// Try every possible size of string
let l = 0n
for (let i = 2; i <= n; i++) {
l += countStrings(i) * NUM_WS ** BigInt(n - i) * BigInt(n - i + 1)
}
return l
})
const countMembers = memoize(n => {
// Try every possible location for the colon between the key and value
let l = 0n
for (let i = 2; i + 2 <= n; i++) {
l += countKeys(i) * countElements(n - (i + 1))
}
return l
})
const countObjectInteriorsK = memoize(n => memoize(k => {
if (k === 0) {
return NUM_WS ** BigInt(n)
}
if (k === 1) {
return countMembers(n)
}
// Try every possible location for the comma after the first key and value
let l = 0n
for (let i = 4; i + (k - 1) * 5 <= n; i++) {
l += countMembers(i) * countObjectInteriorsK(n - (i + 1))(k - 1)
}
return l
}))
const countObjectInteriors = memoize(n => {
// Try every possible number of key-value pairs
let l = 0n
l += countObjectInteriorsK(n)(0)
for (let k = 1; 5 * k - 1 <= n; k++) {
l += countObjectInteriorsK(n)(k)
}
return l
})
const countObjects = n =>
countObjectInteriors(n - 2)
const countValues = memoize(n => {
let l = 0n
if (n === 4) {
l += 2n // "null", "true"
}
if (n === 5) {
l += 1n // "false"
}
if (n >= 1) {
l += countNumbers(n)
}
if (n >= 2) {
l += countStrings(n)
l += countArrays(n)
l += countObjects(n)
}
return l
})
const countElements = memoize(n => {
// Try every possible size of value
let l = 0n
for (let i = 1; i <= n; i++) {
l += countValues(i) * NUM_WS ** BigInt(n - i) * BigInt(n - i + 1)
}
return l
})
assert.equal(countArrayInteriorsK(0)(0), 1n)
assert.equal(countArrayInteriorsK(1)(0), 4n)
assert.equal(countArrayInteriorsK(1)(1), 10n)
assert.equal(countArrayInteriorsK(2)(0), 16n)
assert.equal(countArrayInteriorsK(2)(1), 183n)
assert.equal(countArrayInteriorsK(3)(0), 64n)
assert.equal(countArrayInteriorsK(3)(1), 1116690n)
assert.equal(countArrayInteriorsK(3)(2), 100n)
assert.equal(countArrayInteriorsK(4)(0), 256n)
assert.equal(countArrayInteriorsK(4)(1), 1241178737201n)
assert.equal(countArrayInteriorsK(4)(2), 3660n) // 183 * 10 + 10 * 183
assert.equal(countArrayInteriorsK(5)(0), 1024n)
assert.equal(countArrayInteriorsK(5)(1), 1382769886828373987n)
assert.equal(countArrayInteriorsK(5)(2), 22367289n)
assert.equal(countArrayInteriorsK(5)(3), 1000n)
assert.equal(countArrayInteriorsK(39)(20), 100000000000000000000n)
assert.equal(countArrayInteriors(0), 1n)
assert.equal(countArrayInteriors(1), 14n)
assert.equal(countArrayInteriors(2), 199n)
assert.equal(countArrayInteriors(3), 1116854n)
assert.equal(countArrayInteriors(4), 1241178741117n)
assert.equal(countArrayInteriors(5), 1382769886850743300n)
assert.equal(countArrays(2), 1n)
assert.equal(countArrays(3), 14n)
assert.equal(countArrays(4), 199n)
assert.equal(countArrays(5), 1116854n)
assert.equal(countArrays(6), 1241178741117n)
assert.equal(countArrays(7), 1382769886850743300n)
assert.equal(countKeys(2), 1n)
assert.equal(countKeys(3), 1114086n)
assert.equal(countKeys(4), 1241178702764n)
assert.equal(countObjectInteriorsK(0)(0), 1n)
assert.equal(countObjectInteriorsK(1)(0), 4n)
assert.equal(countObjectInteriorsK(2)(0), 16n)
assert.equal(countObjectInteriorsK(3)(0), 64n)
assert.equal(countObjectInteriorsK(4)(0), 256n)
assert.equal(countObjectInteriorsK(4)(1), 10n) // '{"":0}' through '{"":9}'
assert.equal(countObjectInteriorsK(5)(0), 1024n)
assert.equal(countObjectInteriorsK(5)(1), 11141043n) // 1114086 * 10 + 1 * 183
assert.equal(countObjectInteriors(0), 1n)
assert.equal(countObjectInteriors(1), 4n)
assert.equal(countObjectInteriors(2), 16n)
assert.equal(countObjectInteriors(3), 64n)
assert.equal(countObjectInteriors(4), 266n)
assert.equal(countObjectInteriors(5), 11142067n)
assert.equal(countObjects(2), 1n)
assert.equal(countObjects(3), 4n)
assert.equal(countObjects(4), 16n)
assert.equal(countObjects(5), 64n)
assert.equal(countObjects(6), 266n)
assert.equal(countObjects(7), 11142067n)
assert.equal(countValues(1), 10n) // 10 numbers
assert.equal(countValues(2), 103n) // 100 numbers, 1 string, 1 array, 1 object
assert.equal(countValues(3), 1115386n) // 1290 numbers, 1114078 strings, 14 arrays, 4 objects
assert.equal(countValues(4), 1241169806609n) // 1 null, 1 true, 16300 numbers, 1241169790092 strings, 199 arrays, 16 objects
assert.equal(countValues(5), 1382759957416343419n) // 1 false, 198700 numbers, 1382759957415027800 strings, 1116854 arrays, 64 objects
assert.equal(countElements(1), 10n)
assert.equal(countElements(2), 183n)
assert.equal(countElements(3), 1116690n)
assert.equal(countElements(4), 1241178737201n)
assert.equal(countElements(5), 1382769886828373987n)
for (let n = 1; n <= 100; n++) {
console.log(n, countElements(n))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment