Created
April 26, 2022 03:23
-
-
Save Mr0grog/7ff53ffd0f7021e2235bc132362e3770 to your computer and use it in GitHub Desktop.
Parse Airtable’s ConstantPooledData format.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Parse Airtable's "ConstantPooledData" format. They recently started using | |
* this format to compress some API responses, and it appears to be a | |
* home-grown format. | |
* | |
* Call `parseData()` if you have an object with data (e.g. a JSON-parsed API | |
* response body). | |
* | |
* Call `parseString()` if you have a raw string of data (e.g. an API response | |
* body). | |
* | |
* --------------- | |
* | |
* The basic format is two lists: | |
* - `pool` is a list of primitive JS values that can be keys or values of an | |
* object. | |
* - `pointerList` is a list of numbers, most of which are indexes into `pool`. | |
* | |
* Parse by reading `pointerList` from start to end. The first item is a code | |
* for what type of data the current value is, or the index of a value in | |
* `pool`, as follows: | |
* | |
* - If the code is 0, it represents an array. The next value is the length of | |
* the array. Subsequent values should be parsed the same as the basic parsing | |
* of `pointerList` (so if the pointer is `0`, it's an array, etc.). That is, | |
* `pointerList` looks like: | |
* | |
* 0 <-- Array | |
* N <-- Number of items in the array | |
* V1 <-- Data type or pointer to first value in the array | |
* V2 <-- Data type or pointer to second value in the array | |
* ...etc... <-- And so on until you have N items | |
* | |
* - If the code is 6, it represents an object. The next value is the number of | |
* keys in the object, followed by pointers to the key names. After that, | |
* each entry represents the value of one key, in the opposite order of the
* keys. Values should be parsed the same as the basic parsing of | |
* `pointerList`. So `pointerList` should look like: | |
* | |
* 6 <-- Object | |
* N <-- Number of keys in the object | |
* K1 <-- Pointer to first key. | |
* K2 <-- Pointer to second key. | |
* ...etc... <-- And so on until you have N keys. | |
* V2 <-- Data type or pointer to second key's value. | |
* V1 <-- Data type or pointer to first key's value. | |
* | |
* - If the code is 2 or 3, the value is `true` or `false`, respectively. | |
* | |
* - If the code is 4 or 5, the value is `null` or `undefined`, respectively. | |
* | |
* - Any other code is a pointer to a value in `pool`. | |
*/ | |
import assert from 'node:assert/strict'; | |
/**
 * Reserved codes in the pointer list. A pointer equal to one of these keys is
 * not an index into `pool`; it names the kind of value to produce, and for
 * ARRAY and OBJECT it changes how the following pointers are read.
 *
 * The table has a null prototype so that looking up an arbitrary pointer value
 * can only match an entry listed here (never a member inherited from
 * `Object.prototype`, such as `constructor`), and it is frozen because it is
 * shared, module-level state.
 */
const DATA_TYPES = Object.freeze({
  __proto__: null,
  '0': 'ARRAY',
  '2': 'TRUE',
  '3': 'FALSE',
  '4': 'NULL',
  '5': 'UNDEFINED',
  '6': 'OBJECT',
});
/**
 * Read an array from the pointer list.
 *
 * `index` must point at the array's length entry (the slot right after the
 * ARRAY type code). That many values are then read one after another with
 * `readValue()`, each of which may itself be a nested array or object.
 *
 * @param {number[]} pointerList Full list of pointers/type codes.
 * @param {any[]} pool Pool of primitive values that pointers refer to.
 * @param {number} index Position of the array's length entry.
 * @returns {{value: any[], index: number}} The parsed array and the position
 *   of the first entry after it.
 * @throws {assert.AssertionError} If the length entry is missing or is not a
 *   non-negative integer, or if the array runs past the end of `pointerList`.
 */
function readArray (pointerList, pool, index) {
  const length = pointerList[index];
  // Without this check a missing, negative, or fractional length would quietly
  // produce an empty or wrongly sized array instead of reporting bad data.
  assert.ok(
    Number.isInteger(length) && length >= 0,
    `Invalid array length: ${length} at index ${index}`
  );

  const value = [];
  let currentIndex = index + 1;
  for (let i = 0; i < length; i++) {
    assert.ok(currentIndex < pointerList.length, 'Tried to read past end of pointerList');
    const parsed = readValue(pointerList, pool, currentIndex);
    value.push(parsed.value);
    // A nested value may have consumed several entries; continue after it.
    currentIndex = parsed.index;
  }

  return { value, index: currentIndex };
}
/**
 * Read an object from the pointer list.
 *
 * `index` must point at the object's key count (the slot right after the
 * OBJECT type code). The next `keyCount` entries are pool pointers to the key
 * names. The values follow, stored in the *opposite* order of the keys (the
 * first value belongs to the last key), and each is read with `readValue()`.
 *
 * @param {number[]} pointerList Full list of pointers/type codes.
 * @param {any[]} pool Pool of primitive values that pointers refer to.
 * @param {number} index Position of the object's key count entry.
 * @returns {{value: object, index: number}} The parsed object and the position
 *   of the first entry after it.
 * @throws {assert.AssertionError} If the key count or a key pointer is
 *   invalid, or if the object runs past the end of `pointerList`.
 */
function readObject (pointerList, pool, index) {
  const keyCount = pointerList[index];
  assert.ok(
    Number.isInteger(keyCount) && keyCount >= 0,
    `Invalid object key count: ${keyCount} at index ${index}`
  );

  const firstValueIndex = index + 1 + keyCount;
  const keyPointers = pointerList.slice(index + 1, firstValueIndex);
  assert.equal(keyPointers.length, keyCount, 'Could not read expected number of object keys');

  const keys = keyPointers.map((pointer) => {
    // Key pointers always index straight into the pool; an out-of-range one
    // would otherwise turn into the bogus key "undefined".
    assert.ok(
      Number.isInteger(pointer) && pointer >= 0 && pointer < pool.length,
      `Invalid key pointer: ${pointer}`
    );
    return pool[pointer];
  });

  const value = {};
  let currentIndex = firstValueIndex;
  // Values are stored in reverse key order, so walk the keys back to front.
  for (let keyIndex = keys.length - 1; keyIndex >= 0; keyIndex--) {
    assert.ok(currentIndex < pointerList.length, 'Tried to read past end of pointerList');
    const parsed = readValue(pointerList, pool, currentIndex);
    // Define an own property rather than assigning: plain assignment to a key
    // named "__proto__" would replace the object's prototype instead of
    // storing the value (JSON.parse treats such a key as ordinary data, too).
    Object.defineProperty(value, keys[keyIndex], {
      value: parsed.value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
    currentIndex = parsed.index;
  }

  return { value, index: currentIndex };
}
/**
 * Read a single value starting at `index` in the pointer list.
 *
 * The entry at `index` is either one of the reserved codes in `DATA_TYPES`
 * (array, object, `true`, `false`, `null`, `undefined`) or an index into
 * `pool`. Arrays and objects are delegated to `readArray()`/`readObject()`,
 * which consume however many following entries they need.
 *
 * @param {number[]} pointerList Full list of pointers/type codes.
 * @param {any[]} pool Pool of primitive values that pointers refer to.
 * @param {number} index Position of the entry to read.
 * @returns {{value: any, index: number}} The parsed value and the position of
 *   the first entry after it.
 * @throws {assert.AssertionError} If the entry is neither a reserved code nor
 *   a valid index into `pool`.
 */
function readValue (pointerList, pool, index) {
  const pointer = pointerList[index];
  const nextIndex = index + 1;

  switch (DATA_TYPES[pointer]) {
    case 'ARRAY':
      return readArray(pointerList, pool, nextIndex);
    case 'OBJECT':
      return readObject(pointerList, pool, nextIndex);
    case 'TRUE':
      return { value: true, index: nextIndex };
    case 'FALSE':
      return { value: false, index: nextIndex };
    case 'NULL':
      return { value: null, index: nextIndex };
    case 'UNDEFINED':
      return { value: undefined, index: nextIndex };
    default:
      // Not a reserved code, so it must be a real pool index. Checking only
      // the upper bound would let negative or fractional pointers through and
      // silently yield `undefined`.
      assert.ok(
        Number.isInteger(pointer) && pointer >= 0 && pointer < pool.length,
        `Invalid pointer: ${pointer} at index ${index}`
      );
      return { value: pool[pointer], index: nextIndex };
  }
}
/**
 * Parse an Airtable ConstantPooledData object into an actual value. This could
 * return any type of JS value, but will usually be an object.
 *
 * The payload may be passed directly or wrapped in an object's `data`
 * property; when `input.data` is present it is used as the payload.
 *
 * @param {any} input A JS object with Airtable ConstantPooledData data.
 * @returns {any} The decoded value.
 * @throws {TypeError} If the input is not a ConstantPooledData v1 payload.
 * @throws {assert.AssertionError} If the pointer list is malformed or is not
 *   consumed exactly by the top-level value.
 */
export function parseData (input) {
  const raw = input?.data ?? input;
  const pooledData = raw?.pooledData;
  const pointerList = pooledData?.pointerList;
  const pool = pooledData?.pool;

  // Every access is optional so that any malformed input, including one that
  // is flagged as pooled data but lacks `pooledData`, reaches the descriptive
  // error below instead of an engine "cannot read properties" error.
  const isPooledDataV1 =
    Boolean(raw?.isConstantPooledData) &&
    pooledData?.v === 1 &&
    Array.isArray(pointerList) &&
    Array.isArray(pool);

  if (!isPooledDataV1) {
    throw new TypeError(
      'Input is not Airtable ConstantPooledData v1! ' +
      'It should be an object like: ' +
      '{ isConstantPooledData: true, pooledData: { v: 1, pointerList: [array], pool: [array] } }'
    );
  }

  const parsed = readValue(pointerList, pool, 0);
  // The top-level value must account for every entry; leftovers mean the data
  // was malformed or was misread.
  assert.equal(parsed.index, pointerList.length, 'Did not read entire pointerList');
  return parsed.value;
}
/**
 * Parse a string with Airtable ConstantPooledData. The format is JSON-based,
 * so this simply decodes the JSON and hands the result to `parseData()`.
 *
 * @param {string} rawString String with JSON-encoded ConstantPooledData.
 * @returns {any} The decoded value, exactly as `parseData()` returns it.
 * @throws {SyntaxError} If `rawString` is not valid JSON.
 */
export function parseString (rawString) {
  return parseData(JSON.parse(rawString));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
import { readFileSync } from 'node:fs'; | |
import { inspect } from 'node:util'; | |
import { parseString } from './constant-pooled-data.mjs'; | |
// Command-line entry point: read the file named by the first argument, decode
// it as ConstantPooledData, and pretty-print the result.
const [, , filePath] = process.argv;

if (!filePath) {
  console.error(`
Please specify a path to a file to read. Usage:
./read-constant-pooled-data.mjs path/to/airtable/data.json
`);
  // Report the usage error to the calling shell instead of exiting with 0.
  process.exitCode = 1;
} else {
  const text = readFileSync(filePath, { encoding: 'utf-8' });
  const data = parseString(text);
  // Options form of the legacy positional call inspect(data, false, 20, true).
  console.log(inspect(data, { showHidden: false, depth: 20, colors: true }));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment