Mr0grog · April 26, 2022 03:23
diff --git a/constant-pooled-data.mjs b/constant-pooled-data.mjs
 /**
 * Parse Airtable's "ConstantPooledData" format. They recently started using
 * this format to compress some API responses, and it appears to be a
 * home-grown format.
 * 
 * Call `parseData()` if you have an object with data (e.g. a JSON-parsed API
 * response body).
 * 
 * Call `parseString()` if you have a raw string of data (e.g. an API response
 * body).
 * 
 * ---------------
 * 
 * The basic format is two lists:
 * - `pool` is a list of primitive JS values that can be keys or values of an
 *   object.
 * - `pointerList` is a list of numbers, most of which are indexes into `pool`.
 * 
 * Parse by reading `pointerList` from start to end. The first item is a code
 * for what type of data the current value is, or the index of a value in
 * `pool`, as follows:
 * 
 * - If the code is 0, it represents an array. The next value is the length of
 *   the array. Subsequent values should be parsed the same as the basic parsing
 *   of `pointerList` (so if the pointer is `0`, it's an array, etc.). That is,
 *   `pointerList` looks like:
 * 
 *       0          <-- Array
 *       N          <-- Number of items in the array
 *       V1         <-- Data type or pointer to first value in the array
 *       V2         <-- Data type or pointer to second value in the array
 *       ...etc...  <-- And so on until you have N items
 * 
 * - If the code is 6, it represents an object. The next value is the number of
 *   keys in the object, followed by pointers to the key names. After that,
 *   each entry represents the value of one key, in the opposite order of the
 *   keys. Values should be parsed the same as the basic parsing of
 *   `pointerList`. So `pointerList` should look like:
 * 
 *       6          <-- Object
 *       N          <-- Number of keys in the object
 *       K1         <-- Pointer to first key.
 *       K2         <-- Pointer to second key.
 *       ...etc...  <-- And so on until you have N keys.
 *       V2         <-- Data type or pointer to second key's value.
 *       V1         <-- Data type or pointer to first key's value.
 * 
 * - If the code is 2 or 3, the value is `true` or `false`, respectively.
 * 
 * - If the code is 4 or 5, the value is `null` or `undefined`, respectively.
 * 
 * - Any other code is a pointer to a value in `pool`.
 */

 import assert from 'node:assert/strict';

 /**
 * Marker codes that may appear in the pointer list, mapped to the name of the
 * data type they introduce. A pointer-list entry that has no entry in this
 * table is not a marker; `readValue()` treats it as an index into `pool`.
 */
 const DATA_TYPES = {
  0: 'ARRAY',      // followed by an item count, then the items
  2: 'TRUE',
  3: 'FALSE',
  4: 'NULL',
  5: 'UNDEFINED',
  6: 'OBJECT',     // followed by a key count, key pointers, then the values
 };

 /**
 * Read an array from the pointer list.
 * @param {number[]} pointerList List of type codes and pool pointers.
 * @param {any[]} pool Pool of values that pointers refer to.
 * @param {number} index Position in `pointerList` of the array's item count
 *        (i.e. the entry right after the array type code).
 * @returns {{value: any[], index: number}} The decoded array and the position
 *          of the first pointer-list entry after it.
 * @throws {AssertionError} If the item count is not a non-negative integer or
 *         the array runs past the end of `pointerList`.
 */
 function readArray (pointerList, pool, index) {
  const itemCount = pointerList[index];
  // A missing, negative, or fractional count means the data is corrupt. Fail
  // loudly instead of silently producing an empty or over-long array.
  assert.ok(
    Number.isInteger(itemCount) && itemCount >= 0,
    `Invalid array length: ${itemCount} at index ${index}`
  );

  const value = [];
  let currentIndex = index + 1;
  for (let itemNumber = 0; itemNumber < itemCount; itemNumber++) {
    assert.ok(currentIndex < pointerList.length, 'Tried to read past end of pointerList');

    // Items can be nested arrays/objects, so each one reports where it ended.
    const parsed = readValue(pointerList, pool, currentIndex);
    value.push(parsed.value);
    currentIndex = parsed.index;
  }

  return { value, index: currentIndex };
 }

 /**
 * Read an object from the pointer list. The layout is: key count, then one
 * pool pointer per key, then one value per key in the *reverse* order of the
 * keys.
 * @param {number[]} pointerList List of type codes and pool pointers.
 * @param {any[]} pool Pool of values that pointers refer to.
 * @param {number} index Position in `pointerList` of the object's key count
 *        (i.e. the entry right after the object type code).
 * @returns {{value: object, index: number}} The decoded object and the
 *          position of the first pointer-list entry after it.
 * @throws {AssertionError} If the keys or values run past the end of
 *         `pointerList`, or a key pointer is not a valid index into `pool`.
 */
 function readObject (pointerList, pool, index) {
  const keyCount = pointerList[index];
  const firstKeyIndex = index + 1;
  const keyPointers = pointerList.slice(firstKeyIndex, firstKeyIndex + keyCount);
  assert.equal(keyPointers.length, keyCount, 'Could not read expected number of object keys');

  // Key entries are always pool pointers (never type codes), so validate them
  // directly rather than letting a bad pointer turn into the key "undefined".
  const keys = keyPointers.map((pointer, offset) => {
    assert.ok(
      Number.isInteger(pointer) && pointer >= 0 && pointer < pool.length,
      `Invalid key pointer: ${pointer} at index ${firstKeyIndex + offset}`
    );
    return pool[pointer];
  });

  const value = {};
  let currentIndex = firstKeyIndex + keyCount;
  // Values are stored in the opposite order of the keys: walk keys backwards.
  for (let keyNumber = keys.length - 1; keyNumber >= 0; keyNumber--) {
    assert.ok(currentIndex < pointerList.length, 'Tried to read past end of pointerList');

    const parsed = readValue(pointerList, pool, currentIndex);
    // Define an own property instead of assigning, so that a key named
    // "__proto__" is stored as data (like JSON.parse does) rather than
    // replacing the object's prototype.
    Object.defineProperty(value, keys[keyNumber], {
      value: parsed.value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
    currentIndex = parsed.index;
  }

  return { value, index: currentIndex };
 }

 /**
 * Read a single value (of any type) starting at `index` in the pointer list.
 * The entry at `index` is either a type code from `DATA_TYPES` or a pointer
 * to a value in `pool`.
 * @param {number[]} pointerList List of type codes and pool pointers.
 * @param {any[]} pool Pool of values that pointers refer to.
 * @param {number} index Position in `pointerList` to start reading at.
 * @returns {{value: any, index: number}} The decoded value and the position
 *          of the first pointer-list entry after it.
 * @throws {AssertionError} If the entry is not a type code and is not a valid
 *         index into `pool` (including reading past the end of the list).
 */
 function readValue (pointerList, pool, index) {
  const code = pointerList[index];
  const nextIndex = index + 1;

  switch (DATA_TYPES[code]) {
    // Containers consume a variable number of entries and report their own end.
    case 'ARRAY':
      return readArray(pointerList, pool, nextIndex);
    case 'OBJECT':
      return readObject(pointerList, pool, nextIndex);

    // Constants are encoded entirely by their type code.
    case 'TRUE':
      return { value: true, index: nextIndex };
    case 'FALSE':
      return { value: false, index: nextIndex };
    case 'NULL':
      return { value: null, index: nextIndex };
    case 'UNDEFINED':
      return { value: undefined, index: nextIndex };

    // Anything else must be a pointer into the pool. Require a real array
    // index: a negative or fractional pointer would otherwise silently
    // decode to `undefined`.
    default:
      assert.ok(
        Number.isInteger(code) && code >= 0 && code < pool.length,
        `Invalid pointer: ${code} at index ${index}`
      );
      return { value: pool[code], index: nextIndex };
  }
 }

 /**
 * Parse an Airtable ConstantPooledData object into an actual value. This could
 * return any type of JS value, but will usually be an object.
 *
 * The input may be the ConstantPooledData object itself or a wrapper object
 * whose `data` property holds it.
 * @param {any} input A JS object with Airtable ConstantPooledData data.
 * @returns {any}
 * @throws {TypeError} If the input does not have the expected
 *         ConstantPooledData v1 shape.
 * @throws {AssertionError} If the pointer list is malformed or is not
 *         consumed entirely.
 */
 export function parseData (input) {
  const raw = input?.data ?? input;
  const pooledData = raw?.pooledData;
  const pointerList = pooledData?.pointerList;
  const pool = pooledData?.pool;

  // Every access is optional-chained so that a missing `pooledData` produces
  // the descriptive error below instead of a raw property-access TypeError.
  const isValid = Boolean(raw?.isConstantPooledData) &&
    pooledData?.v === 1 &&
    Array.isArray(pointerList) &&
    Array.isArray(pool);
  if (!isValid) {
    throw new TypeError(
      'Input is not Airtable ConstantPooledData v1! ' +
      'It should be an object like: ' +
      '{ isConstantPooledData: true, pooledData: { v: 1, pointerList: [array], pool: [array] } }'
    );
  }

  const parsed = readValue(pointerList, pool, 0);
  // The pointer list encodes exactly one root value; leftovers mean the data
  // was corrupt or was parsed incorrectly.
  assert.equal(parsed.index, pointerList.length, 'Did not read entire pointerList');

  return parsed.value;
 }

 /**
 * Parse a string with Airtable ConstantPooledData. This data format is
 * JSON-based, so this is basically a shortcut to decode JSON before running
 * `parseData()`.
 * @param {string} rawString String with JSON-encoded ConstantPooledData.
 * @returns {any}
 */
 export function parseString (rawString) {
  const data = JSON.parse(rawString);
  return parseData(data);
 }
diff --git a/read-constant-pooled-data.mjs b/read-constant-pooled-data.mjs
 #!/usr/bin/env node
 import { readFileSync } from 'node:fs';
 import { inspect } from 'node:util';
 import { parseString } from './constant-pooled-data.mjs';

 // Command-line entry point: decode the ConstantPooledData JSON file named by
 // the first argument and pretty-print the result.
 if (!process.argv[2]) {
  console.error(`
    Please specify a path to a file to read. Usage:

    ./read-constant-pooled-data.mjs path/to/airtable/data.json
  `);
  // A missing argument is a usage error, so report failure to the calling
  // shell instead of exiting with status 0.
  process.exitCode = 1;
 } else {
  const filePath = process.argv[2];
  const text = readFileSync(filePath, { encoding: 'utf-8' });
  const data = parseString(text);
  // inspect(value, showHidden = false, depth = 20, colors = true)
  console.log(inspect(data, false, 20, true));
 }
	/**
	* Parse Airtable's "ConstantPooledData" format. They recently started using
	* this format to compress some API responses, and it appears to be a
	* home-grown format.
	*
	* Call `parseData()` if you have an object with data (e.g. a JSON-parsed API
	* response body).
	*
	* Call `parseString()` if you have a raw string of data (e.g. an API response
	* body).
	*
	* ---------------
	*
	* The basic format is two lists:
	* - `pool` is a list of primitive JS values that can be keys or values of an
	* object.
	* - `pointerList` is a list of numbers, most of which are indexes into `pool`.
	*
	* Parse by reading `pointerList` from start to end. The first item is a code
	* for what type of data the current value is, or the index of a value in
	* `pool`, as follows:
	*
	* - If the code is 0, it represents an array. The next value is the length of
	* the array. Subsequent values should be parsed the same as the basic parsing
	* of `pointerList` (so if the pointer is `0`, it's an array, etc.). That is,
	* `pointerList` looks like:
	*
	* 0 <-- Array
	* N <-- Number of items in the array
	* V1 <-- Data type or pointer to first value in the array
	* V2 <-- Data type or pointer to second value in the array
	* ...etc... <-- And so on until you have N items
	*
	* - If the code is 6, it represents an object. The next value is the number of
	* keys in the object, followed by pointers to the key names. After that,
	* each entry represents the value of one key, in the opposite order of the
	* keys. Values should be parsed the same as the basic parsing of
	* `pointerList`. So `pointerList` should look like:
	*
	* 6 <-- Object
	* N <-- Number of keys in the object
	* K1 <-- Pointer to first key.
	* K2 <-- Pointer to second key.
	* ...etc... <-- And so on until you have N keys.
	* V2 <-- Data type or pointer to second key's value.
	* V1 <-- Data type or pointer to first key's value.
	*
	* - If the code is 2 or 3, the value is `true` or `false`, respectively.
	*
	* - If the code is 4 or 5, the value is `null` or `undefined`, respectively.
	*
	* - Any other code is a pointer to a value in `pool`.
	*/

	import assert from 'node:assert/strict';

	/**
	 * Marker codes that may appear in the pointer list, mapped to the name of the
	 * data type they introduce. A pointer-list entry that has no entry in this
	 * table is not a marker; `readValue()` treats it as an index into `pool`.
	 */
	const DATA_TYPES = {
	  0: 'ARRAY',      // followed by an item count, then the items
	  2: 'TRUE',
	  3: 'FALSE',
	  4: 'NULL',
	  5: 'UNDEFINED',
	  6: 'OBJECT',     // followed by a key count, key pointers, then the values
	};

	/**
	 * Read an array from the pointer list.
	 * @param {number[]} pointerList List of type codes and pool pointers.
	 * @param {any[]} pool Pool of values that pointers refer to.
	 * @param {number} index Position in `pointerList` of the array's item count
	 *        (i.e. the entry right after the array type code).
	 * @returns {{value: any[], index: number}} The decoded array and the position
	 *          of the first pointer-list entry after it.
	 * @throws {AssertionError} If the item count is not a non-negative integer or
	 *         the array runs past the end of `pointerList`.
	 */
	function readArray (pointerList, pool, index) {
	  const itemCount = pointerList[index];
	  // A missing, negative, or fractional count means the data is corrupt. Fail
	  // loudly instead of silently producing an empty or over-long array.
	  assert.ok(
	    Number.isInteger(itemCount) && itemCount >= 0,
	    `Invalid array length: ${itemCount} at index ${index}`
	  );

	  const value = [];
	  let currentIndex = index + 1;
	  for (let itemNumber = 0; itemNumber < itemCount; itemNumber++) {
	    assert.ok(currentIndex < pointerList.length, 'Tried to read past end of pointerList');

	    // Items can be nested arrays/objects, so each one reports where it ended.
	    const parsed = readValue(pointerList, pool, currentIndex);
	    value.push(parsed.value);
	    currentIndex = parsed.index;
	  }

	  return { value, index: currentIndex };
	}

	/**
	 * Read an object from the pointer list. The layout is: key count, then one
	 * pool pointer per key, then one value per key in the *reverse* order of the
	 * keys.
	 * @param {number[]} pointerList List of type codes and pool pointers.
	 * @param {any[]} pool Pool of values that pointers refer to.
	 * @param {number} index Position in `pointerList` of the object's key count
	 *        (i.e. the entry right after the object type code).
	 * @returns {{value: object, index: number}} The decoded object and the
	 *          position of the first pointer-list entry after it.
	 * @throws {AssertionError} If the keys or values run past the end of
	 *         `pointerList`, or a key pointer is not a valid index into `pool`.
	 */
	function readObject (pointerList, pool, index) {
	  const keyCount = pointerList[index];
	  const firstKeyIndex = index + 1;
	  const keyPointers = pointerList.slice(firstKeyIndex, firstKeyIndex + keyCount);
	  assert.equal(keyPointers.length, keyCount, 'Could not read expected number of object keys');

	  // Key entries are always pool pointers (never type codes), so validate them
	  // directly rather than letting a bad pointer turn into the key "undefined".
	  const keys = keyPointers.map((pointer, offset) => {
	    assert.ok(
	      Number.isInteger(pointer) && pointer >= 0 && pointer < pool.length,
	      `Invalid key pointer: ${pointer} at index ${firstKeyIndex + offset}`
	    );
	    return pool[pointer];
	  });

	  const value = {};
	  let currentIndex = firstKeyIndex + keyCount;
	  // Values are stored in the opposite order of the keys: walk keys backwards.
	  for (let keyNumber = keys.length - 1; keyNumber >= 0; keyNumber--) {
	    assert.ok(currentIndex < pointerList.length, 'Tried to read past end of pointerList');

	    const parsed = readValue(pointerList, pool, currentIndex);
	    // Define an own property instead of assigning, so that a key named
	    // "__proto__" is stored as data (like JSON.parse does) rather than
	    // replacing the object's prototype.
	    Object.defineProperty(value, keys[keyNumber], {
	      value: parsed.value,
	      writable: true,
	      enumerable: true,
	      configurable: true,
	    });
	    currentIndex = parsed.index;
	  }

	  return { value, index: currentIndex };
	}

	/**
	 * Read a single value (of any type) starting at `index` in the pointer list.
	 * The entry at `index` is either a type code from `DATA_TYPES` or a pointer
	 * to a value in `pool`.
	 * @param {number[]} pointerList List of type codes and pool pointers.
	 * @param {any[]} pool Pool of values that pointers refer to.
	 * @param {number} index Position in `pointerList` to start reading at.
	 * @returns {{value: any, index: number}} The decoded value and the position
	 *          of the first pointer-list entry after it.
	 * @throws {AssertionError} If the entry is not a type code and is not a valid
	 *         index into `pool` (including reading past the end of the list).
	 */
	function readValue (pointerList, pool, index) {
	  const code = pointerList[index];
	  const nextIndex = index + 1;

	  switch (DATA_TYPES[code]) {
	    // Containers consume a variable number of entries and report their own end.
	    case 'ARRAY':
	      return readArray(pointerList, pool, nextIndex);
	    case 'OBJECT':
	      return readObject(pointerList, pool, nextIndex);

	    // Constants are encoded entirely by their type code.
	    case 'TRUE':
	      return { value: true, index: nextIndex };
	    case 'FALSE':
	      return { value: false, index: nextIndex };
	    case 'NULL':
	      return { value: null, index: nextIndex };
	    case 'UNDEFINED':
	      return { value: undefined, index: nextIndex };

	    // Anything else must be a pointer into the pool. Require a real array
	    // index: a negative or fractional pointer would otherwise silently
	    // decode to `undefined`.
	    default:
	      assert.ok(
	        Number.isInteger(code) && code >= 0 && code < pool.length,
	        `Invalid pointer: ${code} at index ${index}`
	      );
	      return { value: pool[code], index: nextIndex };
	  }
	}

	/**
	 * Parse an Airtable ConstantPooledData object into an actual value. This could
	 * return any type of JS value, but will usually be an object.
	 *
	 * The input may be the ConstantPooledData object itself or a wrapper object
	 * whose `data` property holds it.
	 * @param {any} input A JS object with Airtable ConstantPooledData data.
	 * @returns {any}
	 * @throws {TypeError} If the input does not have the expected
	 *         ConstantPooledData v1 shape.
	 * @throws {AssertionError} If the pointer list is malformed or is not
	 *         consumed entirely.
	 */
	export function parseData (input) {
	  const raw = input?.data ?? input;
	  const pooledData = raw?.pooledData;
	  const pointerList = pooledData?.pointerList;
	  const pool = pooledData?.pool;

	  // Every access is optional-chained so that a missing `pooledData` produces
	  // the descriptive error below instead of a raw property-access TypeError.
	  const isValid = Boolean(raw?.isConstantPooledData) &&
	    pooledData?.v === 1 &&
	    Array.isArray(pointerList) &&
	    Array.isArray(pool);
	  if (!isValid) {
	    throw new TypeError(
	      'Input is not Airtable ConstantPooledData v1! ' +
	      'It should be an object like: ' +
	      '{ isConstantPooledData: true, pooledData: { v: 1, pointerList: [array], pool: [array] } }'
	    );
	  }

	  const parsed = readValue(pointerList, pool, 0);
	  // The pointer list encodes exactly one root value; leftovers mean the data
	  // was corrupt or was parsed incorrectly.
	  assert.equal(parsed.index, pointerList.length, 'Did not read entire pointerList');

	  return parsed.value;
	}

	/**
	* Parse a string with Airtable ConstantPooledData. This data format is
	* JSON-based, so this is basically a shortcut to decode JSON before running
	* `parseData()`.
	* @param {string} rawString String with JSON-encoded ConstantPooledData.
	* @returns {any}
	*/
	export function parseString (rawString) {
	const data = JSON.parse(rawString);
	return parseData(data);
	}
	#!/usr/bin/env node
	import { readFileSync } from 'node:fs';
	import { inspect } from 'node:util';
	import { parseString } from './constant-pooled-data.mjs';

	// Command-line entry point: decode the ConstantPooledData JSON file named by
	// the first argument and pretty-print the result.
	if (!process.argv[2]) {
	  console.error(`
	Please specify a path to a file to read. Usage:

	./read-constant-pooled-data.mjs path/to/airtable/data.json
	`);
	  // A missing argument is a usage error, so report failure to the calling
	  // shell instead of exiting with status 0.
	  process.exitCode = 1;
	} else {
	  const filePath = process.argv[2];
	  const text = readFileSync(filePath, { encoding: 'utf-8' });
	  const data = parseString(text);
	  // inspect(value, showHidden = false, depth = 20, colors = true)
	  console.log(inspect(data, false, 20, true));
	}