billyeh · December 30, 2021 00:26
diff --git a/outline_formatter.gs b/outline_formatter.gs
 /**
 * Convenience script for formatting Google Docs converted from outline PDF files.
 * Simply copy/paste the outline text, and the script cleans up the whitespace
 * and correctly formats the document according to the Roman numerals found
 * in the text.
 */

 /**
  * Builds the search strings for every outline point that may come next.
  *
  * One candidate is produced for each level already open (the next sibling
  * at that level) plus one for a brand-new, deeper level (its first point).
  * Levels for which nextOutlinePointString() yields nothing are skipped.
  *
  * @param {Array} indices - Current zero-based counter of each open level,
  *     outermost first.
  * @return {Array} Search strings of the form ' <label>\. ' - note the
  *     backslash-escaped period. For example, [' II\\. ', ' A\\. '].
  */
 function nextOutlinePoints(indices) {
  // The trailing -1 stands for the not-yet-opened deeper level, so that
  // "counter + 1" addresses its first point.
  const counters = [...indices, -1];
  return counters
    .map((counter, depth) => nextOutlinePointString(depth, counter + 1))
    .filter((label) => label)
    .map((label) => ' ' + label + '\\. ');
 }

 /**
  * Renders the label of an outline point from its nesting level and index.
  *
  * Level 0 uses romanNumeral(), levels 1 and 3 use latinAlphabet() in upper
  * and lower case respectively, and level 2 uses the decimal value index + 1.
  *
  * @param {number} indentation - Nesting level of the point.
  * @param {number} index - Which point at this level to render.
  * @return {String} The label, or undefined when the level is not 0-3.
  */
 function nextOutlinePointString(indentation, index) {
  const renderers = new Map([
    [0, () => romanNumeral(index)],                  // Capital Roman numerals.
    [1, () => latinAlphabet(index).toUpperCase()],   // Capital Latin letters.
    [2, () => (index + 1).toString()],               // Arabic numbers.
    [3, () => latinAlphabet(index).toLowerCase()],   // Lowercase Latin letters.
  ]);
  const render = renderers.get(indentation);
  return render ? render() : undefined;
 }

 /**
  * Calculates the Latin letter representation of a number index.
  *
  * Starts at 'a' and applies nextLatinAlphabet() once per index step, so
  * index 0 gives 'a' and index 1 gives whatever follows 'a'.
  *
  * @param {number} index - Zero-based position in the letter sequence.
  * @return {String} The lowercase letter label at that position.
  */
 function latinAlphabet(index) {
  let letters = 'a';
  let stepsLeft = index;
  while (stepsLeft > 0) {
    letters = nextLatinAlphabet(letters);
    stepsLeft--;
  }
  return letters;
 }

 /**
  * Gets the next letter label in the Latin alphabet, preserving the case of
  * each character and wrapping at the end (e.g. z -> aa, az -> ba).
  * Cribbed from https://stackoverflow.com/a/31540111.
  *
  * @param {String} key - Current label; expected to contain only A-Z / a-z.
  * @return {String} The label that follows key. An empty key yields 'a'.
  */
 function nextLatinAlphabet(key) {
  // Nothing to increment: start the sequence instead of producing the
  // '\u0000' that incrementing a missing character would give.
  if (key === '') {
    return 'a';
  }

  const lastChar = key.slice(-1);
  const prefix = key.slice(0, -1);

  if (lastChar !== 'Z' && lastChar !== 'z') {
    // No carry needed: bump the last character only.
    return prefix + String.fromCharCode(lastChar.charCodeAt(0) + 1);
  }

  // The last character wraps around to A/a (same case as the Z/z it replaces).
  const wrapped = String.fromCharCode(lastChar.charCodeAt(0) - 25);
  if (prefix === '') {
    // A lone Z/z grows by one character: AA or aa.
    return wrapped + wrapped;
  }
  // Carry into the remaining prefix, then append the wrapped character.
  return nextLatinAlphabet(prefix) + wrapped;
 }

 /**
  * Calculates the Roman numeral representation of a number index.
  * The index is zero-based: 0 gives 'I', 3 gives 'IV'.
  * Cribbed from https://stackoverflow.com/a/41358305.
  *
  * @param {number} num - Zero-based index to convert.
  * @return {String} Capital Roman numeral for num + 1.
  */
 function romanNumeral(num) {
  // Ordered from largest to smallest value; the greedy pass relies on it.
  const SYMBOLS = [
    ['M', 1000],
    ['CM', 900],
    ['D', 500],
    ['CD', 400],
    ['C', 100],
    ['XC', 90],
    ['L', 50],
    ['XL', 40],
    ['X', 10],
    ['IX', 9],
    ['V', 5],
    ['IV', 4],
    ['I', 1],
  ];
  let remaining = num + 1;
  return SYMBOLS.reduce((numeral, [symbol, value]) => {
    const count = Math.floor(remaining / value);
    remaining -= count * value;
    return numeral + symbol.repeat(count);
  }, '');
 }

 /**
  * Cleans the outline before formatting it.
  *
  * Rewrites the body text after removing, in order: every newline, then each
  * occurrence of the two running-header patterns below (a number followed by
  * the title and "Message <Word> (continuation)") - presumably page headers
  * left over from the PDF conversion.
  *
  * @param {Body} body - A document body element to clean.
  */
 function preformat(body) {
  const removals = [
    /\n/g,
    /\d+\s+CRYSTALLIZATION STUDY OUTLINES\s+Message [A-Z][a-z]+ \(continuation\)/g,
    /\d+\s+JEREMIAH AND LAMENTATIONS Message [A-Z][a-z]+ \(continuation\)/g,
  ];
  const cleaned = removals.reduce(
    (text, pattern) => text.replace(pattern, ''),
    body.getText()
  );
  body.setText(cleaned);
 }

 /**
  * Splits the body text into outline points, consuming the body as it goes
  * (the body text is empty once this returns).
  *
  * On every pass the candidate labels from nextOutlinePoints() are searched
  * for and the one found at the smallest offset marks the end of the current
  * point. Everything up to and including the first space of that chunk (the
  * point's own label) is dropped from the stored text.
  *
  * @param {Body} body - A document body element to format.
  * @return {Array} A list of outline point text with the indentation level
  * of the following point (0 when no further label is found). For example,
  * [
  *     {text: "First point", indentation: 1},
  *     {text: "Second point", indentation: 1},
  *     ...
  * ]
  */
 function extractPoints(body) {
  let counters = [0];
  const points = [];

  while (body.getText().length > 0) {
    // Locate the candidate label that starts earliest in the remaining text.
    // NOTE(review): assumes getStartOffset() is relative to the whole body
    // text, i.e. the body is a single text run - confirm.
    let cutoff = Infinity;
    let depth = 0;
    nextOutlinePoints(counters).forEach((pattern, level) => {
      const match = body.findText(pattern);
      if (!match) {
        return;
      }
      const offset = match.getStartOffset();
      if (offset < cutoff) {
        cutoff = offset;
        depth = level;
      }
    });

    // Current point = text before the label, minus its own leading label.
    const chunk = body.getText().substring(0, cutoff);
    points.push({
      text: chunk.substring(chunk.indexOf(' ') + 1),
      indentation: depth,
    });

    // Drop the consumed chunk and the space that precedes the next label.
    body.setText(body.getText().substring(cutoff + 1));

    // Open a deeper level or advance the matched one, then forget every
    // level nested below it.
    if (depth < counters.length) {
      counters[depth] += 1;
    } else {
      counters.push(0);
    }
    counters = counters.slice(0, depth + 1);
  }

  return points;
 }

 /**
  * Creates ListItems with the correct indentation, given outline points
  * (see extractPoints).
  *
  * Each point stores the indentation of the point that FOLLOWS it, so the
  * nesting level applied to an item is the one recorded on the previous
  * point; the first item is placed at level 0.
  *
  * @param {Body} body - A document body element.
  * @param {Array} points - A list of outline points, each shaped like
  *     {text: String, indentation: number}.
  */
 function createOutline(body, points) {
  let nestingLevel = 0;

  for (const point of points) {
    body.appendListItem(point.text).setNestingLevel(nestingLevel);
    // Carry this point's recorded level over to the next item.
    nestingLevel = point.indentation;
  }
 }

 /**
  * Entry point: cleans the active document's body, extracts the outline
  * points from it, and appends them back as nested list items.
  */
 function main() {
  const body = DocumentApp.getActiveDocument().getBody();

  preformat(body);
  createOutline(body, extractPoints(body));
 }
	/**
	* Convenience script for formatting Google Docs converted from outline PDF files.
	* Simply copy/paste the outline text, and the script cleans up the whitespace
	* and correctly formats the document according to the Roman numerals found
	* in the text.
	*/

	/**
	 * Builds the search strings for every outline point that may come next.
	 *
	 * One candidate is produced for each level already open (the next sibling
	 * at that level) plus one for a brand-new, deeper level (its first point).
	 * Levels for which nextOutlinePointString() yields nothing are skipped.
	 *
	 * @param {Array} indices - Current zero-based counter of each open level,
	 *     outermost first.
	 * @return {Array} Search strings of the form ' <label>\. ' - note the
	 *     backslash-escaped period. For example, [' II\\. ', ' A\\. '].
	 */
	function nextOutlinePoints(indices) {
	  // The trailing -1 stands for the not-yet-opened deeper level, so that
	  // "counter + 1" addresses its first point.
	  const counters = [...indices, -1];
	  return counters
	    .map((counter, depth) => nextOutlinePointString(depth, counter + 1))
	    .filter((label) => label)
	    .map((label) => ' ' + label + '\\. ');
	}

	/**
	 * Renders the label of an outline point from its nesting level and index.
	 *
	 * Level 0 uses romanNumeral(), levels 1 and 3 use latinAlphabet() in upper
	 * and lower case respectively, and level 2 uses the decimal value index + 1.
	 *
	 * @param {number} indentation - Nesting level of the point.
	 * @param {number} index - Which point at this level to render.
	 * @return {String} The label, or undefined when the level is not 0-3.
	 */
	function nextOutlinePointString(indentation, index) {
	  const renderers = new Map([
	    [0, () => romanNumeral(index)],                  // Capital Roman numerals.
	    [1, () => latinAlphabet(index).toUpperCase()],   // Capital Latin letters.
	    [2, () => (index + 1).toString()],               // Arabic numbers.
	    [3, () => latinAlphabet(index).toLowerCase()],   // Lowercase Latin letters.
	  ]);
	  const render = renderers.get(indentation);
	  return render ? render() : undefined;
	}

	/**
	 * Calculates the Latin letter representation of a number index.
	 *
	 * Starts at 'a' and applies nextLatinAlphabet() once per index step, so
	 * index 0 gives 'a' and index 1 gives whatever follows 'a'.
	 *
	 * @param {number} index - Zero-based position in the letter sequence.
	 * @return {String} The lowercase letter label at that position.
	 */
	function latinAlphabet(index) {
	  let letters = 'a';
	  let stepsLeft = index;
	  while (stepsLeft > 0) {
	    letters = nextLatinAlphabet(letters);
	    stepsLeft--;
	  }
	  return letters;
	}

	/**
	 * Gets the next letter label in the Latin alphabet, preserving the case of
	 * each character and wrapping at the end (e.g. z -> aa, az -> ba).
	 * Cribbed from https://stackoverflow.com/a/31540111.
	 *
	 * @param {String} key - Current label; expected to contain only A-Z / a-z.
	 * @return {String} The label that follows key. An empty key yields 'a'.
	 */
	function nextLatinAlphabet(key) {
	  // Nothing to increment: start the sequence instead of producing the
	  // '\u0000' that incrementing a missing character would give.
	  if (key === '') {
	    return 'a';
	  }

	  const lastChar = key.slice(-1);
	  const prefix = key.slice(0, -1);

	  if (lastChar !== 'Z' && lastChar !== 'z') {
	    // No carry needed: bump the last character only.
	    return prefix + String.fromCharCode(lastChar.charCodeAt(0) + 1);
	  }

	  // The last character wraps around to A/a (same case as the Z/z it replaces).
	  const wrapped = String.fromCharCode(lastChar.charCodeAt(0) - 25);
	  if (prefix === '') {
	    // A lone Z/z grows by one character: AA or aa.
	    return wrapped + wrapped;
	  }
	  // Carry into the remaining prefix, then append the wrapped character.
	  return nextLatinAlphabet(prefix) + wrapped;
	}

	/**
	 * Calculates the Roman numeral representation of a number index.
	 * The index is zero-based: 0 gives 'I', 3 gives 'IV'.
	 * Cribbed from https://stackoverflow.com/a/41358305.
	 *
	 * @param {number} num - Zero-based index to convert.
	 * @return {String} Capital Roman numeral for num + 1.
	 */
	function romanNumeral(num) {
	  // Ordered from largest to smallest value; the greedy pass relies on it.
	  const SYMBOLS = [
	    ['M', 1000],
	    ['CM', 900],
	    ['D', 500],
	    ['CD', 400],
	    ['C', 100],
	    ['XC', 90],
	    ['L', 50],
	    ['XL', 40],
	    ['X', 10],
	    ['IX', 9],
	    ['V', 5],
	    ['IV', 4],
	    ['I', 1],
	  ];
	  let remaining = num + 1;
	  return SYMBOLS.reduce((numeral, [symbol, value]) => {
	    const count = Math.floor(remaining / value);
	    remaining -= count * value;
	    return numeral + symbol.repeat(count);
	  }, '');
	}

	/**
	 * Cleans the outline before formatting it.
	 *
	 * Rewrites the body text after removing, in order: every newline, then each
	 * occurrence of the two running-header patterns below (a number followed by
	 * the title and "Message <Word> (continuation)") - presumably page headers
	 * left over from the PDF conversion.
	 *
	 * @param {Body} body - A document body element to clean.
	 */
	function preformat(body) {
	  const removals = [
	    /\n/g,
	    /\d+\s+CRYSTALLIZATION STUDY OUTLINES\s+Message [A-Z][a-z]+ \(continuation\)/g,
	    /\d+\s+JEREMIAH AND LAMENTATIONS Message [A-Z][a-z]+ \(continuation\)/g,
	  ];
	  const cleaned = removals.reduce(
	    (text, pattern) => text.replace(pattern, ''),
	    body.getText()
	  );
	  body.setText(cleaned);
	}

	/**
	 * Splits the body text into outline points, consuming the body as it goes
	 * (the body text is empty once this returns).
	 *
	 * On every pass the candidate labels from nextOutlinePoints() are searched
	 * for and the one found at the smallest offset marks the end of the current
	 * point. Everything up to and including the first space of that chunk (the
	 * point's own label) is dropped from the stored text.
	 *
	 * @param {Body} body - A document body element to format.
	 * @return {Array} A list of outline point text with the indentation level
	 * of the following point (0 when no further label is found). For example,
	 * [
	 *     {text: "First point", indentation: 1},
	 *     {text: "Second point", indentation: 1},
	 *     ...
	 * ]
	 */
	function extractPoints(body) {
	  let counters = [0];
	  const points = [];

	  while (body.getText().length > 0) {
	    // Locate the candidate label that starts earliest in the remaining text.
	    // NOTE(review): assumes getStartOffset() is relative to the whole body
	    // text, i.e. the body is a single text run - confirm.
	    let cutoff = Infinity;
	    let depth = 0;
	    nextOutlinePoints(counters).forEach((pattern, level) => {
	      const match = body.findText(pattern);
	      if (!match) {
	        return;
	      }
	      const offset = match.getStartOffset();
	      if (offset < cutoff) {
	        cutoff = offset;
	        depth = level;
	      }
	    });

	    // Current point = text before the label, minus its own leading label.
	    const chunk = body.getText().substring(0, cutoff);
	    points.push({
	      text: chunk.substring(chunk.indexOf(' ') + 1),
	      indentation: depth,
	    });

	    // Drop the consumed chunk and the space that precedes the next label.
	    body.setText(body.getText().substring(cutoff + 1));

	    // Open a deeper level or advance the matched one, then forget every
	    // level nested below it.
	    if (depth < counters.length) {
	      counters[depth] += 1;
	    } else {
	      counters.push(0);
	    }
	    counters = counters.slice(0, depth + 1);
	  }

	  return points;
	}

	/**
	 * Creates ListItems with the correct indentation, given outline points
	 * (see extractPoints).
	 *
	 * Each point stores the indentation of the point that FOLLOWS it, so the
	 * nesting level applied to an item is the one recorded on the previous
	 * point; the first item is placed at level 0.
	 *
	 * @param {Body} body - A document body element.
	 * @param {Array} points - A list of outline points, each shaped like
	 *     {text: String, indentation: number}.
	 */
	function createOutline(body, points) {
	  let nestingLevel = 0;

	  for (const point of points) {
	    body.appendListItem(point.text).setNestingLevel(nestingLevel);
	    // Carry this point's recorded level over to the next item.
	    nestingLevel = point.indentation;
	  }
	}

	/**
	 * Entry point: cleans the active document's body, extracts the outline
	 * points from it, and appends them back as nested list items.
	 */
	function main() {
	  const body = DocumentApp.getActiveDocument().getBody();

	  preformat(body);
	  createOutline(body, extractPoints(body));
	}