Skip to content

Instantly share code, notes, and snippets.

@billyeh
Last active December 30, 2021 00:26
Show Gist options
  • Save billyeh/7c1c8f8f9c6e3f30f281a578faab6a69 to your computer and use it in GitHub Desktop.
Save billyeh/7c1c8f8f9c6e3f30f281a578faab6a69 to your computer and use it in GitHub Desktop.
Format text outlines in Google Docs (from PDF conversions)
/**
* Convenience script for formatting Google Docs converted from outline PDF files.
* Simply copy/paste the outline text, and the script cleans up the whitespace
* and formats the document as a nested list according to the outline markers
* (Roman numerals, letters, and numbers) found in the text.
*/
/**
* Calculates all the next Roman numeral strings we expect.
* @return {Array} List of outline point strings to search the
* document for. For example, [' I. ', ' C. ', ' 1. '].
*/
function nextOutlinePoints(indices) {
const ret = [];
for (let indentation = 0; indentation <= indices.length; indentation++) {
const index = indentation === indices.length ? -1 : indices[indentation];
const pointString = nextOutlinePointString(indentation, index + 1);
if (pointString) {
ret.push(' ' + pointString + '\\. ');
}
}
return ret;
}
/**
* Gets the string representation of an outline point given its indentation and index.
* @param {number} indentation - Current indentation level.
* @param {number} index - Which point at this indentation level to represent.
* @return {String} The string representation of this outline point.
*/
function nextOutlinePointString(indentation, index) {
switch(indentation) {
case 0: // Capital Roman numerals.
return romanNumeral(index);
case 1: // Capital Latin letters.
return latinAlphabet(index).toUpperCase();
case 2: // Arabic Numbers.
return (index + 1).toString();
case 3: // Lowercase Latin letters.
return latinAlphabet(index).toLowerCase();
}
}
/* Calculates the Latin letter representation of a number index. */
function latinAlphabet(index) {
let currentLetter = 'a';
for (let i = 0; i < index; i++) {
currentLetter = nextLatinAlphabet(currentLetter);
}
return currentLetter;
}
/**
* Gets the next letter in the Latin alphabet, handling upper case and
* wrapping (e.g. z -> aa.).
* Cribbed from https://stackoverflow.com/a/31540111.
*/
function nextLatinAlphabet(key) {
if (key === 'Z' || key === 'z') {
return String.fromCharCode(key.charCodeAt() - 25) + String.fromCharCode(key.charCodeAt() - 25); // AA or aa
} else {
let lastChar = key.slice(-1);
let sub = key.slice(0, -1);
if (lastChar === 'Z' || lastChar === 'z') {
// If a string of length > 1 ends in Z/z,
// increment the string (excluding the last Z/z) recursively,
// and append A/a (depending on casing) to it
return nextLatinAlphabet(sub) + String.fromCharCode(lastChar.charCodeAt() - 25);
} else {
// (take till last char) append with (increment last char)
return sub + String.fromCharCode(lastChar.charCodeAt() + 1);
}
}
return key;
}
/**
* Calculates the Roman numeral representation of a number index.
* Cribbed from https://stackoverflow.com/a/41358305.
*/
function romanNumeral(num) {
num += 1;
const ROMAN = {
M: 1000,
CM: 900,
D: 500,
CD: 400,
C: 100,
XC: 90,
L: 50,
XL: 40,
X: 10,
IX: 9,
V: 5,
IV: 4,
I: 1,
};
let ret = [];
for (let i of Object.keys(ROMAN)) {
let q = Math.floor(num / ROMAN[i]);
num -= q * ROMAN[i];
ret.push(i.repeat(q));
}
return ret.join('');
}
/**
* Cleans the outline before formatting it.
* @param {Body} body - A document body element to clean.
*/
function preformat(body) {
let bodyText = body.getText();
bodyText = bodyText.replace(/\n/g, '');
bodyText = bodyText.replace(/\d+\s+CRYSTALLIZATION STUDY OUTLINES\s+Message [A-Z][a-z]+ \(continuation\)/g, '');
bodyText = bodyText.replace(/\d+\s+JEREMIAH AND LAMENTATIONS Message [A-Z][a-z]+ \(continuation\)/g, '');
body.setText(bodyText);
}
/**
* Extracts outline text in the document into outline formatting.
* @param {Body} body - A document body element to format.
* @return {Array} A list of outline point text with the indentation level
* of the following point. For example,
* [
* {text: "I. First point", indentation: 1},
* {text: "A. Second point", indentation: 1},
* ...
* ]
*/
function extractPoints(body) {
let outlinePoint = [0];
let nextIndex = Infinity;
let texts = [];
while (body.getText().length > 0) {
let nextPossiblePoints = nextOutlinePoints(outlinePoint);
let indentation = 0;
for (let i = 0; i < nextPossiblePoints.length; i++) {
let range = body.findText(nextPossiblePoints[i]);
if (range && range.getStartOffset() < nextIndex) {
nextIndex = range.getStartOffset();
indentation = i;
}
}
let text = body.getText().substr(0, nextIndex);
text = text.substr(text.indexOf(' ') + 1);
texts.push({
text: text,
indentation: indentation,
});
body.setText(body.getText().substr(nextIndex + 1));
if (indentation >= outlinePoint.length) {
outlinePoint.push(0);
} else {
outlinePoint[indentation]++;
}
outlinePoint = outlinePoint.slice(0, indentation + 1);
nextIndex = Infinity;
}
return(texts);
}
/**
* Creates ListItems with the correct indentation, given outline points
* (see extractPoints).
* @param(Body} body - A document body element.
* @param{Array} points - A list of outline points.
*/
function createOutline(body, points) {
let currentIndentation = 0;
let previousListItem = null;
for (const point of points) {
let currentListItem = body.appendListItem(point.text);
currentListItem.setNestingLevel(currentIndentation);
currentIndentation = point.indentation;
previousListItem = currentListItem;
}
}
function main() {
const document = DocumentApp.getActiveDocument();
const body = document.getBody();
preformat(body);
let points = extractPoints(body);
createOutline(body, points);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment