Last active
December 30, 2021 00:26
-
-
Save billyeh/7c1c8f8f9c6e3f30f281a578faab6a69 to your computer and use it in GitHub Desktop.
Format text outlines in Google Docs (from PDF conversions)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Convenient script for formatting Google Docs converted from outline PDF files.
 * Simply copy/paste the outline text, and the script cleans up the whitespace
 * and correctly formats the document according to the Roman numerals found
 * in the text.
 */
/**
 * Calculates the search patterns for every outline point that could come next.
 *
 * One pattern is produced per indentation level: the successor of the current
 * point at each level already open in `indices`, plus the first point of the
 * next deeper level. Levels for which nextOutlinePointString yields nothing
 * are skipped.
 *
 * @param {Array<number>} indices - Zero-based index of the current point at
 *     each open indentation level, outermost first.
 * @return {Array<string>} Pattern strings of the form ' <marker>\. ' (the dot
 *     is backslash-escaped so the string can be used as a regular expression).
 *     For example, [0] yields [' II\\. ', ' A\\. '].
 */
function nextOutlinePoints(indices) {
  const patterns = [];
  for (let indentation = 0; indentation <= indices.length; indentation++) {
    // One level past the deepest open level has no current point yet, so
    // start from -1 to make its "next" point the first one.
    const index = indentation === indices.length ? -1 : indices[indentation];
    const pointString = nextOutlinePointString(indentation, index + 1);
    if (pointString) {
      patterns.push(` ${pointString}\\. `);
    }
  }
  return patterns;
}
/**
 * Gets the string representation of an outline point given its indentation and index.
 *
 * Level 0 uses capital Roman numerals, level 1 capital Latin letters, level 2
 * Arabic numbers and level 3 lowercase Latin letters.
 *
 * @param {number} indentation - Current indentation level.
 * @param {number} index - Zero-based position of the point at this level.
 * @return {string|undefined} The string representation of this outline point,
 *     or undefined when the indentation level is not one of 0-3.
 */
function nextOutlinePointString(indentation, index) {
  switch (indentation) {
    case 0: // Capital Roman numerals.
      return romanNumeral(index);
    case 1: // Capital Latin letters.
      return latinAlphabet(index).toUpperCase();
    case 2: // Arabic numbers (index is zero-based, so shift to one-based).
      return (index + 1).toString();
    case 3: // Lowercase Latin letters.
      return latinAlphabet(index).toLowerCase();
    default: // No marker style is defined for any other level.
      return undefined;
  }
}
/**
 * Calculates the Latin letter representation of a number index.
 *
 * Starts at 'a' and advances one letter per unit of `index` using
 * nextLatinAlphabet, so 0 gives 'a' and 1 gives 'b'. An index of zero or less
 * returns 'a'.
 *
 * @param {number} index - Zero-based position in the letter sequence.
 * @return {string} The lowercase letter string for that position.
 */
function latinAlphabet(index) {
  let currentLetter = 'a';
  for (let step = 0; step < index; step++) {
    currentLetter = nextLatinAlphabet(currentLetter);
  }
  return currentLetter;
}
/**
 * Gets the next letter string in the Latin alphabet, preserving case and
 * wrapping on overflow (e.g. 'a' -> 'b', 'z' -> 'aa', 'az' -> 'ba').
 * Cribbed from https://stackoverflow.com/a/31540111.
 *
 * @param {string} key - The current letter string.
 * @return {string} The letter string that follows `key`. An empty string is
 *     returned unchanged.
 */
function nextLatinAlphabet(key) {
  if (key.length === 0) {
    // Nothing to increment.
    return key;
  }
  const head = key.slice(0, -1);
  const lastChar = key.slice(-1);
  if (lastChar !== 'Z' && lastChar !== 'z') {
    // Common case: bump only the final character.
    return head + String.fromCharCode(lastChar.charCodeAt(0) + 1);
  }
  // The final character wraps around: 'Z' -> 'A', 'z' -> 'a'.
  const wrapped = String.fromCharCode(lastChar.charCodeAt(0) - 25);
  if (head === '') {
    // A lone Z/z grows by one character: 'Z' -> 'AA', 'z' -> 'aa'.
    return wrapped + wrapped;
  }
  // Carry into the preceding characters, then append the wrapped letter.
  return nextLatinAlphabet(head) + wrapped;
}
/**
 * Calculates the Roman numeral representation of a number index.
 * Cribbed from https://stackoverflow.com/a/41358305.
 *
 * The index is zero-based: 0 gives 'I', 1 gives 'II', 3 gives 'IV'.
 *
 * @param {number} num - Zero-based index to convert.
 * @return {string} The capital Roman numeral for `num + 1`, or an empty
 *     string when `num + 1` is less than 1.
 */
function romanNumeral(num) {
  // Symbols from largest to smallest, kept as ordered pairs so the greedy
  // subtraction below does not depend on object key ordering.
  const ROMAN = [
    ['M', 1000],
    ['CM', 900],
    ['D', 500],
    ['CD', 400],
    ['C', 100],
    ['XC', 90],
    ['L', 50],
    ['XL', 40],
    ['X', 10],
    ['IX', 9],
    ['V', 5],
    ['IV', 4],
    ['I', 1],
  ];
  let remaining = num + 1;
  if (!(remaining >= 1)) {
    // Nothing to represent; also avoids String.prototype.repeat throwing a
    // RangeError on a negative count.
    return '';
  }
  const parts = [];
  for (const [symbol, value] of ROMAN) {
    const count = Math.floor(remaining / value);
    remaining -= count * value;
    parts.push(symbol.repeat(count));
  }
  return parts.join('');
}
/**
 * Cleans the outline before formatting it.
 *
 * Reads the body text, removes every newline character, removes the
 * "<number> CRYSTALLIZATION STUDY OUTLINES Message <Word> (continuation)" and
 * "<number> JEREMIAH AND LAMENTATIONS Message <Word> (continuation)" running
 * headers, and writes the result back to the body.
 *
 * @param {Body} body - A document body element to clean. Only its getText()
 *     and setText() methods are used.
 */
function preformat(body) {
  const cleaned = body
    .getText()
    // Newlines are dropped first, so the header patterns below run against a
    // single line of text.
    .replace(/\n/g, '')
    .replace(/\d+\s+CRYSTALLIZATION STUDY OUTLINES\s+Message [A-Z][a-z]+ \(continuation\)/g, '')
    .replace(/\d+\s+JEREMIAH AND LAMENTATIONS Message [A-Z][a-z]+ \(continuation\)/g, '');
  body.setText(cleaned);
}
/**
 * Extracts outline text in the document into outline formatting.
 *
 * Repeatedly searches the body for the earliest upcoming outline marker (see
 * nextOutlinePoints), cuts the text before it off the front of the body, and
 * records that text. The body is consumed in the process: each pass rewrites
 * it with the remaining text, and it is empty when this function returns.
 *
 * @param {Body} body - A document body element to format. Uses its getText(),
 *     setText() and findText() methods.
 * @return {Array} A list of outline point text (with everything up to and
 *     including its first space, i.e. the leading marker, stripped) together
 *     with the indentation level of the following point. For example,
 *     [
 *       {text: "First point", indentation: 1},
 *       {text: "Second point", indentation: 1},
 *       ...
 *     ]
 */
function extractPoints(body) {
  // Zero-based index of the current point at each open indentation level.
  let outlinePoint = [0];
  const texts = [];
  while (body.getText().length > 0) {
    const nextPossiblePoints = nextOutlinePoints(outlinePoint);
    // Infinity means "no marker found": the rest of the body becomes the
    // final point and the body ends up empty, terminating the loop.
    let nextIndex = Infinity;
    let indentation = 0;
    for (const [level, pattern] of nextPossiblePoints.entries()) {
      const range = body.findText(pattern);
      if (!range) {
        continue;
      }
      const offset = range.getStartOffset();
      if (offset < nextIndex) {
        nextIndex = offset;
        indentation = level;
      }
    }
    const bodyText = body.getText();
    const chunk = bodyText.substring(0, nextIndex);
    texts.push({
      // Drop everything up to and including the first space (the marker).
      text: chunk.substring(chunk.indexOf(' ') + 1),
      indentation: indentation,
    });
    // Patterns start with a space; skip it so the remaining text begins
    // directly at the next marker.
    body.setText(bodyText.substring(nextIndex + 1));
    if (indentation >= outlinePoint.length) {
      // Descended one level: open it at its first point.
      outlinePoint.push(0);
    } else {
      outlinePoint[indentation]++;
    }
    // Close any levels deeper than the one just matched.
    outlinePoint = outlinePoint.slice(0, indentation + 1);
  }
  return texts;
}
/**
 * Creates ListItems with the correct indentation, given outline points
 * (see extractPoints).
 *
 * Each point stores the indentation of the point that FOLLOWS it, so the
 * first item is placed at level 0 and every later item uses the indentation
 * recorded on its predecessor.
 *
 * @param {Body} body - A document body element. Uses its appendListItem()
 *     method.
 * @param {Array} points - A list of outline points, each with `text` and
 *     `indentation` properties.
 */
function createOutline(body, points) {
  let currentIndentation = 0;
  for (const point of points) {
    const listItem = body.appendListItem(point.text);
    listItem.setNestingLevel(currentIndentation);
    currentIndentation = point.indentation;
  }
}
/**
 * Entry point: cleans the active document's body, extracts its outline
 * points, and appends them back to the body as nested list items.
 */
function main() {
  // Named `doc` rather than `document` to avoid shadowing the familiar global name.
  const doc = DocumentApp.getActiveDocument();
  const body = doc.getBody();
  preformat(body);
  const points = extractPoints(body);
  createOutline(body, points);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment