Created
June 16, 2023 20:30
-
-
Save sdubois/0aec0e149ac16e163feec43d88edb9df to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This script converts the active Google Doc to HTML with custom formatting for
// the Drupal footnotes module (https://www.drupal.org/project/footnotes).
/**
 * Adds a "Convert" entry to the document's add-on menu; choosing it runs
 * ConvertGoogleDocToCleanHtml.
 *
 * @param {Object} e - Argument supplied by the caller; not used here.
 */
function onOpen(e) {
  var ui = DocumentApp.getUi();
  var addonMenu = ui.createAddonMenu();
  // The second argument is the name of the function the menu item invokes.
  var withConvertItem = addonMenu.addItem('Convert', 'ConvertGoogleDocToCleanHtml');
  withConvertItem.addToUi();
}
/**
 * Converts every top-level child of the active document's body to HTML with
 * processItem, joins the fragments with '\r' and hands the result, together
 * with the images collected along the way, to emailHtml.
 */
function ConvertGoogleDocToCleanHtml() {
  var docBody = DocumentApp.getActiveDocument().getBody();
  var childCount = docBody.getNumChildren();
  var collectedImages = [];
  var counters = {};
  var fragments = [];

  // Visit the body's direct children in document order.
  var index = 0;
  while (index < childCount) {
    fragments.push(processItem(docBody.getChild(index), counters, collectedImages));
    index += 1;
  }

  emailHtml(fragments.join('\r'), collectedImages);
  // To generate a new document instead of sending an email, call
  // createDocumentForHtml with the same two arguments in place of emailHtml.
}
/**
 * Emails the generated HTML to the active user.
 *
 * The HTML is used as the message body and is also attached as
 * "<document name>.html"; every image is attached and additionally registered
 * as an inline image under its name.
 *
 * @param {string} html - Generated HTML.
 * @param {Array} images - Entries with `name`, `type` and `blob` properties.
 */
function emailHtml(html, images) {
  // One attachment descriptor per image, built from the blob's bytes.
  var files = images.map(function (image) {
    return {
      "fileName": image.name,
      "mimeType": image.type,
      "content": image.blob.getBytes()
    };
  });

  // Inline images are looked up by name.
  var inlineByName = {};
  images.forEach(function (image) {
    inlineByName[image.name] = image.blob;
  });

  var htmlFileName = DocumentApp.getActiveDocument().getName() + ".html";
  files.push({ "fileName": htmlFileName, "mimeType": "text/html", "content": html });

  var message = {
    to: Session.getActiveUser().getEmail(),
    subject: htmlFileName,
    htmlBody: html,
    inlineImages: inlineByName,
    attachments: files
  };
  MailApp.sendEmail(message);
}
/**
 * Alternative to emailHtml: creates a new Google Doc named
 * "<active document name>.html", sets the generated HTML as its body text,
 * appends each image blob, then saves and closes the new document.
 *
 * @param {string} html - Generated HTML.
 * @param {Array} images - Entries with a `blob` property.
 */
function createDocumentForHtml(html, images) {
  var targetName = DocumentApp.getActiveDocument().getName() + ".html";
  var target = DocumentApp.create(targetName);

  target.getBody().setText(html);
  images.forEach(function (image) {
    target.getBody().appendImage(image.blob);
  });

  target.saveAndClose();
}
/**
 * Debug helper: writes one "name:value" line to the log for each enumerable
 * property of the given attributes object.
 *
 * @param {Object} atts - Attributes to log.
 */
function dumpAttributes(atts) {
  var attName;
  for (attName in atts) {
    var line = attName + ":" + atts[attName];
    Logger.log(line);
  }
}
/**
 * Converts one document element, and recursively its children, to HTML.
 *
 * Paragraphs become <p> or <h1>-<h6> according to their heading. List items
 * become <li>, with the enclosing <ul>/<ol> opened on the first item of a list
 * and closed on its last. Inline images, footnotes and text are delegated to
 * processImage, processFootnote and processText.
 *
 * Known limitation: a nested list is opened inside its parent list but is not
 * closed when the parent list resumes, because closing only happens when the
 * next sibling is not a list item.
 *
 * @param {Object} item - Document element to convert.
 * @param {Object} listCounters - Map from "<listId>.<nestingLevel>" to the
 *     number of items emitted so far for the list that is currently open
 *     under that key. Mutated by this function.
 * @param {Array} images - Image collector passed through to processImage.
 * @return {string} HTML for the element; "" for a paragraph without children.
 */
function processItem(item, listCounters, images) {
  var output = [];
  var prefix = "", suffix = "";
  var type = item.getType();

  if (type === DocumentApp.ElementType.PARAGRAPH) {
    // Map the paragraph heading to the matching HTML tag; anything that is
    // not HEADING1..HEADING6 is emitted as a plain paragraph.
    switch (item.getHeading()) {
      case DocumentApp.ParagraphHeading.HEADING6:
        prefix = "<h6>"; suffix = "</h6>"; break;
      case DocumentApp.ParagraphHeading.HEADING5:
        prefix = "<h5>"; suffix = "</h5>"; break;
      case DocumentApp.ParagraphHeading.HEADING4:
        prefix = "<h4>"; suffix = "</h4>"; break;
      case DocumentApp.ParagraphHeading.HEADING3:
        prefix = "<h3>"; suffix = "</h3>"; break;
      case DocumentApp.ParagraphHeading.HEADING2:
        prefix = "<h2>"; suffix = "</h2>"; break;
      case DocumentApp.ParagraphHeading.HEADING1:
        prefix = "<h1>"; suffix = "</h1>"; break;
      default:
        prefix = "<p>"; suffix = "</p>";
    }
    // Empty paragraphs produce no markup at all.
    if (item.getNumChildren() === 0) {
      return "";
    }
  }
  else if (type === DocumentApp.ElementType.INLINE_IMAGE) {
    processImage(item, images, output);
  }
  else if (type === DocumentApp.ElementType.FOOTNOTE) {
    processFootnote(item, output);
  }
  else if (type === DocumentApp.ElementType.LIST_ITEM) {
    var glyph = item.getGlyphType();
    // The three bullet glyphs mean an unordered list; every other glyph is
    // treated as an ordered list.
    var isBullet = glyph === DocumentApp.GlyphType.BULLET
        || glyph === DocumentApp.GlyphType.HOLLOW_BULLET
        || glyph === DocumentApp.GlyphType.SQUARE_BULLET;
    var listTag = isBullet ? "ul" : "ol";
    var key = item.getListId() + '.' + item.getNestingLevel();
    var counter = listCounters[key] || 0;

    // Only the first item of a list opens the list element. The closing tag
    // is added below, once, when the list actually ends.
    prefix = counter === 0 ? "<" + listTag + "><li>" : "<li>";
    suffix = "</li>";
    counter++;

    // The list ends at the end of the document, when this item is the last
    // element of its container, or when the next sibling is not a list item.
    var next = item.isAtDocumentEnd() ? null : item.getNextSibling();
    if (!next || next.getType() !== DocumentApp.ElementType.LIST_ITEM) {
      suffix += "</" + listTag + ">";
      // Reset so that a later item with the same key re-opens the list
      // instead of emitting an orphan <li>.
      counter = 0;
    }
    listCounters[key] = counter;
  }

  output.push(prefix);

  if (type === DocumentApp.ElementType.TEXT) {
    processText(item, output);
  }
  else if (item.getNumChildren) {
    // Container element: convert each child in document order.
    var numChildren = item.getNumChildren();
    for (var i = 0; i < numChildren; i++) {
      output.push(processItem(item.getChild(i), listCounters, images));
    }
  }

  output.push(suffix);
  return output.join('');
}
/**
 * Converts a text element to HTML fragments and appends them to `output`.
 *
 * With at most one formatting run, the whole text is emitted as <strong> when
 * bold, as <blockquote> when italic, as a link when it starts with http:// or
 * https://, and verbatim otherwise. With several runs, each run is wrapped in
 * <i>/<strong>/<u> according to its attributes; a run of the form "[...]" is
 * emitted as <sup> and a run starting with a URL scheme as a link.
 *
 * Text is inserted as-is; no HTML escaping is performed.
 *
 * @param {Object} item - Text element to convert.
 * @param {Array} output - Array the generated fragments are pushed onto.
 */
function processText(item, output) {
  var text = item.getText();
  var indices = item.getTextAttributeIndices();

  // Wraps `s` in a nofollow anchor when, ignoring surrounding whitespace, it
  // starts with http:// or https://; otherwise returns it unchanged.
  function linkIfUrl(s) {
    var trimmed = s.trim();
    if (trimmed.indexOf('http://') === 0 || trimmed.indexOf('https://') === 0) {
      return '<a href="' + s + '" rel="nofollow">' + s + '</a>';
    }
    return s;
  }

  if (indices.length <= 1) {
    if (item.isBold()) {
      output.push('<strong>' + text + '</strong>');
    }
    else if (item.isItalic()) {
      // Assuming that a whole para fully italic is a quote
      output.push('<blockquote>' + text + '</blockquote>');
    }
    else {
      output.push(linkIfUrl(text));
    }
    return;
  }

  for (var i = 0; i < indices.length; i++) {
    var partAtts = item.getAttributes(indices[i]);
    var startPos = indices[i];
    // A run extends to the start of the next run, or to the end of the text.
    var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
    var partText = text.substring(startPos, endPos);
    Logger.log(partText);

    if (partAtts.ITALIC) {
      output.push('<i>');
    }
    if (partAtts.BOLD) {
      output.push('<strong>');
    }
    if (partAtts.UNDERLINE) {
      output.push('<u>');
    }

    // If someone has written [xxx] and made this whole text some special
    // font, like superscript, treat it as a reference and make it superscript.
    if (partText.indexOf('[') === 0 && partText[partText.length - 1] === ']') {
      output.push('<sup>' + partText + '</sup>');
    }
    else {
      output.push(linkIfUrl(partText));
    }

    // Close in the reverse of the opening order so the tags nest properly.
    if (partAtts.UNDERLINE) {
      output.push('</u>');
    }
    if (partAtts.BOLD) {
      output.push('</strong>');
    }
    if (partAtts.ITALIC) {
      output.push('</i>');
    }
  }
}
/**
 * Converts a footnote element to a single <fn>...</fn> fragment and appends
 * it to `output`. <fn> is the custom tag used by the Drupal footnotes module.
 *
 * @param {Object} item - Footnote element; its contents' paragraphs are each
 *     rendered with processFootnoteItem and concatenated.
 * @param {Array} output - Array the generated fragment is pushed onto.
 */
function processFootnote(item, output){
  var paragraphs = item.getFootnoteContents().getParagraphs();
  var rendered = paragraphs.map(function (paragraph) {
    return processFootnoteItem(paragraph);
  });
  var inner = rendered.join('');
  output.push('<fn>' + inner + '</fn>');
}
/**
 * Renders the text of one footnote paragraph as HTML, keeping only italic and
 * bold formatting. A formatting run of the form "[...]" is emitted as <sup>.
 *
 * Every child of the paragraph is expected to provide getText,
 * getTextAttributeIndices and getAttributes.
 *
 * @param {Object} footnoteItem - Element taken from a footnote's contents.
 * @return {string} HTML for the paragraph; "" when the element is not a
 *     paragraph.
 */
function processFootnoteItem(footnoteItem){
  // Local on purpose: an undeclared `output` would leak a global variable.
  var output = [];

  if (footnoteItem.getType() === DocumentApp.ElementType.PARAGRAPH) {
    var numChildren = footnoteItem.getNumChildren();
    // The child loop and the run loop need distinct counters; sharing one
    // makes the run loop overwrite the child position.
    for (var childIndex = 0; childIndex < numChildren; childIndex++) {
      var item = footnoteItem.getChild(childIndex);
      var text = item.getText();
      var indices = item.getTextAttributeIndices();

      for (var run = 0; run < indices.length; run++) {
        var partAtts = item.getAttributes(indices[run]);
        var startPos = indices[run];
        // A run extends to the start of the next run, or to the end of the text.
        var endPos = run + 1 < indices.length ? indices[run + 1] : text.length;
        var partText = text.substring(startPos, endPos);
        Logger.log(partText);

        if (partAtts.ITALIC) {
          output.push('<i>');
        }
        if (partAtts.BOLD) {
          output.push('<strong>');
        }

        // If someone has written [xxx] and made this whole text some special
        // font, like superscript, treat it as a reference and make it
        // superscript.
        if (partText.indexOf('[') === 0 && partText[partText.length - 1] === ']') {
          output.push('<sup>' + partText + '</sup>');
        }
        else {
          output.push(partText);
        }

        // Close in the reverse of the opening order so the tags nest properly.
        if (partAtts.BOLD) {
          output.push('</strong>');
        }
        if (partAtts.ITALIC) {
          output.push('</i>');
        }
      }
    }
  }

  return output.join('');
}
/**
 * Registers an inline image and appends an <img> tag that references it by
 * content id ("cid:<name>").
 *
 * The name is "Image_<n><extension>", where n is the number of images already
 * collected and the extension is derived from the blob's content type.
 *
 * @param {Object} item - Image element providing getBlob().
 * @param {Array} images - Collector; an entry with `blob`, `type` and `name`
 *     is appended. When falsy, a throwaway array is used instead.
 * @param {Array} output - Array the <img> tag is pushed onto.
 * @throws {Error} When the content type is not PNG, GIF or JPEG.
 */
function processImage(item, images, output) {
  images = images || [];
  var blob = item.getBlob();
  var contentType = blob.getContentType();

  var extension;
  if (/\/png$/.test(contentType)) {
    extension = ".png";
  } else if (/\/gif$/.test(contentType)) {
    extension = ".gif";
  } else if (/\/jpe?g$/.test(contentType)) {
    extension = ".jpg";
  } else {
    // Throw a real Error (not a bare string) so the failure carries a stack.
    throw new Error("Unsupported image type: " + contentType);
  }

  // The current collection size doubles as a running image number.
  var name = "Image_" + images.length + extension;
  output.push('<img src="cid:' + name + '" />');
  images.push({
    "blob": blob,
    "type": contentType,
    "name": name
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment