Skip to content

Instantly share code, notes, and snippets.

@sdubois
Created June 16, 2023 20:30
Show Gist options
  • Save sdubois/0aec0e149ac16e163feec43d88edb9df to your computer and use it in GitHub Desktop.
Save sdubois/0aec0e149ac16e163feec43d88edb9df to your computer and use it in GitHub Desktop.
// This script converts the active Google Doc to HTML with custom formatting for
// the Drupal footnotes module (https://www.drupal.org/project/footnotes)
/**
 * Adds a "Convert" entry to the document's add-on menu. The entry is bound
 * to the ConvertGoogleDocToCleanHtml function by name.
 *
 * @param {Object} e - Event object passed in by the caller (not used here).
 */
function onOpen(e) {
  var addonMenu = DocumentApp.getUi().createAddonMenu();
  var menuWithItem = addonMenu.addItem('Convert', 'ConvertGoogleDocToCleanHtml');
  menuWithItem.addToUi();
}
/**
 * Menu entry point: renders every top-level element of the active document's
 * body to HTML with processItem, joins the pieces with a carriage return and
 * hands the result (plus any collected images) to emailHtml.
 */
function ConvertGoogleDocToCleanHtml() {
  var docBody = DocumentApp.getActiveDocument().getBody();
  var childCount = docBody.getNumChildren();
  var images = [];        // passed to processItem, which collects inline images into it
  var listCounters = {};  // list bookkeeping shared across all processItem calls
  var fragments = [];
  var index = 0;
  // Visit the body's direct children in document order.
  while (index < childCount) {
    fragments.push(processItem(docBody.getChild(index), listCounters, images));
    index += 1;
  }
  var html = fragments.join('\r');
  emailHtml(html, images);
  // To generate a new document instead of sending an email, call this instead:
  //createDocumentForHtml(html, images);
}
/**
 * Emails the generated HTML to the active user.
 *
 * The HTML is used as the message body and is also attached as
 * "<document name>.html". Every collected image is attached as a file and
 * registered as an inline image under its name, which is the same name the
 * generated <img src="cid:..."> tags refer to.
 *
 * @param {string} html - Generated HTML markup.
 * @param {Array<{name: string, type: string, blob: Object}>} images -
 *     Images collected during conversion; may be empty or omitted.
 */
function emailHtml(html, images) {
  var imageList = images || [];
  var attachments = [];
  var inlineImages = {};
  // One pass builds both the attachment list and the cid -> blob map.
  for (var j = 0; j < imageList.length; j++) {
    var image = imageList[j];
    attachments.push({
      "fileName": image.name,
      "mimeType": image.type,
      "content": image.blob.getBytes()
    });
    // Key directly by the image name (no array wrapper / implicit coercion).
    inlineImages[image.name] = image.blob;
  }
  var name = DocumentApp.getActiveDocument().getName() + ".html";
  attachments.push({"fileName": name, "mimeType": "text/html", "content": html});
  MailApp.sendEmail({
    to: Session.getActiveUser().getEmail(),
    subject: name,
    htmlBody: html,
    inlineImages: inlineImages,
    attachments: attachments
  });
}
/**
 * Alternative to emailHtml: creates a new Google Doc named
 * "<active document name>.html", sets the generated HTML as its body text
 * and appends each collected image after it.
 *
 * @param {string} html - Generated HTML markup, stored as plain text.
 * @param {Array<{blob: Object}>} images - Images collected during conversion.
 */
function createDocumentForHtml(html, images) {
  var title = DocumentApp.getActiveDocument().getName() + ".html";
  var target = DocumentApp.create(title);
  var targetBody = target.getBody();
  targetBody.setText(html);
  for (var idx = 0; idx < images.length; idx += 1) {
    targetBody.appendImage(images[idx].blob);
  }
  target.saveAndClose();
}
/**
 * Debugging aid: writes one "name:value" line to the log for every
 * enumerable property of the given attributes object.
 *
 * @param {Object} atts - Attribute map to dump.
 */
function dumpAttributes(atts) {
  var attName;
  for (attName in atts) {
    var line = attName + ":" + atts[attName];
    Logger.log(line);
  }
}
/**
 * Converts one document element, and recursively its children, to HTML.
 *
 * Paragraphs become <h1>-<h6> or <p> depending on their heading; empty
 * paragraphs produce no output. List items become <li>, with the enclosing
 * <ul>/<ol> opened on the first item seen for a given list id + nesting
 * level and closed after the last item of a run. Inline images, footnotes
 * and text are delegated to processImage, processFootnote and processText.
 *
 * @param {Object} item - Document element to convert.
 * @param {Object} listCounters - Map of "<listId>.<nestingLevel>" to the
 *     number of items already emitted for that list; updated in place.
 * @param {Array} images - Accumulator passed through to processImage.
 * @returns {string} HTML for the element ("" for an empty paragraph).
 */
function processItem(item, listCounters, images) {
  var output = [];
  var prefix = "";
  var suffix = "";
  var type = item.getType();

  if (type === DocumentApp.ElementType.PARAGRAPH) {
    // Map the paragraph heading to the matching HTML tag; anything that is
    // not HEADING1-6 is rendered as a plain paragraph.
    var tag;
    switch (item.getHeading()) {
      case DocumentApp.ParagraphHeading.HEADING6: tag = "h6"; break;
      case DocumentApp.ParagraphHeading.HEADING5: tag = "h5"; break;
      case DocumentApp.ParagraphHeading.HEADING4: tag = "h4"; break;
      case DocumentApp.ParagraphHeading.HEADING3: tag = "h3"; break;
      case DocumentApp.ParagraphHeading.HEADING2: tag = "h2"; break;
      case DocumentApp.ParagraphHeading.HEADING1: tag = "h1"; break;
      default: tag = "p";
    }
    if (item.getNumChildren() === 0) {
      return "";
    }
    prefix = "<" + tag + ">";
    suffix = "</" + tag + ">";
  }
  else if (type === DocumentApp.ElementType.INLINE_IMAGE) {
    processImage(item, images, output);
  }
  else if (type === DocumentApp.ElementType.FOOTNOTE) {
    processFootnote(item, output);
  }
  else if (type === DocumentApp.ElementType.LIST_ITEM) {
    var gt = item.getGlyphType();
    // Bullet-style glyphs map to <ul>; every other glyph is treated as ordered.
    var isBulleted = gt === DocumentApp.GlyphType.BULLET
      || gt === DocumentApp.GlyphType.HOLLOW_BULLET
      || gt === DocumentApp.GlyphType.SQUARE_BULLET;
    var listTag = isBulleted ? "ul" : "ol";
    var key = item.getListId() + '.' + item.getNestingLevel();
    var counter = listCounters[key] || 0;

    // Only the first item of a list opens the list element. The list is
    // closed exclusively by the end-of-run check below, so a bullet list is
    // no longer closed right after its first item.
    prefix = (counter === 0 ? "<" + listTag + ">" : "") + "<li>";
    suffix = "</li>";

    // Close the list when nothing follows this item or the next sibling is
    // not a list item. The missing-sibling case covers items that end their
    // container without being at the end of the document.
    var next = item.getNextSibling();
    if (item.isAtDocumentEnd() || !next || next.getType() !== DocumentApp.ElementType.LIST_ITEM) {
      suffix += "</" + listTag + ">";
    }
    listCounters[key] = counter + 1;
  }

  output.push(prefix);
  if (type === DocumentApp.ElementType.TEXT) {
    processText(item, output);
  }
  else if (item.getNumChildren) {
    // Container element: render each child in order.
    var numChildren = item.getNumChildren();
    for (var i = 0; i < numChildren; i++) {
      output.push(processItem(item.getChild(i), listCounters, images));
    }
  }
  output.push(suffix);
  return output.join('');
}
/**
 * Converts a text element to HTML fragments and appends them to `output`.
 *
 * When the element has at most one formatting run it is handled as a whole:
 * bold text becomes <strong>, italic text becomes <blockquote> (a fully
 * italic paragraph is assumed to be a quote), a bare http(s) URL becomes a
 * link, and anything else is emitted unchanged.
 *
 * Otherwise each formatting run is emitted separately, wrapped in
 * <i>/<strong>/<u> according to its attributes. A run of the form "[...]" is
 * treated as a reference marker and wrapped in <sup>.
 *
 * @param {Object} item - Text element to convert.
 * @param {Array<string>} output - Fragment accumulator; appended to in place.
 */
function processText(item, output) {
  var text = item.getText();
  var indices = item.getTextAttributeIndices();

  // Wraps a fragment in a nofollow link when it starts (ignoring surrounding
  // whitespace) with http:// or https://; otherwise returns it unchanged.
  // The href uses the trimmed URL so stray whitespace does not end up inside
  // the attribute value; the visible link text is left as written.
  function linkify(fragment) {
    var trimmed = fragment.trim();
    if (/^https?:\/\//.test(trimmed)) {
      return '<a href="' + trimmed + '" rel="nofollow">' + fragment + '</a>';
    }
    return fragment;
  }

  if (indices.length <= 1) {
    if (item.isBold()) {
      output.push('<strong>' + text + '</strong>');
    }
    else if (item.isItalic()) {
      // Assuming that a whole para fully italic is a quote
      output.push('<blockquote>' + text + '</blockquote>');
    }
    else {
      output.push(linkify(text));
    }
    return;
  }

  for (var i = 0; i < indices.length; i++) {
    var startPos = indices[i];
    var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
    var partAtts = item.getAttributes(startPos);
    var partText = text.substring(startPos, endPos);
    Logger.log(partText);

    // Collect the wrappers in opening order so they can be closed in reverse,
    // which keeps the generated markup properly nested.
    var tags = [];
    if (partAtts.ITALIC) {
      tags.push('i');
    }
    if (partAtts.BOLD) {
      tags.push('strong');
    }
    if (partAtts.UNDERLINE) {
      tags.push('u');
    }

    for (var t = 0; t < tags.length; t++) {
      output.push('<' + tags[t] + '>');
    }

    // A run that is exactly "[xxx]" is treated as a reference marker and
    // rendered as superscript.
    var isBracketed = partText.charAt(0) === '[' && partText.charAt(partText.length - 1) === ']';
    if (isBracketed) {
      output.push('<sup>' + partText + '</sup>');
    }
    else {
      output.push(linkify(partText));
    }

    for (var c = tags.length - 1; c >= 0; c--) {
      output.push('</' + tags[c] + '>');
    }
  }
}
/**
 * Renders a footnote element: every paragraph of the footnote's contents is
 * converted with processFootnoteItem and the concatenated result is wrapped
 * in <fn>...</fn>, the custom tag used by the Drupal footnotes module.
 *
 * @param {Object} item - Footnote element (a container object).
 * @param {Array<string>} output - Fragment accumulator; one entry is appended.
 */
function processFootnote(item, output){
  var paragraphs = item.getFootnoteContents().getParagraphs();
  var rendered = paragraphs.map(function (paragraph) {
    return processFootnoteItem(paragraph);
  });
  output.push('<fn>' + rendered.join('') + '</fn>');
}
/**
 * Renders one paragraph of a footnote, preserving only italic and bold
 * formatting. A run of the form "[...]" is wrapped in <sup>.
 *
 * Only text children of the paragraph are rendered; other child types are
 * skipped. Elements that are not paragraphs produce an empty string.
 *
 * @param {Object} footnoteItem - Element taken from the footnote contents.
 * @returns {string} HTML for the paragraph's text.
 */
function processFootnoteItem(footnoteItem){
  // Local accumulator (previously an implicit global).
  var output = [];
  if (footnoteItem.getType() !== DocumentApp.ElementType.PARAGRAPH) {
    return '';
  }

  var childCount = footnoteItem.getNumChildren();
  // The child loop and the run loop use separate counters; sharing one
  // counter made the run loop overwrite the child index.
  for (var c = 0; c < childCount; c++) {
    var item = footnoteItem.getChild(c);
    if (item.getType() !== DocumentApp.ElementType.TEXT) {
      // Non-text children are skipped instead of being queried for text runs.
      continue;
    }
    var text = item.getText();
    var indices = item.getTextAttributeIndices();
    for (var i = 0; i < indices.length; i++) {
      var startPos = indices[i];
      var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
      var partAtts = item.getAttributes(startPos);
      var partText = text.substring(startPos, endPos);
      Logger.log(partText);

      if (partAtts.ITALIC) {
        output.push('<i>');
      }
      if (partAtts.BOLD) {
        output.push('<strong>');
      }

      // A run that is exactly "[xxx]" is treated as a reference marker and
      // rendered as superscript.
      if (partText.charAt(0) === '[' && partText.charAt(partText.length - 1) === ']') {
        output.push('<sup>' + partText + '</sup>');
      }
      else {
        output.push(partText);
      }

      // Close in reverse order of opening so the tags nest correctly.
      if (partAtts.BOLD) {
        output.push('</strong>');
      }
      if (partAtts.ITALIC) {
        output.push('</i>');
      }
    }
  }
  return output.join('');
}
/**
 * Registers an inline image and emits an <img> tag that references it.
 *
 * The image is named "Image_<n><ext>", where <n> is the number of images
 * collected so far and <ext> is derived from the blob's content type. The
 * tag's src is "cid:<name>", and the blob is appended to `images` under the
 * same name.
 *
 * @param {Object} item - Inline image element; must provide getBlob().
 * @param {Array} images - Accumulator of {blob, type, name} records; appended
 *     to in place. If omitted, the record is stored in a throwaway list.
 * @param {Array<string>} output - Fragment accumulator; one entry is appended.
 * @throws {Error} If the content type is not PNG, GIF or JPEG.
 */
function processImage(item, images, output) {
  var imageList = images || [];
  var blob = item.getBlob();
  var contentType = blob.getContentType();
  var extension = "";
  if (/\/png$/.test(contentType)) {
    extension = ".png";
  } else if (/\/gif$/.test(contentType)) {
    extension = ".gif";
  } else if (/\/jpe?g$/.test(contentType)) {
    extension = ".jpg";
  } else {
    // Throw a real Error (not a bare string) so a stack trace is available.
    throw new Error("Unsupported image type: " + contentType);
  }
  var name = "Image_" + imageList.length + extension;
  output.push('<img src="cid:' + name + '" />');
  imageList.push({
    "blob": blob,
    "type": contentType,
    "name": name
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment