Created
January 8, 2018 20:29
-
-
Save trevor-atlas/004ee0bf6a63241f1c212f7d640df2a8 to your computer and use it in GitHub Desktop.
Convert a Google Doc to HTML
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Converts the body of the active Google Doc to HTML and emails the result.
 *
 * Every top-level child of the body is rendered with processItem(); the
 * resulting fragments are joined with a carriage return and passed to
 * emailHtml() together with the inline images collected along the way.
 */
function ConvertGoogleDocToCleanHtml() {
  var body = DocumentApp.getActiveDocument().getBody();
  var numChildren = body.getNumChildren();
  var output = [];
  var images = [];
  var listCounters = {};

  // processItem() wraps text in <p>, so a list item whose only content is one
  // text run comes back as <li><p>...</p></li>. Unwrap those as a matched
  // pair, everywhere in the fragment: a string pattern would only replace the
  // first occurrence and could strip "<p>" without its "</p>".
  var wrappedListItem = /<li><p>((?:(?!<\/?p>)[\s\S])*)<\/p><\/li>/g;

  // Walk through all the child elements of the body.
  for (var i = 0; i < numChildren; i++) {
    var fragment = processItem(body.getChild(i), listCounters, images);
    output.push(fragment.replace(wrappedListItem, '<li>$1</li>'));
  }

  var html = output.join('\r');
  emailHtml(html, images);
  //createDocumentForHtml(html, images);
}
/**
 * Emails the generated HTML to the active user.
 *
 * The markup is sent both as the HTML body and as a "<document name>.html"
 * attachment. Every collected image is attached and also registered as an
 * inline image under its name, which is the key the "cid:" references in the
 * markup point at.
 *
 * @param {string} html Markup generated from the document.
 * @param {Array<{name: string, type: string, blob: Object}>} images Images
 *     collected while converting; each blob must provide getBytes().
 */
function emailHtml(html, images) {
  var imageList = images || [];
  var attachments = [];
  var inlineImages = {};

  for (var j = 0; j < imageList.length; j++) {
    var image = imageList[j];
    attachments.push({
      "fileName": image.name,
      "mimeType": image.type,
      "content": image.blob.getBytes()
    });
    inlineImages[image.name] = image.blob;
  }

  var name = DocumentApp.getActiveDocument().getName() + ".html";
  attachments.push({"fileName": name, "mimeType": "text/html", "content": html});

  MailApp.sendEmail({
    to: Session.getActiveUser().getEmail(),
    subject: name,
    htmlBody: html,
    inlineImages: inlineImages,
    attachments: attachments
  });
}
/**
 * Alternative to emailing: writes the generated HTML source into a new Google
 * Doc named "<active document name>.html" and appends the collected images.
 *
 * @param {string} html Markup generated from the document; stored as the
 *     plain text of the new document.
 * @param {Array<{blob: Object}>} images Images whose blobs are appended after
 *     the text.
 */
function createDocumentForHtml(html, images) {
  var imageList = images || [];
  var name = DocumentApp.getActiveDocument().getName() + ".html";
  var newDoc = DocumentApp.create(name);
  var newBody = newDoc.getBody();

  newBody.setText(html);
  for (var j = 0; j < imageList.length; j++) {
    newBody.appendImage(imageList[j].blob);
  }
  newDoc.saveAndClose();
}
/**
 * Debug helper: writes every own attribute of an attribute map to the log as
 * "name:value", one entry per line.
 *
 * @param {Object} atts Attribute map to dump; a null or undefined map logs
 *     nothing.
 */
function dumpAttributes(atts) {
  if (!atts) {
    return;
  }
  for (var att in atts) {
    // Skip anything inherited through the prototype chain.
    if (Object.prototype.hasOwnProperty.call(atts, att)) {
      Logger.log(att + ":" + atts[att]);
    }
  }
}
/**
 * Recursively renders one document element and its children as HTML.
 *
 * - PARAGRAPH: empty paragraphs render as ""; HEADING1..HEADING6 paragraphs
 *   are wrapped in <h1>..<h6>, other paragraphs get no wrapper of their own.
 * - INLINE_IMAGE: delegated to processImage().
 * - LIST_ITEM: wrapped in <li>, with the enclosing <ul>/<ol> opened on the
 *   first item of a run and closed on the last one.
 * - TEXT: wrapped in <h1>..<h6> when the text starts with a "[h1]".."[h6]"
 *   marker, otherwise in <p>; the text itself is emitted by processText().
 *
 * @param {Object} item Document element to render.
 * @param {Object} listCounters Shared map from "<listId>.<nestingLevel>" to
 *     the number of items already rendered in the currently open list;
 *     mutated by this function.
 * @param {Array} images Shared accumulator handed to processImage().
 * @return {string} HTML for the element, or "" for an empty paragraph.
 */
function processItem(item, listCounters, images) {
  var output = [];
  var prefix = "";
  var suffix = "";
  var elementTypes = DocumentApp.ElementType;
  var type = item.getType();
  var isHeadingParagraph = false;

  if (type === elementTypes.PARAGRAPH) {
    if (item.getNumChildren() === 0) {
      return "";
    }
    var headingLevel = headingLevel_(item.getHeading());
    if (headingLevel) {
      isHeadingParagraph = true;
      prefix = "<h" + headingLevel + ">";
      suffix = "</h" + headingLevel + ">";
    }
  }

  if (type === elementTypes.INLINE_IMAGE) {
    processImage(item, images, output);
  }

  if (type === elementTypes.LIST_ITEM) {
    var listTag = isBulletGlyph_(item.getGlyphType()) ? "ul" : "ol";
    var listId = item.getListId();
    var key = listId + '.' + item.getNestingLevel();
    var counter = listCounters[key] || 0;

    // Only the first item of a run opens the list element.
    prefix = (counter === 0 ? "<" + listTag + ">\n" : "") + "<li>";
    suffix = "</li>";

    // The run ends at the end of the document, when there is no following
    // sibling at all, or when the following sibling is not a list item.
    var next = item.getNextSibling();
    if (item.isAtDocumentEnd() || !next || next.getType() !== elementTypes.LIST_ITEM) {
      suffix += "\n</" + listTag + ">";
      // Forget the closed list so a later run with the same list ID opens a
      // fresh <ul>/<ol> instead of emitting bare <li> elements.
      resetListCounters_(listCounters, listId);
    } else {
      listCounters[key] = counter + 1;
    }
  }

  if (type === elementTypes.TEXT) {
    var marker = /^\[h([1-6])\]/.exec(item.getText());
    if (marker) {
      prefix = "<h" + marker[1] + ">";
      suffix = "</h" + marker[1] + ">";
    } else {
      prefix = "<p>";
      suffix = "</p>";
    }
  }

  output.push(prefix);

  if (type === elementTypes.TEXT) {
    processText(item, output);
  }

  // Only container elements expose getNumChildren().
  if (item.getNumChildren) {
    var numChildren = item.getNumChildren();
    for (var i = 0; i < numChildren; i++) {
      var childHtml = processItem(item.getChild(i), listCounters, images);
      if (isHeadingParagraph) {
        // Text children arrive wrapped in <p>, which must not be nested
        // inside a heading element.
        childHtml = childHtml.replace(/^<p>([\s\S]*)<\/p>$/, '$1');
      }
      output.push(childHtml);
    }
  }

  output.push(suffix);
  return output.join('');
}

/**
 * Maps a paragraph heading to its HTML heading level.
 *
 * @param {Object} heading Value returned by getHeading().
 * @return {number} 1-6 for HEADING1..HEADING6, 0 for anything else.
 */
function headingLevel_(heading) {
  var headings = DocumentApp.ParagraphHeading;
  for (var level = 1; level <= 6; level++) {
    if (heading === headings["HEADING" + level]) {
      return level;
    }
  }
  return 0;
}

/**
 * Tells whether a list glyph is one of the bullet glyphs (unordered list).
 *
 * @param {Object} glyphType Value returned by getGlyphType().
 * @return {boolean} True for BULLET, HOLLOW_BULLET and SQUARE_BULLET.
 */
function isBulletGlyph_(glyphType) {
  var glyphs = DocumentApp.GlyphType;
  return glyphType === glyphs.BULLET ||
      glyphType === glyphs.HOLLOW_BULLET ||
      glyphType === glyphs.SQUARE_BULLET;
}

/**
 * Removes the counters of every nesting level of one list from the map.
 *
 * @param {Object} listCounters Map keyed by "<listId>.<nestingLevel>".
 * @param {string} listId ID of the list that has just been closed.
 */
function resetListCounters_(listCounters, listId) {
  var keyPrefix = listId + '.';
  for (var key in listCounters) {
    if (Object.prototype.hasOwnProperty.call(listCounters, key) &&
        key.indexOf(keyPrefix) === 0) {
      delete listCounters[key];
    }
  }
}
/**
 * Appends the text of a text element to the output as HTML-safe content.
 *
 * A leading "[h1]".."[h6]" heading marker is removed, the remaining text is
 * trimmed, and the characters &, < and > are escaped so document text cannot
 * break the generated markup.
 *
 * @param {Object} item Text element; must provide getText().
 * @param {Array<string>} output Accumulator the text is pushed onto.
 */
function processText(item, output) {
  // Anchored: only a marker at the very start is a heading marker; the same
  // sequence later in the text is ordinary content.
  var text = item.getText().replace(/^\[h[1-6]\]/, '').trim();

  // "&" must be escaped first so the entities added below stay intact.
  text = text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');

  output.push(text);
}
/**
 * Registers an inline image and appends an <img> tag that references it.
 *
 * The image is named "Image_<index>.<extension>", where the index is the
 * number of images collected so far and the extension is derived from the
 * subtype of the blob's content type ("image/png" -> ".png",
 * "image/jpeg" -> ".jpg"). The tag uses a "cid:" source with that name.
 *
 * @param {Object} item Inline image element; must provide getBlob().
 * @param {Array<{blob: Object, type: string, name: string}>} images
 *     Accumulator of collected images; a new entry is pushed onto it.
 * @param {Array<string>} output Accumulator the <img> tag is pushed onto.
 */
function processImage(item, images, output) {
  var imageList = images || [];
  var blob = item.getBlob();
  var contentType = blob.getContentType();

  // MIME types separate type and subtype with "/", and may carry a "+suffix"
  // or ";parameter" after the subtype.
  var subtype = String(contentType || '').split('/').pop().split(/[;+]/)[0].toLowerCase();
  if (subtype === 'jpeg') {
    subtype = 'jpg';
  }
  var extension = subtype ? '.' + subtype : '';

  var name = "Image_" + imageList.length + extension;

  output.push('<img src="cid:' + name + '" />');
  imageList.push({
    "blob": blob,
    "type": contentType,
    "name": name
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment