matsumonkie · April 10, 2017 08:05
diff --git a/gistfile1.txt b/gistfile1.txt
 /**
  * Entry point: renders the active document's body as HTML and hands the
  * result, together with any collected inline images, to emailHtml().
  *
  * Works in two passes over the body's top-level children:
  *   1. every TABLE_OF_CONTENTS element is read into a title -> link map;
  *   2. every other element is rendered through processItem().
  * Collecting the map first means a heading placed before the table of
  * contents can still be matched to its link.
  */
 function ConvertGoogleDocToCleanHtml() {
   // The element tree is walked from the Body, not from the Document itself.
   var body = DocumentApp.getActiveDocument().getBody();
   var numChildren = body.getNumChildren();
   var output = [];
   var images = [];
   var listCounters = {};
   var toc = {};
   var i, j;

   // Pass 1: build the heading-title -> link map from the table(s) of contents.
   for (i = 0; i < numChildren; i++) {
     var candidate = body.getChild(i);
     if (candidate.getType() !== DocumentApp.ElementType.TABLE_OF_CONTENTS) {
       continue;
     }
     var contents = candidate.asTableOfContents();
     for (j = 0; j < contents.getNumChildren(); j++) { // one iteration per ToC entry
       var entry = contents.getChild(j).asParagraph();
       if (entry.getNumChildren() === 0) {
         continue; // blank ToC line: nothing to read
       }
       var itemToc = entry.getChild(0).asText();
       // processItem() looks titles up after trimming them, so store them trimmed.
       var itemText = itemToc.getText().trim();
       var itemUrl = itemToc.getLinkUrl();
       toc[itemText] = itemUrl;
       Logger.log("[" + itemText + "] -> " + itemUrl);
     }
   }

   // Pass 2: render everything that is not a table of contents, in document order.
   for (i = 0; i < numChildren; i++) {
     var p = body.getChild(i);
     if (p.getType() !== DocumentApp.ElementType.TABLE_OF_CONTENTS) {
       output.push(processItem(toc, p, listCounters, images));
     }
   }

   var html = output.join('\r');
   emailHtml(html, images);
 }


 /**
  * Mails the generated HTML to the active user as a "<document name>.html"
  * attachment, alongside one attachment per collected image.
  *
  * @param {string} html   The rendered document markup.
  * @param {Array}  images Entries shaped like { name, type, blob }, as pushed
  *                        by processImage().
  */
 function emailHtml(html, images) {
   // One attachment per image, built from the blob's raw bytes.
   var attachments = images.map(function (image) {
     return {
       "fileName": image.name,
       "mimeType": image.type,
       "content": image.blob.getBytes()
     };
   });

   // The same images again, keyed by file name, for the inlineImages option.
   var inlineImages = {};
   images.forEach(function (image) {
     inlineImages[image.name] = image.blob;
   });

   var docFileName = DocumentApp.getActiveDocument().getName() + ".html";
   attachments.push({
     "fileName": docFileName,
     "mimeType": "text/html",
     "content": html
   });

   // No htmlBody is set: the markup travels only as the .html attachment.
   var message = {
     to: Session.getActiveUser().getEmail(),
     subject: docFileName,
     inlineImages: inlineImages,
     attachments: attachments
   };
   MailApp.sendEmail(message);
 }

 /**
  * Renders one document element, and recursively its children, as an HTML
  * string.
  *
  * - PARAGRAPH: wrapped in <h1>..<h6> when its heading is HEADING1..HEADING6,
  *   otherwise in <p>. A heading whose trimmed first-child text is a key of
  *   `toc` gets that value as its `name` attribute; otherwise no `name`
  *   attribute is written. A paragraph without children renders as "".
  * - INLINE_IMAGE: delegated to processImage().
  * - LIST_ITEM: wrapped in <li>; the list tag is opened on the first item
  *   seen for a (list id, nesting level) pair and closed when the next
  *   sibling is not a list item.
  * - TEXT: delegated to processText().
  * - Anything else exposing getNumChildren(): children rendered in order.
  *
  * @param {Object} toc          Map of heading title -> link.
  * @param {Object} item         The element to render.
  * @param {Object} listCounters Per-list counters, updated in place.
  * @param {Array}  images       Collected images, appended to by processImage().
  * @returns {string} The HTML for this element.
  */
 function processItem(toc, item, listCounters, images) {
   var output = [];
   var prefix = "", suffix = "";
   var type = item.getType();

   if (type === DocumentApp.ElementType.PARAGRAPH) {
     // Check for emptiness first: reading child 0 of an empty heading would fail.
     if (item.getNumChildren() === 0) {
       return "";
     }

     var heading = item.getHeading();
     var headingLevel = 0;
     for (var level = 1; level <= 6; level++) {
       if (heading === DocumentApp.ParagraphHeading["HEADING" + level]) {
         headingLevel = level;
         break;
       }
     }

     if (headingLevel > 0) {
       var title = item.getChild(0).getText().trim();
       // Only own, non-null entries count; a missing entry must not become name="undefined".
       var hasAnchor = Object.prototype.hasOwnProperty.call(toc, title) && toc[title] != null;
       var nameAttr = hasAnchor ? ' name="' + toc[title] + '"' : "";
       prefix = "<h" + headingLevel + nameAttr + ">";
       suffix = "</h" + headingLevel + ">";
     } else {
       prefix = "<p>";
       suffix = "</p>";
     }
   }
   else if (type === DocumentApp.ElementType.INLINE_IMAGE) {
     processImage(item, images, output);
   }
   else if (type === DocumentApp.ElementType.LIST_ITEM) {
     var gt = item.getGlyphType();
     var isBulleted = gt === DocumentApp.GlyphType.BULLET
         || gt === DocumentApp.GlyphType.HOLLOW_BULLET
         || gt === DocumentApp.GlyphType.SQUARE_BULLET;
     var listTag = isBulleted ? "ul" : "ol";
     var key = item.getListId() + '.' + item.getNestingLevel();
     var counter = listCounters[key] || 0;

     // The first item seen for this list/level also opens the list.
     prefix = (counter === 0 ? "<" + listTag + ">" : "") + "<li>";
     suffix = "</li>";

     // A list item may be the last child of its parent without being at the
     // document end, so the next sibling can be missing.
     var next = item.isAtDocumentEnd() ? null : item.getNextSibling();
     if (!next || next.getType() !== DocumentApp.ElementType.LIST_ITEM) {
       suffix += "</" + listTag + ">";
     }
     // NOTE(review): counters are never reset and one closing tag is written
     // per run of sibling items, so nested or interrupted lists may come out
     // unbalanced - confirm against real documents.

     listCounters[key] = counter + 1;
   }

   output.push(prefix);

   if (type === DocumentApp.ElementType.TEXT) {
     processText(item, output);
   }
   else if (item.getNumChildren) {
     // Walk through all the child elements of this element.
     var numChildren = item.getNumChildren();
     for (var i = 0; i < numChildren; i++) {
       output.push(processItem(toc, item.getChild(i), listCounters, images));
     }
   }

   output.push(suffix);
   return output.join('');
 }

 /**
  * Appends the HTML for one text element to `output`.
  *
  * Uniformly formatted text (at most one attribute run) is emitted as:
  *   bold -> <b>, else italic -> <blockquote> (a fully italic paragraph is
  *   assumed to be a quote), else a lone http(s) URL -> <a rel="nofollow">,
  *   else the plain text. If the run carries a link, the text is wrapped in
  *   an anchor to it.
  * Mixed formatting is emitted run by run with <i>, <b>, <u> and <a>, closed
  * in reverse order so the tags nest correctly.
  *
  * Text and href values are HTML-escaped. Links starting with "#heading="
  * are rewritten to "/d/#/guides/content/heading=...".
  *
  * @param {Object} item   The text element to render.
  * @param {Array}  output Accumulator of HTML fragments; appended to in place.
  */
 function processText(item, output) {
   var text = item.getText();
   var indices = item.getTextAttributeIndices();

   // Escapes the characters that are significant in HTML text and attributes.
   function escapeHtml(value) {
     return String(value)
       .replace(/&/g, '&amp;')
       .replace(/</g, '&lt;')
       .replace(/>/g, '&gt;')
       .replace(/"/g, '&quot;');
   }

   // Maps an in-document heading link onto the target site's route.
   function resolveHref(linkUrl) {
     if (linkUrl.substring(0, 9) === "#heading=") {
       return "/d/#/guides/content/" + linkUrl.substring(1); // drop the '#'
     }
     return linkUrl;
   }

   if (indices.length <= 1) {
     var escaped = escapeHtml(text);
     var trimmed = text.trim();
     var bold = item.isBold();
     var italic = item.isItalic();
     var runLink = item.getLinkUrl();
     var content = escaped;

     if (!bold && !italic && /^https?:\/\/\S+$/.test(trimmed)) {
       // The text is nothing but a URL: link it to itself.
       content = '<a href="' + escapeHtml(trimmed) + '" rel="nofollow">' + escaped + '</a>';
     }
     else if (runLink) {
       content = '<a href="' + escapeHtml(resolveHref(runLink)) + '">' + escaped + '</a>';
     }

     if (bold) {
       output.push('<b>' + content + '</b>');
     }
     else if (italic) {
       // Assuming that a whole para fully italic is a quote
       output.push('<blockquote>' + content + '</blockquote>');
     }
     else {
       output.push(content);
     }
     return;
   }

   for (var i = 0; i < indices.length; i++) {
     var partAtts = item.getAttributes(indices[i]);
     var startPos = indices[i];
     var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
     var partText = text.substring(startPos, endPos);
     var closers = []; // closing tags, innermost first

     if (partAtts.ITALIC) {
       output.push('<i>');
       closers.unshift('</i>');
     }
     if (partAtts.BOLD) {
       output.push('<b>');
       closers.unshift('</b>');
     }
     if (partAtts.UNDERLINE) {
       output.push('<u>');
       closers.unshift('</u>');
     }
     if (partAtts.LINK_URL) {
       output.push('<a href="' + escapeHtml(resolveHref(partAtts.LINK_URL)) + '">');
       closers.unshift('</a>');
     }

     output.push(escapeHtml(partText));

     for (var c = 0; c < closers.length; c++) {
       output.push(closers[c]);
     }
   }
 }

 /**
  * Registers an inline image and appends an <img> tag referencing it by
  * content id ("cid:Image_<index><extension>").
  *
  * The index is the number of images collected so far. The extension is
  * chosen from the blob's content type: png, gif or jpg.
  *
  * @param {Object} item   The image element; its blob is read via getBlob().
  * @param {Array}  images Collected images; a { blob, type, name } entry is
  *                        appended. When falsy, a throwaway array is used.
  * @param {Array}  output Accumulator of HTML fragments; appended to in place.
  * @throws {string} When the content type is not png, gif or jpeg.
  */
 function processImage(item, images, output)
 {
   var collected = images || [];
   var blob = item.getBlob();
   var contentType = blob.getContentType();

   // Content-type suffix -> file extension, tried in order.
   var knownTypes = [
     [/\/png$/, ".png"],
     [/\/gif$/, ".gif"],
     [/\/jpe?g$/, ".jpg"]
   ];
   var extension = null;
   for (var k = 0; k < knownTypes.length && extension === null; k++) {
     if (knownTypes[k][0].test(contentType)) {
       extension = knownTypes[k][1];
     }
   }
   if (extension === null) {
     throw "Unsupported image type: "+contentType;
   }

   var name = "Image_" + collected.length + extension;
   output.push('<img src="cid:'+name+'" />');
   collected.push({
     "blob": blob,
     "type": contentType,
     "name": name
   });
 }
	/**
	 * Entry point: renders the active document's body as HTML and hands the
	 * result, together with any collected inline images, to emailHtml().
	 *
	 * Works in two passes over the body's top-level children:
	 *   1. every TABLE_OF_CONTENTS element is read into a title -> link map;
	 *   2. every other element is rendered through processItem().
	 * Collecting the map first means a heading placed before the table of
	 * contents can still be matched to its link.
	 */
	function ConvertGoogleDocToCleanHtml() {
	  // The element tree is walked from the Body, not from the Document itself.
	  var body = DocumentApp.getActiveDocument().getBody();
	  var numChildren = body.getNumChildren();
	  var output = [];
	  var images = [];
	  var listCounters = {};
	  var toc = {};
	  var i, j;

	  // Pass 1: build the heading-title -> link map from the table(s) of contents.
	  for (i = 0; i < numChildren; i++) {
	    var candidate = body.getChild(i);
	    if (candidate.getType() !== DocumentApp.ElementType.TABLE_OF_CONTENTS) {
	      continue;
	    }
	    var contents = candidate.asTableOfContents();
	    for (j = 0; j < contents.getNumChildren(); j++) { // one iteration per ToC entry
	      var entry = contents.getChild(j).asParagraph();
	      if (entry.getNumChildren() === 0) {
	        continue; // blank ToC line: nothing to read
	      }
	      var itemToc = entry.getChild(0).asText();
	      // processItem() looks titles up after trimming them, so store them trimmed.
	      var itemText = itemToc.getText().trim();
	      var itemUrl = itemToc.getLinkUrl();
	      toc[itemText] = itemUrl;
	      Logger.log("[" + itemText + "] -> " + itemUrl);
	    }
	  }

	  // Pass 2: render everything that is not a table of contents, in document order.
	  for (i = 0; i < numChildren; i++) {
	    var p = body.getChild(i);
	    if (p.getType() !== DocumentApp.ElementType.TABLE_OF_CONTENTS) {
	      output.push(processItem(toc, p, listCounters, images));
	    }
	  }

	  var html = output.join('\r');
	  emailHtml(html, images);
	}


	/**
	 * Mails the generated HTML to the active user as a "<document name>.html"
	 * attachment, alongside one attachment per collected image.
	 *
	 * @param {string} html   The rendered document markup.
	 * @param {Array}  images Entries shaped like { name, type, blob }, as pushed
	 *                        by processImage().
	 */
	function emailHtml(html, images) {
	  // One attachment per image, built from the blob's raw bytes.
	  var attachments = images.map(function (image) {
	    return {
	      "fileName": image.name,
	      "mimeType": image.type,
	      "content": image.blob.getBytes()
	    };
	  });

	  // The same images again, keyed by file name, for the inlineImages option.
	  var inlineImages = {};
	  images.forEach(function (image) {
	    inlineImages[image.name] = image.blob;
	  });

	  var docFileName = DocumentApp.getActiveDocument().getName() + ".html";
	  attachments.push({
	    "fileName": docFileName,
	    "mimeType": "text/html",
	    "content": html
	  });

	  // No htmlBody is set: the markup travels only as the .html attachment.
	  var message = {
	    to: Session.getActiveUser().getEmail(),
	    subject: docFileName,
	    inlineImages: inlineImages,
	    attachments: attachments
	  };
	  MailApp.sendEmail(message);
	}

	/**
	 * Renders one document element, and recursively its children, as an HTML
	 * string.
	 *
	 * - PARAGRAPH: wrapped in <h1>..<h6> when its heading is HEADING1..HEADING6,
	 *   otherwise in <p>. A heading whose trimmed first-child text is a key of
	 *   `toc` gets that value as its `name` attribute; otherwise no `name`
	 *   attribute is written. A paragraph without children renders as "".
	 * - INLINE_IMAGE: delegated to processImage().
	 * - LIST_ITEM: wrapped in <li>; the list tag is opened on the first item
	 *   seen for a (list id, nesting level) pair and closed when the next
	 *   sibling is not a list item.
	 * - TEXT: delegated to processText().
	 * - Anything else exposing getNumChildren(): children rendered in order.
	 *
	 * @param {Object} toc          Map of heading title -> link.
	 * @param {Object} item         The element to render.
	 * @param {Object} listCounters Per-list counters, updated in place.
	 * @param {Array}  images       Collected images, appended to by processImage().
	 * @returns {string} The HTML for this element.
	 */
	function processItem(toc, item, listCounters, images) {
	  var output = [];
	  var prefix = "", suffix = "";
	  var type = item.getType();

	  if (type === DocumentApp.ElementType.PARAGRAPH) {
	    // Check for emptiness first: reading child 0 of an empty heading would fail.
	    if (item.getNumChildren() === 0) {
	      return "";
	    }

	    var heading = item.getHeading();
	    var headingLevel = 0;
	    for (var level = 1; level <= 6; level++) {
	      if (heading === DocumentApp.ParagraphHeading["HEADING" + level]) {
	        headingLevel = level;
	        break;
	      }
	    }

	    if (headingLevel > 0) {
	      var title = item.getChild(0).getText().trim();
	      // Only own, non-null entries count; a missing entry must not become name="undefined".
	      var hasAnchor = Object.prototype.hasOwnProperty.call(toc, title) && toc[title] != null;
	      var nameAttr = hasAnchor ? ' name="' + toc[title] + '"' : "";
	      prefix = "<h" + headingLevel + nameAttr + ">";
	      suffix = "</h" + headingLevel + ">";
	    } else {
	      prefix = "<p>";
	      suffix = "</p>";
	    }
	  }
	  else if (type === DocumentApp.ElementType.INLINE_IMAGE) {
	    processImage(item, images, output);
	  }
	  else if (type === DocumentApp.ElementType.LIST_ITEM) {
	    var gt = item.getGlyphType();
	    var isBulleted = gt === DocumentApp.GlyphType.BULLET
	        || gt === DocumentApp.GlyphType.HOLLOW_BULLET
	        || gt === DocumentApp.GlyphType.SQUARE_BULLET;
	    var listTag = isBulleted ? "ul" : "ol";
	    var key = item.getListId() + '.' + item.getNestingLevel();
	    var counter = listCounters[key] || 0;

	    // The first item seen for this list/level also opens the list.
	    prefix = (counter === 0 ? "<" + listTag + ">" : "") + "<li>";
	    suffix = "</li>";

	    // A list item may be the last child of its parent without being at the
	    // document end, so the next sibling can be missing.
	    var next = item.isAtDocumentEnd() ? null : item.getNextSibling();
	    if (!next || next.getType() !== DocumentApp.ElementType.LIST_ITEM) {
	      suffix += "</" + listTag + ">";
	    }
	    // NOTE(review): counters are never reset and one closing tag is written
	    // per run of sibling items, so nested or interrupted lists may come out
	    // unbalanced - confirm against real documents.

	    listCounters[key] = counter + 1;
	  }

	  output.push(prefix);

	  if (type === DocumentApp.ElementType.TEXT) {
	    processText(item, output);
	  }
	  else if (item.getNumChildren) {
	    // Walk through all the child elements of this element.
	    var numChildren = item.getNumChildren();
	    for (var i = 0; i < numChildren; i++) {
	      output.push(processItem(toc, item.getChild(i), listCounters, images));
	    }
	  }

	  output.push(suffix);
	  return output.join('');
	}

	/**
	 * Appends the HTML for one text element to `output`.
	 *
	 * Uniformly formatted text (at most one attribute run) is emitted as:
	 *   bold -> <b>, else italic -> <blockquote> (a fully italic paragraph is
	 *   assumed to be a quote), else a lone http(s) URL -> <a rel="nofollow">,
	 *   else the plain text. If the run carries a link, the text is wrapped in
	 *   an anchor to it.
	 * Mixed formatting is emitted run by run with <i>, <b>, <u> and <a>, closed
	 * in reverse order so the tags nest correctly.
	 *
	 * Text and href values are HTML-escaped. Links starting with "#heading="
	 * are rewritten to "/d/#/guides/content/heading=...".
	 *
	 * @param {Object} item   The text element to render.
	 * @param {Array}  output Accumulator of HTML fragments; appended to in place.
	 */
	function processText(item, output) {
	  var text = item.getText();
	  var indices = item.getTextAttributeIndices();

	  // Escapes the characters that are significant in HTML text and attributes.
	  function escapeHtml(value) {
	    return String(value)
	      .replace(/&/g, '&amp;')
	      .replace(/</g, '&lt;')
	      .replace(/>/g, '&gt;')
	      .replace(/"/g, '&quot;');
	  }

	  // Maps an in-document heading link onto the target site's route.
	  function resolveHref(linkUrl) {
	    if (linkUrl.substring(0, 9) === "#heading=") {
	      return "/d/#/guides/content/" + linkUrl.substring(1); // drop the '#'
	    }
	    return linkUrl;
	  }

	  if (indices.length <= 1) {
	    var escaped = escapeHtml(text);
	    var trimmed = text.trim();
	    var bold = item.isBold();
	    var italic = item.isItalic();
	    var runLink = item.getLinkUrl();
	    var content = escaped;

	    if (!bold && !italic && /^https?:\/\/\S+$/.test(trimmed)) {
	      // The text is nothing but a URL: link it to itself.
	      content = '<a href="' + escapeHtml(trimmed) + '" rel="nofollow">' + escaped + '</a>';
	    }
	    else if (runLink) {
	      content = '<a href="' + escapeHtml(resolveHref(runLink)) + '">' + escaped + '</a>';
	    }

	    if (bold) {
	      output.push('<b>' + content + '</b>');
	    }
	    else if (italic) {
	      // Assuming that a whole para fully italic is a quote
	      output.push('<blockquote>' + content + '</blockquote>');
	    }
	    else {
	      output.push(content);
	    }
	    return;
	  }

	  for (var i = 0; i < indices.length; i++) {
	    var partAtts = item.getAttributes(indices[i]);
	    var startPos = indices[i];
	    var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
	    var partText = text.substring(startPos, endPos);
	    var closers = []; // closing tags, innermost first

	    if (partAtts.ITALIC) {
	      output.push('<i>');
	      closers.unshift('</i>');
	    }
	    if (partAtts.BOLD) {
	      output.push('<b>');
	      closers.unshift('</b>');
	    }
	    if (partAtts.UNDERLINE) {
	      output.push('<u>');
	      closers.unshift('</u>');
	    }
	    if (partAtts.LINK_URL) {
	      output.push('<a href="' + escapeHtml(resolveHref(partAtts.LINK_URL)) + '">');
	      closers.unshift('</a>');
	    }

	    output.push(escapeHtml(partText));

	    for (var c = 0; c < closers.length; c++) {
	      output.push(closers[c]);
	    }
	  }
	}

	/**
	 * Registers an inline image and appends an <img> tag referencing it by
	 * content id ("cid:Image_<index><extension>").
	 *
	 * The index is the number of images collected so far. The extension is
	 * chosen from the blob's content type: png, gif or jpg.
	 *
	 * @param {Object} item   The image element; its blob is read via getBlob().
	 * @param {Array}  images Collected images; a { blob, type, name } entry is
	 *                        appended. When falsy, a throwaway array is used.
	 * @param {Array}  output Accumulator of HTML fragments; appended to in place.
	 * @throws {string} When the content type is not png, gif or jpeg.
	 */
	function processImage(item, images, output)
	{
	  // Plain logical OR; the escaped form "\|\|" is not valid JavaScript.
	  images = images || [];
	  var blob = item.getBlob();
	  var contentType = blob.getContentType();
	  var extension = "";
	  if (/\/png$/.test(contentType)) {
	    extension = ".png";
	  } else if (/\/gif$/.test(contentType)) {
	    extension = ".gif";
	  } else if (/\/jpe?g$/.test(contentType)) {
	    extension = ".jpg";
	  } else {
	    throw "Unsupported image type: "+contentType;
	  }
	  // The running index is simply how many images were collected before this one.
	  var name = "Image_" + images.length + extension;
	  output.push('<img src="cid:'+name+'" />');
	  images.push( {
	    "blob": blob,
	    "type": contentType,
	    "name": name});
	}