sdubois · June 16, 2023 20:30
diff --git a/gistfile1.txt b/gistfile1.txt
 // This script converts the active Google Doc to HTML with custom formatting for
 // the Drupal footnotes module (https://www.drupal.org/project/footnotes)


 function onOpen(e) {
   // Adds a "Convert" entry to the document's add-on menu; choosing it runs
   // ConvertGoogleDocToCleanHtml. The event argument `e` is not used.
   var addonMenu = DocumentApp.getUi().createAddonMenu();
   var menuWithItem = addonMenu.addItem('Convert', 'ConvertGoogleDocToCleanHtml');
   menuWithItem.addToUi();
 }

 function ConvertGoogleDocToCleanHtml() {
   // Entry point: converts every top-level child of the active document's body
   // with processItem, joins the fragments with '\r' and emails the result
   // together with the images collected along the way.
   var docBody = DocumentApp.getActiveDocument().getBody();
   var childCount = docBody.getNumChildren();
   var fragments = [];
   var collectedImages = [];
   var counters = {};

   var index = 0;
   while (index < childCount) {
     fragments.push(processItem(docBody.getChild(index), counters, collectedImages));
     index += 1;
   }

   var joinedHtml = fragments.join('\r');
   emailHtml(joinedHtml, collectedImages);
   // To generate a new document instead of sending email, call
   // createDocumentForHtml with the same two arguments here.
 }

 function emailHtml(html, images) {
   // Send an email to the active user containing the generated HTML, both as the
   // message body and as an attached "<document name>.html" file. Every image is
   // attached and also registered as an inline image under its name, so that the
   // cid:<name> references in the HTML resolve.
   //   html   - the generated HTML string
   //   images - array of { name, type, blob } records; may be omitted
   var imageList = images || [];
   var attachments = [];
   var inlineImages = {};

   // One pass builds both the attachment list and the inline-image map.
   for (var j = 0; j < imageList.length; j++) {
     var image = imageList[j];
     attachments.push({
       "fileName": image.name,
       "mimeType": image.type,
       "content": image.blob.getBytes()
     });
     inlineImages[image.name] = image.blob;
   }

   var name = DocumentApp.getActiveDocument().getName() + ".html";
   attachments.push({"fileName": name, "mimeType": "text/html", "content": html});

   MailApp.sendEmail({
     to: Session.getActiveUser().getEmail(),
     subject: name,
     htmlBody: html,
     inlineImages: inlineImages,
     attachments: attachments
   });
 }

 function createDocumentForHtml(html, images) {
   // Alternative to emailHtml: creates a new Google Doc named
   // "<active document name>.html", sets the generated HTML as its body text
   // and appends each collected image blob after it.
   var title = DocumentApp.getActiveDocument().getName() + ".html";
   var target = DocumentApp.create(title);
   target.getBody().setText(html);

   var position = 0;
   while (position < images.length) {
     target.getBody().appendImage(images[position].blob);
     position += 1;
   }

   target.saveAndClose();
 }

 function dumpAttributes(atts) {
   // Debug helper: writes one "name:value" line to the log for every
   // enumerable property of the given attributes object.
   for (var attName in atts) {
     var attValue = atts[attName];
     Logger.log(attName + ":" + attValue);
   }
 }

 function processItem(item, listCounters, images) {
   // Convert one document element (and, recursively, its children) to an HTML string.
   // Paragraphs become <p> or <h1>-<h6>, list items become <li> wrapped in <ul>/<ol>,
   // and text, footnotes and inline images are delegated to processText,
   // processFootnote and processImage.
   //   item         - the element to convert
   //   listCounters - object keyed by "<listId>.<nestingLevel>" holding how many
   //                  items of that list have been emitted so far (updated here)
   //   images       - array that processImage appends image records to
   // Returns the HTML for the element; "" for a paragraph with no children.
   var output = [];
   var prefix = "", suffix = "";
   var type = item.getType();

   if (type == DocumentApp.ElementType.PARAGRAPH) {
     // Map the heading level to its tag; anything else is a plain paragraph.
     switch (item.getHeading()) {
       case DocumentApp.ParagraphHeading.HEADING6:
         prefix = "<h6>"; suffix = "</h6>"; break;
       case DocumentApp.ParagraphHeading.HEADING5:
         prefix = "<h5>"; suffix = "</h5>"; break;
       case DocumentApp.ParagraphHeading.HEADING4:
         prefix = "<h4>"; suffix = "</h4>"; break;
       case DocumentApp.ParagraphHeading.HEADING3:
         prefix = "<h3>"; suffix = "</h3>"; break;
       case DocumentApp.ParagraphHeading.HEADING2:
         prefix = "<h2>"; suffix = "</h2>"; break;
       case DocumentApp.ParagraphHeading.HEADING1:
         prefix = "<h1>"; suffix = "</h1>"; break;
       default:
         prefix = "<p>"; suffix = "</p>";
     }

     // Empty paragraphs produce no markup at all.
     if (item.getNumChildren() == 0) {
       return "";
     }
   }
   else if (type == DocumentApp.ElementType.INLINE_IMAGE) {
     processImage(item, images, output);
   }
   else if (type == DocumentApp.ElementType.FOOTNOTE) {
     processFootnote(item, output);
   }
   else if (type === DocumentApp.ElementType.LIST_ITEM) {
     var gt = item.getGlyphType();
     var isBulletList = gt === DocumentApp.GlyphType.BULLET
         || gt === DocumentApp.GlyphType.HOLLOW_BULLET
         || gt === DocumentApp.GlyphType.SQUARE_BULLET;
     var key = item.getListId() + '.' + item.getNestingLevel();
     var counter = listCounters[key] || 0;

     // Only the first item of a list opens the <ul>/<ol>; the list tag is NOT
     // closed here, otherwise every following <li> would end up outside the list.
     if (counter == 0) {
       prefix = isBulletList ? "<ul><li>" : "<ol><li>";
     }
     else {
       prefix = "<li>";
     }
     suffix = "</li>";

     // Close the list on its last item: at the end of the document, when there is
     // no following sibling, or when the following sibling is not a list item.
     var next = item.getNextSibling();
     if (item.isAtDocumentEnd() || !next || next.getType() != DocumentApp.ElementType.LIST_ITEM) {
       suffix += isBulletList ? "</ul>" : "</ol>";
     }

     listCounters[key] = counter + 1;
   }

   output.push(prefix);

   if (type == DocumentApp.ElementType.TEXT) {
     processText(item, output);
   }
   else if (item.getNumChildren) {
     // Container element: convert every child in order.
     var numChildren = item.getNumChildren();
     for (var i = 0; i < numChildren; i++) {
       output.push(processItem(item.getChild(i), listCounters, images));
     }
   }

   output.push(suffix);
   return output.join('');
 }


 function processText(item, output) {
   // Convert a text element to HTML fragments and append them to `output`.
   //   item   - text element exposing getText, getTextAttributeIndices, isBold,
   //            isItalic and getAttributes(offset)
   //   output - array of HTML fragments; appended to in place
   // Text with at most one formatting run is treated as a whole: bold becomes
   // <strong>, fully italic text is assumed to be a quote (<blockquote>), and a
   // bare http(s) URL becomes a nofollow link. Text with several runs is emitted
   // run by run with <i>/<strong>/<u> wrappers.
   var text = item.getText();
   var indices = item.getTextAttributeIndices();

   // Wraps a string that starts (after trimming) with http:// or https:// in a link.
   function linkify(str) {
     if (/^https?:\/\//.test(str.trim())) {
       return '<a href="' + str + '" rel="nofollow">' + str + '</a>';
     }
     return str;
   }

   if (indices.length <= 1) {
     if (item.isBold()) {
       output.push('<strong>' + text + '</strong>');
     }
     else if (item.isItalic()) {
       output.push('<blockquote>' + text + '</blockquote>');
     }
     else {
       output.push(linkify(text));
     }
     return;
   }

   for (var i = 0; i < indices.length; i++) {
     var startPos = indices[i];
     var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
     var partAtts = item.getAttributes(startPos);
     var partText = text.substring(startPos, endPos);

     Logger.log(partText);

     // Collect closing tags in reverse opening order so the markup nests properly.
     var closers = [];
     if (partAtts.ITALIC) {
       output.push('<i>');
       closers.unshift('</i>');
     }
     if (partAtts.BOLD) {
       output.push('<strong>');
       closers.unshift('</strong>');
     }
     if (partAtts.UNDERLINE) {
       output.push('<u>');
       closers.unshift('</u>');
     }

     // A run that is exactly "[xxx]" is treated as a reference marker and made
     // superscript (superscript formatting itself is not detected here).
     if (partText.indexOf('[') == 0 && partText[partText.length - 1] == ']') {
       output.push('<sup>' + partText + '</sup>');
     }
     else {
       output.push(linkify(partText));
     }

     for (var c = 0; c < closers.length; c++) {
       output.push(closers[c]);
     }
   }
 }


 function processFootnote(item, output){
   // Converts a footnote element: each paragraph of its contents is rendered by
   // processFootnoteItem and the concatenation is appended to `output` wrapped in
   // <fn> tags (a custom tag for the Drupal footnotes module).
   var paragraphs = item.getFootnoteContents().getParagraphs();
   var rendered = [];
   var position = 0;
   while (position < paragraphs.length) {
     rendered.push(processFootnoteItem(paragraphs[position]));
     position += 1;
   }
   var inner = rendered.join('');
   output.push('<fn>' + inner + '</fn>');
 }

 function processFootnoteItem(footnoteItem){
   // Render one footnote paragraph as an HTML string, keeping only italic and
   // bold formatting (plus <sup> for "[xxx]" reference markers).
   //   footnoteItem - element taken from the footnote contents
   // Returns "" when the element is not a paragraph. Children of the paragraph
   // that are not text elements are skipped.
   var output = [];  // local: must not leak into (or be shared through) the global scope

   if (footnoteItem.getType() != DocumentApp.ElementType.PARAGRAPH) {
     return '';
   }

   var childCount = footnoteItem.getNumChildren();

   // The child loop and the run loop use separate counters; sharing one index
   // would make the inner loop clobber the outer position.
   for (var c = 0; c < childCount; c++) {
     var item = footnoteItem.getChild(c);
     if (item.getType() != DocumentApp.ElementType.TEXT) {
       continue;
     }

     var text = item.getText();
     var indices = item.getTextAttributeIndices();

     for (var i = 0; i < indices.length; i++) {
       var startPos = indices[i];
       var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
       var partAtts = item.getAttributes(startPos);
       var partText = text.substring(startPos, endPos);

       Logger.log(partText);

       if (partAtts.ITALIC) {
         output.push('<i>');
       }
       if (partAtts.BOLD) {
         output.push('<strong>');
       }

       // A run that is exactly "[xxx]" is treated as a reference marker and made
       // superscript (superscript formatting itself is not detected here).
       if (partText.indexOf('[') == 0 && partText[partText.length - 1] == ']') {
         output.push('<sup>' + partText + '</sup>');
       }
       else {
         output.push(partText);
       }

       // Close in reverse opening order so the tags nest correctly.
       if (partAtts.BOLD) {
         output.push('</strong>');
       }
       if (partAtts.ITALIC) {
         output.push('</i>');
       }
     }
   }

   return output.join('');
 }

 function processImage(item, images, output) {
   // Register an inline image and emit an <img> tag that refers to it by content id.
   //   item   - image element exposing getBlob()
   //   images - array of { blob, type, name } records; the new image is appended.
   //            If omitted, a throwaway array is used and only the tag is emitted.
   //   output - array of HTML fragments; the <img> tag is appended
   // Throws an Error when the blob's content type is not PNG, GIF or JPEG.
   var imageList = images || [];
   var blob = item.getBlob();
   var contentType = blob.getContentType();
   var extension;

   if (/\/png$/.test(contentType)) {
     extension = ".png";
   } else if (/\/gif$/.test(contentType)) {
     extension = ".gif";
   } else if (/\/jpe?g$/.test(contentType)) {
     extension = ".jpg";
   } else {
     throw new Error("Unsupported image type: " + contentType);
   }

   // Images are numbered by their position in the list: Image_0, Image_1, ...
   var name = "Image_" + imageList.length + extension;
   output.push('<img src="cid:' + name + '" />');
   imageList.push({
     "blob": blob,
     "type": contentType,
     "name": name
   });
 }
	// This script converts the active Google Doc to HTML with custom formatting for
	// the Drupal footnotes module (https://www.drupal.org/project/footnotes)


	function onOpen(e) {
	  // Adds a "Convert" entry to the document's add-on menu; choosing it runs
	  // ConvertGoogleDocToCleanHtml. The event argument `e` is not used.
	  var addonMenu = DocumentApp.getUi().createAddonMenu();
	  var menuWithItem = addonMenu.addItem('Convert', 'ConvertGoogleDocToCleanHtml');
	  menuWithItem.addToUi();
	}

	function ConvertGoogleDocToCleanHtml() {
	  // Entry point: converts every top-level child of the active document's body
	  // with processItem, joins the fragments with '\r' and emails the result
	  // together with the images collected along the way.
	  var docBody = DocumentApp.getActiveDocument().getBody();
	  var childCount = docBody.getNumChildren();
	  var fragments = [];
	  var collectedImages = [];
	  var counters = {};

	  var index = 0;
	  while (index < childCount) {
	    fragments.push(processItem(docBody.getChild(index), counters, collectedImages));
	    index += 1;
	  }

	  var joinedHtml = fragments.join('\r');
	  emailHtml(joinedHtml, collectedImages);
	  // To generate a new document instead of sending email, call
	  // createDocumentForHtml with the same two arguments here.
	}

	function emailHtml(html, images) {
	  // Send an email to the active user containing the generated HTML, both as the
	  // message body and as an attached "<document name>.html" file. Every image is
	  // attached and also registered as an inline image under its name, so that the
	  // cid:<name> references in the HTML resolve.
	  //   html   - the generated HTML string
	  //   images - array of { name, type, blob } records; may be omitted
	  var imageList = images || [];
	  var attachments = [];
	  var inlineImages = {};

	  // One pass builds both the attachment list and the inline-image map.
	  for (var j = 0; j < imageList.length; j++) {
	    var image = imageList[j];
	    attachments.push({
	      "fileName": image.name,
	      "mimeType": image.type,
	      "content": image.blob.getBytes()
	    });
	    inlineImages[image.name] = image.blob;
	  }

	  var name = DocumentApp.getActiveDocument().getName() + ".html";
	  attachments.push({"fileName": name, "mimeType": "text/html", "content": html});

	  MailApp.sendEmail({
	    to: Session.getActiveUser().getEmail(),
	    subject: name,
	    htmlBody: html,
	    inlineImages: inlineImages,
	    attachments: attachments
	  });
	}

	function createDocumentForHtml(html, images) {
	  // Alternative to emailHtml: creates a new Google Doc named
	  // "<active document name>.html", sets the generated HTML as its body text
	  // and appends each collected image blob after it.
	  var title = DocumentApp.getActiveDocument().getName() + ".html";
	  var target = DocumentApp.create(title);
	  target.getBody().setText(html);

	  var position = 0;
	  while (position < images.length) {
	    target.getBody().appendImage(images[position].blob);
	    position += 1;
	  }

	  target.saveAndClose();
	}

	function dumpAttributes(atts) {
	  // Debug helper: writes one "name:value" line to the log for every
	  // enumerable property of the given attributes object.
	  for (var attName in atts) {
	    var attValue = atts[attName];
	    Logger.log(attName + ":" + attValue);
	  }
	}

	function processItem(item, listCounters, images) {
	  // Convert one document element (and, recursively, its children) to an HTML string.
	  // Paragraphs become <p> or <h1>-<h6>, list items become <li> wrapped in <ul>/<ol>,
	  // and text, footnotes and inline images are delegated to processText,
	  // processFootnote and processImage.
	  //   item         - the element to convert
	  //   listCounters - object keyed by "<listId>.<nestingLevel>" holding how many
	  //                  items of that list have been emitted so far (updated here)
	  //   images       - array that processImage appends image records to
	  // Returns the HTML for the element; "" for a paragraph with no children.
	  var output = [];
	  var prefix = "", suffix = "";
	  var type = item.getType();

	  if (type == DocumentApp.ElementType.PARAGRAPH) {
	    // Map the heading level to its tag; anything else is a plain paragraph.
	    switch (item.getHeading()) {
	      case DocumentApp.ParagraphHeading.HEADING6:
	        prefix = "<h6>"; suffix = "</h6>"; break;
	      case DocumentApp.ParagraphHeading.HEADING5:
	        prefix = "<h5>"; suffix = "</h5>"; break;
	      case DocumentApp.ParagraphHeading.HEADING4:
	        prefix = "<h4>"; suffix = "</h4>"; break;
	      case DocumentApp.ParagraphHeading.HEADING3:
	        prefix = "<h3>"; suffix = "</h3>"; break;
	      case DocumentApp.ParagraphHeading.HEADING2:
	        prefix = "<h2>"; suffix = "</h2>"; break;
	      case DocumentApp.ParagraphHeading.HEADING1:
	        prefix = "<h1>"; suffix = "</h1>"; break;
	      default:
	        prefix = "<p>"; suffix = "</p>";
	    }

	    // Empty paragraphs produce no markup at all.
	    if (item.getNumChildren() == 0) {
	      return "";
	    }
	  }
	  else if (type == DocumentApp.ElementType.INLINE_IMAGE) {
	    processImage(item, images, output);
	  }
	  else if (type == DocumentApp.ElementType.FOOTNOTE) {
	    processFootnote(item, output);
	  }
	  else if (type === DocumentApp.ElementType.LIST_ITEM) {
	    var gt = item.getGlyphType();
	    var isBulletList = gt === DocumentApp.GlyphType.BULLET
	        || gt === DocumentApp.GlyphType.HOLLOW_BULLET
	        || gt === DocumentApp.GlyphType.SQUARE_BULLET;
	    var key = item.getListId() + '.' + item.getNestingLevel();
	    var counter = listCounters[key] || 0;

	    // Only the first item of a list opens the <ul>/<ol>; the list tag is NOT
	    // closed here, otherwise every following <li> would end up outside the list.
	    if (counter == 0) {
	      prefix = isBulletList ? "<ul><li>" : "<ol><li>";
	    }
	    else {
	      prefix = "<li>";
	    }
	    suffix = "</li>";

	    // Close the list on its last item: at the end of the document, when there is
	    // no following sibling, or when the following sibling is not a list item.
	    var next = item.getNextSibling();
	    if (item.isAtDocumentEnd() || !next || next.getType() != DocumentApp.ElementType.LIST_ITEM) {
	      suffix += isBulletList ? "</ul>" : "</ol>";
	    }

	    listCounters[key] = counter + 1;
	  }

	  output.push(prefix);

	  if (type == DocumentApp.ElementType.TEXT) {
	    processText(item, output);
	  }
	  else if (item.getNumChildren) {
	    // Container element: convert every child in order.
	    var numChildren = item.getNumChildren();
	    for (var i = 0; i < numChildren; i++) {
	      output.push(processItem(item.getChild(i), listCounters, images));
	    }
	  }

	  output.push(suffix);
	  return output.join('');
	}


	function processText(item, output) {
	  // Convert a text element to HTML fragments and append them to `output`.
	  //   item   - text element exposing getText, getTextAttributeIndices, isBold,
	  //            isItalic and getAttributes(offset)
	  //   output - array of HTML fragments; appended to in place
	  // Text with at most one formatting run is treated as a whole: bold becomes
	  // <strong>, fully italic text is assumed to be a quote (<blockquote>), and a
	  // bare http(s) URL becomes a nofollow link. Text with several runs is emitted
	  // run by run with <i>/<strong>/<u> wrappers.
	  var text = item.getText();
	  var indices = item.getTextAttributeIndices();

	  // Wraps a string that starts (after trimming) with http:// or https:// in a link.
	  function linkify(str) {
	    if (/^https?:\/\//.test(str.trim())) {
	      return '<a href="' + str + '" rel="nofollow">' + str + '</a>';
	    }
	    return str;
	  }

	  if (indices.length <= 1) {
	    if (item.isBold()) {
	      output.push('<strong>' + text + '</strong>');
	    }
	    else if (item.isItalic()) {
	      output.push('<blockquote>' + text + '</blockquote>');
	    }
	    else {
	      output.push(linkify(text));
	    }
	    return;
	  }

	  for (var i = 0; i < indices.length; i++) {
	    var startPos = indices[i];
	    var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
	    var partAtts = item.getAttributes(startPos);
	    var partText = text.substring(startPos, endPos);

	    Logger.log(partText);

	    // Collect closing tags in reverse opening order so the markup nests properly.
	    var closers = [];
	    if (partAtts.ITALIC) {
	      output.push('<i>');
	      closers.unshift('</i>');
	    }
	    if (partAtts.BOLD) {
	      output.push('<strong>');
	      closers.unshift('</strong>');
	    }
	    if (partAtts.UNDERLINE) {
	      output.push('<u>');
	      closers.unshift('</u>');
	    }

	    // A run that is exactly "[xxx]" is treated as a reference marker and made
	    // superscript (superscript formatting itself is not detected here).
	    if (partText.indexOf('[') == 0 && partText[partText.length - 1] == ']') {
	      output.push('<sup>' + partText + '</sup>');
	    }
	    else {
	      output.push(linkify(partText));
	    }

	    for (var c = 0; c < closers.length; c++) {
	      output.push(closers[c]);
	    }
	  }
	}


	function processFootnote(item, output){
	  // Converts a footnote element: each paragraph of its contents is rendered by
	  // processFootnoteItem and the concatenation is appended to `output` wrapped in
	  // <fn> tags (a custom tag for the Drupal footnotes module).
	  var paragraphs = item.getFootnoteContents().getParagraphs();
	  var rendered = [];
	  var position = 0;
	  while (position < paragraphs.length) {
	    rendered.push(processFootnoteItem(paragraphs[position]));
	    position += 1;
	  }
	  var inner = rendered.join('');
	  output.push('<fn>' + inner + '</fn>');
	}

	function processFootnoteItem(footnoteItem){
	  // Render one footnote paragraph as an HTML string, keeping only italic and
	  // bold formatting (plus <sup> for "[xxx]" reference markers).
	  //   footnoteItem - element taken from the footnote contents
	  // Returns "" when the element is not a paragraph. Children of the paragraph
	  // that are not text elements are skipped.
	  var output = [];  // local: must not leak into (or be shared through) the global scope

	  if (footnoteItem.getType() != DocumentApp.ElementType.PARAGRAPH) {
	    return '';
	  }

	  var childCount = footnoteItem.getNumChildren();

	  // The child loop and the run loop use separate counters; sharing one index
	  // would make the inner loop clobber the outer position.
	  for (var c = 0; c < childCount; c++) {
	    var item = footnoteItem.getChild(c);
	    if (item.getType() != DocumentApp.ElementType.TEXT) {
	      continue;
	    }

	    var text = item.getText();
	    var indices = item.getTextAttributeIndices();

	    for (var i = 0; i < indices.length; i++) {
	      var startPos = indices[i];
	      var endPos = i + 1 < indices.length ? indices[i + 1] : text.length;
	      var partAtts = item.getAttributes(startPos);
	      var partText = text.substring(startPos, endPos);

	      Logger.log(partText);

	      if (partAtts.ITALIC) {
	        output.push('<i>');
	      }
	      if (partAtts.BOLD) {
	        output.push('<strong>');
	      }

	      // A run that is exactly "[xxx]" is treated as a reference marker and made
	      // superscript (superscript formatting itself is not detected here).
	      if (partText.indexOf('[') == 0 && partText[partText.length - 1] == ']') {
	        output.push('<sup>' + partText + '</sup>');
	      }
	      else {
	        output.push(partText);
	      }

	      // Close in reverse opening order so the tags nest correctly.
	      if (partAtts.BOLD) {
	        output.push('</strong>');
	      }
	      if (partAtts.ITALIC) {
	        output.push('</i>');
	      }
	    }
	  }

	  return output.join('');
	}

	function processImage(item, images, output) {
	  // Register an inline image and emit an <img> tag that refers to it by content id.
	  //   item   - image element exposing getBlob()
	  //   images - array of { blob, type, name } records; the new image is appended.
	  //            If omitted, a throwaway array is used and only the tag is emitted.
	  //   output - array of HTML fragments; the <img> tag is appended
	  // Throws an Error when the blob's content type is not PNG, GIF or JPEG.
	  var imageList = images || [];
	  var blob = item.getBlob();
	  var contentType = blob.getContentType();
	  var extension;

	  if (/\/png$/.test(contentType)) {
	    extension = ".png";
	  } else if (/\/gif$/.test(contentType)) {
	    extension = ".gif";
	  } else if (/\/jpe?g$/.test(contentType)) {
	    extension = ".jpg";
	  } else {
	    throw new Error("Unsupported image type: " + contentType);
	  }

	  // Images are numbered by their position in the list: Image_0, Image_1, ...
	  var name = "Image_" + imageList.length + extension;
	  output.push('<img src="cid:' + name + '" />');
	  imageList.push({
	    "blob": blob,
	    "type": contentType,
	    "name": name
	  });
	}