geoffgarside · August 29, 2015 14:24
diff --git a/html_email_preview.js b/html_email_preview.js
 (function() {
    /* Preview the HTML part of an email in place. If the page is showing the
       raw text of a multipart email, the HTML part is extracted and written
       over the page. Afterwards, tags and attributes that an email client
       embedding the message in its own page would drop are removed from the
       document. Takes no arguments and returns nothing; it only reads and
       mutates the global document. */

    var boundary_pattern = '--===============';
    var html_end_marker = '/html>';
    var has_own = Object.prototype.hasOwnProperty;

    /* Return the '<html ... /html>' span found in block, or null when the
       block contains no '<html'. The end marker is searched for *after* the
       start so an earlier stray '/html>' cannot yield a negative length, and
       a missing end marker means "take the rest of the block" instead of an
       empty or truncated string. */
    function extract_html_part(block) {
        var html_pos = block.indexOf('<html');
        if (html_pos == -1) return null;
        var html_end_pos = block.indexOf(html_end_marker, html_pos);
        if (html_end_pos == -1) return block.substr(html_pos);
        return block.substring(html_pos, html_end_pos + html_end_marker.length);
    }

    /* First try to handle pages which are actually raw text of the email.
       Extract the HTML part and replace page with it */
    var orig_html = document.getElementsByTagName('html')[0].textContent;
    var remaining = orig_html;
    var extracted_html = null;
    var saw_boundary = false;

    /* Try splitting it up if it's actually the multipart email. Otherwise, work
       on the document itself, leaving the orig_html in place */
    var next_boundary = remaining.indexOf(boundary_pattern);
    while (next_boundary != -1) {
        saw_boundary = true;
        /* If the block in front of this boundary contains the html use it */
        extracted_html = extract_html_part(remaining.substr(0, next_boundary));
        if (extracted_html !== null) break;

        /* Otherwise, continue on next block. We need to make sure we get rid of
           the boundary line in the process */
        var new_start_idx = remaining.indexOf('\n', next_boundary);
        if (new_start_idx == -1) {
            /* The boundary is on the last line and has no trailing newline.
               Nothing follows it, so stop here; re-scanning the same text
               would loop forever. */
            remaining = '';
            break;
        }
        remaining = remaining.substr(new_start_idx + 1);
        next_boundary = remaining.indexOf(boundary_pattern);
    }

    /* A message without a closing boundary leaves its last part after the
       final boundary line, so look there too. This is only done when the text
       really was multipart; an ordinary page is never rewritten. */
    if (saw_boundary && extracted_html === null) {
        extracted_html = extract_html_part(remaining);
    }

    /* Put the replacement in place, but only when an HTML part was actually
       found. Writing a non-HTML (often empty) remainder would just blank
       the page. */
    if (extracted_html !== null) {
        document.write(extracted_html);
    }

    /* Now run through the document clearing out data we shouldn't have. Ideally
       this would match the process that email clients follow. Something like GMail
       or Yahoo Mail, where the data is embedded directly in another page, needs to
       do the most aggressive filtering, so we want to match something like
       that. Our first step is removing entire tags. */
    var excluded_tags = ['head', 'style', 'link'];
    for (var ex_i = 0; ex_i < excluded_tags.length; ex_i++) {
        var ex_elems = document.getElementsByTagName(excluded_tags[ex_i]);
        /* The collection is live: removing a node shrinks it and shifts the
           later entries down. Walking it backwards keeps the indices still
           to be visited stable, so no element gets skipped. */
        for (var exe_i = ex_elems.length - 1; exe_i >= 0; exe_i--) {
            var node = ex_elems[exe_i];
            node.parentNode.removeChild(node);
        }
    }

    /* And remove attributes that we can't verify. We don't have a complete
       list, so we filter out attributes only for tags we have an explicit
       list for. A blacklist of attributes would be nice, but since the possible
       list of tags is ever growing and people generate non-conforming HTML for
       emails, we can't do that.

       Some global attributes are always permitted on filtered tags. Each
       global attribute is treated as a prefix so we can match generic sets
       (e.g. 'data-'). Finally, we also have a list of attributes that are
       always stripped, whatever the tag. */
    var global_attributes = ['accesskey', 'contenteditable',
      'contextmenu', 'data-', 'dir', 'draggable', 'dropzone', 'hidden',
      'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype', 'lang',
      'spellcheck', 'style', 'tabindex', 'title'];
    var valid_attributes = {
        'table': ['align', 'bgcolor', 'border', 'cellpadding', 'cellspacing',
                  'frame', 'rules', 'width'],
        'tbody': ['align', 'bgcolor', 'valign'],
        'tr': ['align', 'bgcolor', 'valign'],
        'td': ['align', 'bgcolor', 'colspan', 'rowspan', 'valign'],

        'img': ['align', 'alt', 'border', 'height', 'src', 'width']
    };
    var always_strip_attributes = ['id', 'class'];

    /* True when name starts with one of the global attribute prefixes or is
       listed explicitly in tag_valid_attributes. */
    function attribute_is_allowed(name, tag_valid_attributes) {
        for (var gi = 0; gi < global_attributes.length; gi++) {
            if (name.indexOf(global_attributes[gi]) == 0) return true;
        }
        return tag_valid_attributes.indexOf(name) != -1;
    }

    var all_elems = document.getElementsByTagName('*');
    for (var elem_i = 0; elem_i < all_elems.length; elem_i++) {
        var elem = all_elems[elem_i];
        var tag_name = elem.tagName.toLowerCase();
        /* Own-property lookup only, so an unusual tag name such as
           'constructor' cannot pick up something inherited from
           Object.prototype and be treated as a filter list. */
        var tag_valid_attributes = has_own.call(valid_attributes, tag_name) ?
            valid_attributes[tag_name] : null;

        /* Collect first and remove afterwards: elem.attributes changes as
           attributes are removed. */
        var attribs_to_remove = [];
        for (var i = 0; i < elem.attributes.length; i++) {
            var attrib = elem.attributes[i];
            /* Skip attributes the DOM reports as defaulted rather than
               written in the markup. Compared against false so that an
               implementation without 'specified' still gets filtered. */
            if (attrib.specified === false)
                continue;

            /* First check if it's in the "always strip" list */
            if (always_strip_attributes.indexOf(attrib.name) != -1) {
                attribs_to_remove.push(attrib.name);
                continue;
            }

            /* Tags we have no explicit list for keep their other attributes */
            if (!tag_valid_attributes) continue;

            /* Otherwise the attribute has to be a permitted global attribute
               or be listed for this tag; anything else is not in the safe
               list. */
            if (!attribute_is_allowed(attrib.name, tag_valid_attributes))
                attribs_to_remove.push(attrib.name);
        }

        /* After finishing iterating over them, remove the ones we
           discovered */
        for (var ri = 0; ri < attribs_to_remove.length; ri++)
            elem.removeAttribute(attribs_to_remove[ri]);
    }

    /* TODO: restricted styles still need to be removed; none of that is
       done yet. */

 })();
	(function() {
		/* Preview the HTML part of an email in place. If the page is showing the
		   raw text of a multipart email, the HTML part is extracted and written
		   over the page. Afterwards, tags and attributes that an email client
		   embedding the message in its own page would drop are removed from the
		   document. Takes no arguments and returns nothing; it only reads and
		   mutates the global document. */

		var boundary_pattern = '--===============';
		var html_end_marker = '/html>';
		var has_own = Object.prototype.hasOwnProperty;

		/* Return the '<html ... /html>' span found in block, or null when the
		   block contains no '<html'. The end marker is searched for *after* the
		   start so an earlier stray '/html>' cannot yield a negative length, and
		   a missing end marker means "take the rest of the block" instead of an
		   empty or truncated string. */
		function extract_html_part(block) {
			var html_pos = block.indexOf('<html');
			if (html_pos == -1) return null;
			var html_end_pos = block.indexOf(html_end_marker, html_pos);
			if (html_end_pos == -1) return block.substr(html_pos);
			return block.substring(html_pos, html_end_pos + html_end_marker.length);
		}

		/* First try to handle pages which are actually raw text of the email.
		   Extract the HTML part and replace page with it */
		var orig_html = document.getElementsByTagName('html')[0].textContent;
		var remaining = orig_html;
		var extracted_html = null;
		var saw_boundary = false;

		/* Try splitting it up if it's actually the multipart email. Otherwise, work
		   on the document itself, leaving the orig_html in place */
		var next_boundary = remaining.indexOf(boundary_pattern);
		while (next_boundary != -1) {
			saw_boundary = true;
			/* If the block in front of this boundary contains the html use it */
			extracted_html = extract_html_part(remaining.substr(0, next_boundary));
			if (extracted_html !== null) break;

			/* Otherwise, continue on next block. We need to make sure we get rid of
			   the boundary line in the process */
			var new_start_idx = remaining.indexOf('\n', next_boundary);
			if (new_start_idx == -1) {
				/* The boundary is on the last line and has no trailing newline.
				   Nothing follows it, so stop here; re-scanning the same text
				   would loop forever. */
				remaining = '';
				break;
			}
			remaining = remaining.substr(new_start_idx + 1);
			next_boundary = remaining.indexOf(boundary_pattern);
		}

		/* A message without a closing boundary leaves its last part after the
		   final boundary line, so look there too. This is only done when the text
		   really was multipart; an ordinary page is never rewritten. */
		if (saw_boundary && extracted_html === null) {
			extracted_html = extract_html_part(remaining);
		}

		/* Put the replacement in place, but only when an HTML part was actually
		   found. Writing a non-HTML (often empty) remainder would just blank
		   the page. */
		if (extracted_html !== null) {
			document.write(extracted_html);
		}

		/* Now run through the document clearing out data we shouldn't have. Ideally
		   this would match the process that email clients follow. Something like GMail
		   or Yahoo Mail, where the data is embedded directly in another page, needs to
		   do the most aggressive filtering, so we want to match something like
		   that. Our first step is removing entire tags. */
		var excluded_tags = ['head', 'style', 'link'];
		for (var ex_i = 0; ex_i < excluded_tags.length; ex_i++) {
			var ex_elems = document.getElementsByTagName(excluded_tags[ex_i]);
			/* The collection is live: removing a node shrinks it and shifts the
			   later entries down. Walking it backwards keeps the indices still
			   to be visited stable, so no element gets skipped. */
			for (var exe_i = ex_elems.length - 1; exe_i >= 0; exe_i--) {
				var node = ex_elems[exe_i];
				node.parentNode.removeChild(node);
			}
		}

		/* And remove attributes that we can't verify. We don't have a complete
		   list, so we filter out attributes only for tags we have an explicit
		   list for. A blacklist of attributes would be nice, but since the possible
		   list of tags is ever growing and people generate non-conforming HTML for
		   emails, we can't do that.

		   Some global attributes are always permitted on filtered tags. Each
		   global attribute is treated as a prefix so we can match generic sets
		   (e.g. 'data-'). Finally, we also have a list of attributes that are
		   always stripped, whatever the tag. */
		var global_attributes = ['accesskey', 'contenteditable',
			'contextmenu', 'data-', 'dir', 'draggable', 'dropzone', 'hidden',
			'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype', 'lang',
			'spellcheck', 'style', 'tabindex', 'title'];
		var valid_attributes = {
			'table': ['align', 'bgcolor', 'border', 'cellpadding', 'cellspacing',
				'frame', 'rules', 'width'],
			'tbody': ['align', 'bgcolor', 'valign'],
			'tr': ['align', 'bgcolor', 'valign'],
			'td': ['align', 'bgcolor', 'colspan', 'rowspan', 'valign'],

			'img': ['align', 'alt', 'border', 'height', 'src', 'width']
		};
		var always_strip_attributes = ['id', 'class'];

		/* True when name starts with one of the global attribute prefixes or is
		   listed explicitly in tag_valid_attributes. */
		function attribute_is_allowed(name, tag_valid_attributes) {
			for (var gi = 0; gi < global_attributes.length; gi++) {
				if (name.indexOf(global_attributes[gi]) == 0) return true;
			}
			return tag_valid_attributes.indexOf(name) != -1;
		}

		var all_elems = document.getElementsByTagName('*');
		for (var elem_i = 0; elem_i < all_elems.length; elem_i++) {
			var elem = all_elems[elem_i];
			var tag_name = elem.tagName.toLowerCase();
			/* Own-property lookup only, so an unusual tag name such as
			   'constructor' cannot pick up something inherited from
			   Object.prototype and be treated as a filter list. */
			var tag_valid_attributes = has_own.call(valid_attributes, tag_name) ?
				valid_attributes[tag_name] : null;

			/* Collect first and remove afterwards: elem.attributes changes as
			   attributes are removed. */
			var attribs_to_remove = [];
			for (var i = 0; i < elem.attributes.length; i++) {
				var attrib = elem.attributes[i];
				/* Skip attributes the DOM reports as defaulted rather than
				   written in the markup. Compared against false so that an
				   implementation without 'specified' still gets filtered. */
				if (attrib.specified === false)
					continue;

				/* First check if it's in the "always strip" list */
				if (always_strip_attributes.indexOf(attrib.name) != -1) {
					attribs_to_remove.push(attrib.name);
					continue;
				}

				/* Tags we have no explicit list for keep their other attributes */
				if (!tag_valid_attributes) continue;

				/* Otherwise the attribute has to be a permitted global attribute
				   or be listed for this tag; anything else is not in the safe
				   list. */
				if (!attribute_is_allowed(attrib.name, tag_valid_attributes))
					attribs_to_remove.push(attrib.name);
			}

			/* After finishing iterating over them, remove the ones we
			   discovered */
			for (var ri = 0; ri < attribs_to_remove.length; ri++)
				elem.removeAttribute(attribs_to_remove[ri]);
		}

		/* TODO: restricted styles still need to be removed; none of that is
		   done yet. */

	})();
No results found