Skip to content

Instantly share code, notes, and snippets.

@geoffgarside
Forked from ewencp/html_email_preview.js
Last active August 29, 2015 14:24
Show Gist options
  • Save geoffgarside/5d78726ca2ef53277d67 to your computer and use it in GitHub Desktop.
Save geoffgarside/5d78726ca2ef53277d67 to your computer and use it in GitHub Desktop.
(function() {
/* HTML email preview bookmarklet.

   1. If the current page is the raw text of a multipart email, pull out the
      HTML part and replace the page with it.
   2. Remove whole tags that an embedding webmail client (GMail, Yahoo Mail,
      ...) would drop.
   3. Remove attributes that such a client would not let through.

   Only block comments are used and every statement is terminated, so the
   script keeps working when collapsed onto a single line for use as a
   bookmarklet.

   NOTE(review): the extracted HTML is written into the page as-is, so any
   script it contains will run; 'script' is not in the excluded tag list.
   Only use this on emails you trust. */

var boundary_pattern = '--===============';
var html_open_marker = '<html';
var html_close_marker = '/html>';

/* Tags removed entirely. */
var excluded_tags = ['head', 'style', 'link'];

/* Attribute filtering. We don't have a complete list, so attributes are
   only filtered for tags that have an explicit entry in valid_attributes.
   A blacklist of attributes would be nice, but since the possible list of
   tags is ever growing and people generate non-conforming HTML for emails,
   we can't do that.
   Global attributes are always permitted on those tags; each entry is
   treated as a prefix so that families such as 'data-*' match. Finally,
   always_strip_attributes lists attributes that are removed from every
   element, whether or not its tag has an explicit list. */
var global_attributes = ['accesskey', 'contenteditable',
    'contextmenu', 'data-', 'dir', 'draggable', 'dropzone', 'hidden',
    'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype', 'lang',
    'spellcheck', 'style', 'tabindex', 'title'];
var valid_attributes = {
    'table': ['align', 'bgcolor', 'border', 'cellpadding', 'cellspacing',
        'frame', 'rules', 'width'],
    'tbody': ['align', 'bgcolor', 'valign'],
    'tr': ['align', 'bgcolor', 'valign'],
    'td': ['align', 'bgcolor', 'colspan', 'rowspan', 'valign'],
    'img': ['align', 'alt', 'border', 'height', 'src', 'width']
};
var always_strip_attributes = ['id', 'class'];

/* Return the '<html ... /html>' slice of block, or null when block has no
   '<html' marker. When the closing marker is missing, everything from
   '<html' to the end of the block is returned instead of an empty string. */
function slice_html_document(block) {
    var start = block.indexOf(html_open_marker);
    if (start === -1) {
        return null;
    }
    /* Search from start so a stray '/html>' earlier in the block cannot
       produce a negative length. */
    var end = block.indexOf(html_close_marker, start);
    if (end === -1) {
        return block.substr(start);
    }
    return block.substring(start, end + html_close_marker.length);
}

/* Walk the boundary-separated blocks of a raw multipart email and return
   the first HTML document found, or null when text is not a multipart
   email or contains no HTML part. */
function extract_html_part(text) {
    var remaining = text;
    var boundary_idx = remaining.indexOf(boundary_pattern);
    while (boundary_idx !== -1) {
        var found = slice_html_document(remaining.substr(0, boundary_idx));
        if (found !== null) {
            return found;
        }
        /* Otherwise, continue with the next block. Skip the rest of the
           boundary line in the process. */
        var line_end = remaining.indexOf('\n', boundary_idx);
        if (line_end === -1) {
            /* The boundary is the last thing in the text; without this
               check the loop would never advance. */
            return null;
        }
        remaining = remaining.substr(line_end + 1);
        boundary_idx = remaining.indexOf(boundary_pattern);
    }
    if (remaining === text) {
        /* No boundary at all: not a raw multipart email. */
        return null;
    }
    /* Text after the last boundary. */
    return slice_html_document(remaining);
}

/* Remove every element whose tag name is listed in tag_names. */
function remove_excluded_tags(tag_names) {
    for (var tag_i = 0; tag_i < tag_names.length; tag_i++) {
        var elems = document.getElementsByTagName(tag_names[tag_i]);
        /* The collection is live: removing a node shifts the later ones
           down, so walk it backwards to avoid skipping elements. */
        for (var elem_i = elems.length - 1; elem_i >= 0; elem_i--) {
            var node = elems[elem_i];
            if (node.parentNode) {
                node.parentNode.removeChild(node);
            }
        }
    }
}

/* Decide whether attribute name may stay on an element. tag_valid_attributes
   is the explicit list for the element's tag, or null when the tag has no
   list (in which case only the "always strip" attributes are removed). */
function is_attribute_allowed(name, tag_valid_attributes) {
    /* First check if it's in the "always strip" list. */
    if (always_strip_attributes.indexOf(name) !== -1) {
        return false;
    }
    /* No explicit list for this tag: let everything else pass. */
    if (!tag_valid_attributes) {
        return true;
    }
    /* Next check if it's one of the valid global attributes (prefix
       match). */
    for (var g_i = 0; g_i < global_attributes.length; g_i++) {
        if (name.indexOf(global_attributes[g_i]) === 0) {
            return true;
        }
    }
    /* Finally, it must be in the tag's own list. */
    return tag_valid_attributes.indexOf(name) !== -1;
}

/* Strip every attribute that is_attribute_allowed rejects, on every element
   in the document. */
function strip_unverified_attributes() {
    var all_elems = document.getElementsByTagName('*');
    for (var elem_i = 0; elem_i < all_elems.length; elem_i++) {
        var elem = all_elems[elem_i];
        var tag = elem.tagName.toLowerCase();
        /* Own-property check so a tag named e.g. 'constructor' does not
           pick up something inherited from Object.prototype. */
        var tag_valid_attributes =
            Object.prototype.hasOwnProperty.call(valid_attributes, tag) ?
                valid_attributes[tag] : null;
        /* elem.attributes is live, so collect the names first and remove
           them after the iteration is finished. */
        var attribs_to_remove = [];
        for (var i = 0; i < elem.attributes.length; i++) {
            var attrib = elem.attributes[i];
            if (!attrib.specified) {
                continue;
            }
            if (!is_attribute_allowed(attrib.name, tag_valid_attributes)) {
                attribs_to_remove.push(attrib.name);
            }
        }
        for (var r_i = 0; r_i < attribs_to_remove.length; r_i++) {
            elem.removeAttribute(attribs_to_remove[r_i]);
        }
    }
}

/* First try to handle pages which are actually the raw text of the email:
   extract the HTML part and replace the page with it. Otherwise, work on
   the document itself, leaving it in place. */
var orig_html = document.getElementsByTagName('html')[0].textContent;
var extracted_html = extract_html_part(orig_html);
if (extracted_html !== null) {
    document.write(extracted_html);
    /* Finish the stream that document.write opened so the page does not
       stay in the "loading" state. */
    document.close();
}

/* Now run through the document clearing out data we shouldn't have. Ideally
   this would match the process that email clients follow. Something like
   GMail or Yahoo Mail, where the data is embedded directly in another page,
   needs to do the most aggressive filtering, so we want to match something
   like that. Our first step is removing entire tags. */
remove_excluded_tags(excluded_tags);

/* And remove attributes that we can't verify. */
strip_unverified_attributes();

/* TODO: remove any restricted styles. None of this is done yet. */
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment