-
-
Save geoffgarside/5d78726ca2ef53277d67 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function() {
    /* Approximates how a webmail client would render an HTML email.
       Runs against the current `document` in three steps:
         1. If the page is the raw text of a multipart email, pull out the
            HTML part and replace the page with it.
         2. Remove whole tags that would not survive being embedded in
            another page (head, style, link).
         3. Strip attributes that are not on an allow list.
       Filtering of restricted inline styles is not implemented yet. */

    /* Returns true when `name` starts with any entry of `prefixes`. */
    function starts_with_any(name, prefixes) {
        for (var pi = 0; pi < prefixes.length; pi++) {
            if (name.indexOf(prefixes[pi]) === 0) return true;
        }
        return false;
    }

    /* Returns the `<html ... /html>` span of the first boundary-delimited
       block of `raw_text` that contains one, or null when `raw_text` has no
       boundary markers or none of its blocks contains HTML. */
    function extract_email_html(raw_text) {
        var boundary_pattern = '--===============';
        var html_end_marker = '/html>';
        var remaining = raw_text;
        var next_boundary = remaining.indexOf(boundary_pattern);
        while (next_boundary !== -1) {
            var next_block = remaining.substring(0, next_boundary);
            /* If this block contains the html use it */
            var html_pos = next_block.indexOf('<html');
            if (html_pos !== -1) {
                /* Look for the closing tag only after the opening one. If
                   it is missing, keep everything up to the end of the block
                   rather than producing an empty or clipped string. */
                var html_end_pos = next_block.indexOf(html_end_marker, html_pos);
                var html_end = (html_end_pos === -1) ?
                    next_block.length : html_end_pos + html_end_marker.length;
                return next_block.substring(html_pos, html_end);
            }
            /* Otherwise, continue on the next block, dropping the rest of
               the boundary line. With no newline after the boundary there
               is nothing left to scan; stopping here also prevents looping
               forever on an unchanged string. */
            var new_start_idx = remaining.indexOf('\n', next_boundary);
            if (new_start_idx === -1) break;
            remaining = remaining.substring(new_start_idx + 1);
            next_boundary = remaining.indexOf(boundary_pattern);
        }
        return null;
    }

    /* Step 1: handle pages which are actually the raw text of the email.
       Only replace the page when an HTML part was really found; otherwise
       keep working on the document as it is. */
    var orig_html = document.getElementsByTagName('html')[0].textContent;
    var extracted_html = extract_email_html(orig_html);
    if (extracted_html !== null) {
        document.write(extracted_html);
    }

    /* Step 2: run through the document clearing out data we shouldn't have.
       Ideally this would match the process that email clients follow.
       Something like GMail or Yahoo Mail, where the data is embedded
       directly in another page, needs to do the most aggressive filtering,
       so we want to match something like that. Our first step is removing
       entire tags. */
    var excluded_tags = ['head', 'style', 'link'];
    for (var ex_i = 0; ex_i < excluded_tags.length; ex_i++) {
        var ex_elems = document.getElementsByTagName(excluded_tags[ex_i]);
        /* getElementsByTagName returns a live collection that shrinks as
           nodes are removed. Walking it backwards keeps the remaining
           indexes valid, so no match is skipped. */
        for (var exe_i = ex_elems.length - 1; exe_i >= 0; exe_i--) {
            var node = ex_elems[exe_i];
            node.parentNode.removeChild(node);
        }
    }

    /* Step 3: remove attributes that we can't verify. We don't have a
       complete list, so we filter out attributes only for tags we have an
       explicit list for. A blacklist of attributes would be nice, but since
       the possible list of tags is ever growing and people generate
       non-conforming HTML for emails, we can't do that.
       Some global attributes are always permitted on filtered tags. Each
       global attribute is treated as a prefix so we can match generic sets
       of attributes (e.g. 'data-'). Finally, we also have a list of
       attributes that are always stripped, whatever the tag. */
    var global_attributes = ['accesskey', 'contenteditable',
        'contextmenu', 'data-', 'dir', 'draggable', 'dropzone', 'hidden',
        'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype', 'lang',
        'spellcheck', 'style', 'tabindex', 'title'];
    var valid_attributes = {
        'table': ['align', 'bgcolor', 'border', 'cellpadding', 'cellspacing',
            'frame', 'rules', 'width'],
        'tbody': ['align', 'bgcolor', 'valign'],
        'tr': ['align', 'bgcolor', 'valign'],
        'td': ['align', 'bgcolor', 'colspan', 'rowspan', 'valign'],
        'img': ['align', 'alt', 'border', 'height', 'src', 'width']
    };
    var always_strip_attributes = ['id', 'class'];

    var all_elems = document.getElementsByTagName('*');
    for (var elem_i = 0; elem_i < all_elems.length; elem_i++) {
        var elem = all_elems[elem_i];

        /* Per-tag allow list, or null when the tag has none. The own-property
           check stops an unusual tag name such as <constructor> from
           resolving to something inherited from Object.prototype. */
        var tag_name = elem.tagName.toLowerCase();
        var tag_valid_attributes =
            Object.prototype.hasOwnProperty.call(valid_attributes, tag_name) ?
                valid_attributes[tag_name] : null;

        /* Collect names first and remove afterwards: `elem.attributes`
           changes while attributes are being removed. */
        var attribs_to_remove = [];
        for (var i = 0; i < elem.attributes.length; i++) {
            var attrib = elem.attributes[i];
            if (!attrib.specified)
                continue;

            /* First check if it's in the "always strip" list */
            if (always_strip_attributes.indexOf(attrib.name) !== -1) {
                attribs_to_remove.push(attrib.name);
                continue;
            }

            /* Tags without an explicit list keep everything else */
            if (!tag_valid_attributes) continue;

            /* Next check if it's one of the valid global attributes. If it
               is, we let it pass */
            if (starts_with_any(attrib.name, global_attributes)) continue;

            /* Finally, let through the attributes listed for this tag */
            if (tag_valid_attributes.indexOf(attrib.name) !== -1) continue;

            /* If we didn't continue already, then the attribute wasn't in
               the safe list. */
            attribs_to_remove.push(attrib.name);
        }

        /* After finishing iterating over them, remove the ones we
           discovered */
        for (var ai = 0; ai < attribs_to_remove.length; ai++)
            elem.removeAttribute(attribs_to_remove[ai]);
    }

    /* TODO: restricted inline styles still need to be removed; none of
       that is done yet. */
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment