Created
July 29, 2011 06:40
-
-
Save avinasha/1113327 to your computer and use it in GitHub Desktop.
A Sanitize gem transformer that sanitizes any CSS in an HTML document.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sanitize transformer that vets CSS found in <style> elements and in
# style="" attributes.
#
# Reads :node, :node_name and :is_whitelisted from +env+. Nodes that are
# already whitelisted, are not elements, or carry no CSS are ignored (the
# lambda returns nil). When good_css? rejects the CSS, the whole node is
# unlinked from the document and nil is returned. Otherwise a hash asking
# Sanitize to whitelist the node is returned.
#
# NOTE(review): the :node_whitelist key is specific to the Sanitize version
# in use -- confirm it against the installed gem's transformer docs.
# NOTE(review): whitelisting presumably stops Sanitize from cleaning this
# node's other attributes; verify that is acceptable for elements that only
# carry a style attribute.
check_css = lambda do |env|
  node = env[:node]
  node_name = env[:node_name]

  # Don't continue if this node is already whitelisted or is not an element.
  return if env[:is_whitelisted] || !node.element?

  style_element = node_name == 'style'
  return unless style_element || node['style']

  # A <style> element keeps its CSS in the text content; any other element
  # keeps it in the style attribute.
  css = style_element ? node.content : node['style']

  unless good_css?(css)
    node.unlink
    return
  end

  { :node_whitelist => [node] }
end
# Decides whether a piece of CSS (a <style> body or a style attribute value)
# is free of the constructs this filter treats as dangerous.
#
# The check is deliberately conservative: anything matching one of the
# patterns below is rejected, even if it would be harmless in practice.
# The text is inspected byte-wise on a binary copy, so non-ASCII or
# invalidly encoded input is rejected instead of raising an encoding error.
#
# @param text [String, nil] raw CSS; nil is treated as an empty string
# @return [Boolean] true when nothing suspicious was found
def good_css?(text)
  css = text.to_s.b

  # Comment markers glued to a word character can split a keyword in two,
  # so they are rejected before comments are stripped.
  return false if css =~ %r{\w//}   # a// comment immediately following a letter
  return false if css =~ %r{\w/+\*} # a/* comment immediately following a letter
  return false if css =~ %r{/\*/}   # /*/ --> treated as a hack attempt

  # Strip /* ... */ comments (the m flag lets them span lines), then join
  # the remaining lines so a keyword cannot hide behind a line break.
  stripped = css.gsub(%r{/\*.*?\*/}m, '').delete("\n")

  evil = [
    /(\bdata:\b|eval|cookie|\bwindow\b|\bparent\b|\bthis\b)/i, # suspicious javascript-type words
    /behaviou?r|expression|moz-binding|@import|@charset|(java|vb)?script|\\\w/i, # scriptable CSS, backslash escapes
    /[<>]/,                          # html tags
    /[\x7f-\xff]/n,                  # high bytes -- suspect
    /[\x00-\x08\x0B\x0C\x0E-\x1F]/,  # low bytes -- suspect
    /&\#/                            # bad charset
  ]

  evil.none? { |pattern| stripped =~ pattern }
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi. I found a bug. s/node_whitelist/whitelist_node/g fixed it in my fork.