Last active
November 1, 2022 03:23
-
-
Save mbijon/1098477 to your computer and use it in GitHub Desktop.
XSS filtering in PHP (cleans various UTF encodings & nested exploits)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/*
 * XSS filter, recursively handles HTML tags & UTF encoding
 * Optionally handles base64 encoding
 *
 * ***DEPRECATION RECOMMENDED*** Not updated or maintained since 2011
 * A MAINTAINED & BETTER ALTERNATIVE => kses
 * https://github.com/RichardVasquez/kses/
 *
 * This was built from numerous sources
 * (thanks all, sorry I didn't track to credit you)
 *
 * It was tested against *most* exploits here: http://ha.ckers.org/xss.html
 * WARNING: Some weren't tested!!!
 * Those include the Actionscript and SSI samples, or any newer than Jan 2011
 *
 */
class xssClean {

    /*
     * Repeatedly strips risky markup from a string until a pass changes nothing.
     *
     * Each pass removes blacklisted tags, then decodes and neutralises encoded
     * entities, and (optionally) cleans Base64 payloads. Looping to a fixed
     * point is what defeats nested payloads that only appear once an outer
     * layer has been removed.
     *
     * @param string $input      Content to be cleaned. It MAY be modified in output
     * @param mixed  $safe_level 0 or '0' (default) leaves Base64 handling off;
     *                           any other value turns it on
     * @return string $output Modified $input string
     */
    public function clean_input( $input, $safe_level = 0 ) {
        // Documented contract: "anything other than 0 or '0'" enables Base64.
        // A bare `!== 0` would wrongly treat the string '0' as "enabled".
        $check_base64 = ( $safe_level !== 0 && $safe_level !== '0' );

        $output = $input;
        do {
            // Remember what this pass started from so we can detect a fixed point
            $input = $output;

            $output = $this->strip_tags( $input );
            $output = $this->strip_encoded_entities( $output );

            if ( $check_base64 ) {
                $output = $this->strip_base64( $output );
            }
        } while ( $output !== $input );

        return $output;
    }

    /*
     * Decodes HTML entities, then neutralises event handlers, script-like
     * protocols and script-bearing style attributes.
     *
     * Known limitations visible from the patterns themselves:
     *  - the "on"/xmlns rule only fires after a control char, space or quote,
     *    so an attribute introduced by "/" (e.g. <img/onerror=...>) is not matched
     *  - NOTE(review): the "behaviour" rule uses the British spelling followed
     *    by "(" - confirm this matches what it was meant to block
     *
     * @param string $input Content to be cleaned. It MAY be modified in output
     * @return string $input Modified $input string ('' if a pattern fails,
     *                       e.g. on invalid UTF-8)
     */
    private function strip_encoded_entities( $input ) {
        // Protect already-escaped &, < and > so the decode step below turns
        // them back into the same entities instead of raw markup. Only these
        // three entities are touched: escaping every raw "&" would hide
        // "&#x6A;"-style payloads from the decoder.
        // The strings are assembled from pieces so that HTML-rendering this
        // file cannot collapse them into bare characters.
        $protected = array();
        foreach ( array( 'amp', 'lt', 'gt' ) as $name ) {
            $protected[ '&' . $name . ';' ] = '&' . 'amp;' . $name . ';';
        }
        $input = strtr( $input, $protected );

        // Fix &entity\n; (whitespace before the ';') and add a missing ';'
        // to numeric entities so html_entity_decode() recognises them
        $input = $this->safe_replace( '/(&#*\w+)[\x00-\x20]+;/u', '$1;', $input );
        $input = $this->safe_replace( '/(&#x*[0-9A-F]+);*/iu', '$1;', $input );
        $input = html_entity_decode( $input, ENT_COMPAT, 'UTF-8' );

        // Applied in order; each entry is pattern => replacement
        $rules = array(
            // Remove any attribute starting with "on" or xmlns
            array( '#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+[>\b]?#iu', '$1>' ),
            // Remove javascript: and vbscript: protocols
            array( '#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...' ),
            array( '#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2novbscript...' ),
            array( '#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u', '$1=$2nomozbinding...' ),
            // Only works in IE: <span style="width: expression(alert('Ping!'));"></span>
            array( '#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?expression[\x00-\x20]*\([^>]*+>#i', '$1>' ),
            array( '#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?behaviour[\x00-\x20]*\([^>]*+>#i', '$1>' ),
            array( '#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu', '$1>' ),
        );
        foreach ( $rules as $rule ) {
            $input = $this->safe_replace( $rule[0], $rule[1], $input );
        }

        return $input;
    }

    /*
     * Focuses on stripping unencoded HTML tags & namespaces
     *
     * @param string $input Content to be cleaned. It MAY be modified in output
     * @return string $input Modified $input string
     */
    private function strip_tags( $input ) {
        // Remove blacklisted tags (opening and closing forms)
        $input = $this->safe_replace( '#</*(?:applet|b(?:ase|gsound|link)|embed|frame(?:set)?|i(?:frame|layer)|l(?:ayer|ink)|meta|object|s(?:cript|tyle)|title|xml)[^>]*+>#i', '', $input );
        // Remove namespaced elements
        $input = $this->safe_replace( '#</*\w+:\w[^>]*+>#i', '', $input );
        return $input;
    }

    /*
     * Cleans the payload of a Base64 encoded string
     *
     * NOT ENABLED by default!
     * To enable 2nd param of clean_input() can be set to anything other than 0 or '0':
     * ie: xssClean->clean_input( $input_string, 1 )
     *
     * The input is only rewritten when it really is Base64 (strict decode,
     * round-trips to the same characters, decodes to valid UTF-8) AND cleaning
     * changed the decoded payload. Anything else is returned untouched, so
     * ordinary text is not mangled by a decode/re-encode cycle.
     *
     * @param string $input Maybe Base64 encoded string
     * @return string $output $input unchanged, or the cleaned payload re-encoded
     */
    private function strip_base64( $input ) {
        if ( ! is_string( $input ) || $input === '' ) {
            return $input;
        }

        $decoded = base64_decode( $input, true );
        if ( $decoded === false || $decoded === '' ) {
            return $input;
        }

        // Strict mode still tolerates whitespace and missing padding, so
        // compare with both removed to confirm the text is genuine Base64.
        $compact = rtrim( (string) preg_replace( '/\s+/', '', $input ), '=' );
        if ( rtrim( base64_encode( $decoded ), '=' ) !== $compact ) {
            return $input;
        }

        // The /u patterns used for cleaning reject invalid UTF-8; treat such
        // payloads as "not text" and leave the input alone.
        if ( preg_match( '//u', $decoded ) !== 1 ) {
            return $input;
        }

        $cleaned = $this->strip_tags( $decoded );
        $cleaned = $this->strip_encoded_entities( $cleaned );

        if ( $cleaned === $decoded ) {
            return $input;
        }

        return base64_encode( $cleaned );
    }

    /*
     * preg_replace() wrapper that fails closed.
     *
     * preg_replace() returns null on error (for example invalid UTF-8 with
     * the /u modifier). Returning '' keeps unverifiable content out of the
     * output and keeps null out of later string functions.
     *
     * @param string $pattern     PCRE pattern
     * @param string $replacement Replacement string
     * @param string $subject     String to search
     * @return string Result of the replacement, or '' on PCRE error
     */
    private function safe_replace( $pattern, $replacement, $subject ) {
        $result = preg_replace( $pattern, $replacement, $subject );
        return ( $result === null ) ? '' : $result;
    }
}
@nat4tq share your paper with us then :)
@nat4tq I tried the code you provided on the local server. This is really a problem. Do you have any solutions?
@nat4tq I tried the code you provided on the local server. This is really a problem. Do you have any solutions?
You could try this library : https://github.com/voku/anti-xss
My method is deprecated. I now recommend http://htmlpurifier.org/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
I was testing your filter against a set of XSS test inputs. It seems that your filter is still vulnerable to XSS such as with inputs that contain XSS payloads in the comment-type, anchor and image tags. Examples of one of each are:
<!--#exec cmd="/usr/X11R6/bin/xterm -display 127.0.0.1:0 &"-->
<a href="jAvAsCrIpT:alert(1)">X</a>
/><img/onerror=\x09javascript:alert(1)\x09src=xxx:x />
A full report can be read in our paper, "Assessment of Dynamic Open-source Cross-site Scripting Filters as Security Devices in Web Applications". I kindly suggest that you add these tags to the blacklist to make it more robust against XSS.
Thank you.