Created
April 19, 2012 15:41
-
-
Save meglio/2421812 to your computer and use it in GitHub Desktop.
PHP class to strip JavaScript from HTML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
class StripJS {
    /** Flag: keep href attributes whose value starts with "javascript:". */
    const JS_ALLOW_HREF = 1;
    /** Flag: keep <script> elements. */
    const JS_ALLOW_SCRIPT = 2;
    /** Flag: keep inline event-handler attributes (onclick, onload, ...). */
    const JS_ALLOW_ATTR_ONX = 4;

    /**
     * Strips JavaScript from an HTML string using regular expressions.
     *
     * $allowed is a bitwise combination of the JS_ALLOW_* constants and names
     * the kinds of JavaScript that are KEPT; everything else is removed:
     *   0                  - strip everything (default)
     *   JS_ALLOW_HREF      - keep href="javascript:..." attributes
     *   JS_ALLOW_SCRIPT    - keep <script> elements
     *   JS_ALLOW_ATTR_ONX  - keep on*="..." event-handler attributes
     *
     * The patterns are re-applied until the text stops changing, so input that
     * only becomes dangerous after one removal (for example
     * "<scr<script></script>ipt>") is stripped as well.
     *
     * NOTE: this is best-effort, pattern-based stripping, not an HTML parser.
     * The patterns are not restricted to the inside of tags, so plain text that
     * looks like an event-handler attribute (e.g. ' onfoo=bar') is removed too.
     *
     * @static
     * @param string $html    Markup to clean.
     * @param int    $allowed Bitmask of JS_ALLOW_* flags to keep.
     * @return mixed The cleaned markup, or null if preg_replace() reports an error.
     */
    public static function stripJavascript($html, $allowed = 0)
    {
        $patterns = [];

        if (($allowed & self::JS_ALLOW_HREF) === 0) {
            // href attribute whose value starts with "javascript:", either
            // quoted (up to the matching quote) or unquoted (up to whitespace or ">").
            $patterns[] = '#href\s*=\s*(?:([\'"])\s*javascript\s*:.*?\1|javascript\s*:[^\s>]*)#is';
        }

        if (($allowed & self::JS_ALLOW_SCRIPT) === 0) {
            // Self-closing form first: <script ... />
            $patterns[] = '#<script[^>]+/\s*>#is';
            // Then paired form: <script ...> ... </script>
            $patterns[] = '#<script.*?<\s*/\s*script\s*>#is';
        }

        if (($allowed & self::JS_ALLOW_ATTR_ONX) === 0) {
            // on*= attribute with a double-quoted, single-quoted or unquoted value.
            // The look-behind requires an attribute boundary (whitespace, a quote
            // or "/") before "on", so names that merely contain "on" -- such as
            // content="..." -- and query strings like "?online=1" are left intact.
            $patterns[] = '#(?<=[\s"\'/])on\w+\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+)#is';
        }

        if (!$patterns) {
            // Everything is allowed: hand the input back untouched.
            return $html;
        }

        // Every match is non-empty and is replaced by '', so each changing pass
        // shortens the text and the loop is guaranteed to terminate.
        do {
            $previous = $html;
            $html = preg_replace($patterns, '', $previous);
        } while ($html !== null && $html !== $previous);

        return $html;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment