Skip to content

Instantly share code, notes, and snippets.

@saeedvir
Last active September 27, 2025 03:01
Show Gist options
  • Save saeedvir/f00d76c33a95c1fbecb297df4a286ebc to your computer and use it in GitHub Desktop.
Save saeedvir/f00d76c33a95c1fbecb297df4a286ebc to your computer and use it in GitHub Desktop.
secure strip_tags function
<?php
/*
javascript version : https://gist.github.com/saeedvir/d9b80ff857481dcce26b7610442dfb3f
*/
/**
 * Strip HTML from a string, keeping only an allow-list of tags in sanitised form.
 *
 * Steps: drop HTML comments and PHP blocks, drop <script>/<style> elements together
 * with their content, hand every remaining tag to process_tag() (which discards
 * disallowed tags and sanitises allowed ones), optionally HTML-encode the text that
 * sits between tags, and optionally remove elements that ended up empty.
 *
 * NOTE: with encode_unsafe_chars disabled, text between tags is emitted verbatim, so
 * stray "<" characters survive and removed tags can splice surrounding text into new
 * markup (e.g. "<<b>script>"). Keep the default when the output goes into an HTML page.
 *
 * @param mixed  $input   Value to clean. null and '' return ''; anything else is cast to string.
 * @param string $allowed Allowed tags in strip_tags() notation, e.g. '<p><b>'.
 *                        script and style are never kept, even when listed.
 * @param array  $options Overrides for the defaults:
 *                        - encode_unsafe_chars (true): HTML-encode text outside of kept tags.
 *                        - remove_javascript_attributes (true): passed through to process_tag().
 *                        - allow_data_attributes (false): passed through to process_tag().
 *                        - remove_empty_tags (true): drop elements such as <p></p>.
 * @return string Cleaned markup; '' when a PCRE call fails (fail closed).
 */
function strip_tags_secure($input, $allowed = '', $options = []) {
    if ($input === null || $input === '') {
        return '';
    }
    $str = (string)$input;

    $defaults = [
        'encode_unsafe_chars' => true,
        'remove_javascript_attributes' => true,
        'allow_data_attributes' => false,
        'remove_empty_tags' => true
    ];
    $config = array_merge($defaults, $options);

    // Parse the allow-list ('<p><b>' => ['p', 'b']).
    $allowedTags = [];
    if ($allowed) {
        preg_match_all('/<([a-z][a-z0-9]*)\b[^>]*>/i', $allowed, $matches);
        if (!empty($matches[1])) {
            $allowedTags = array_map('strtolower', $matches[1]);
        }
    }
    // script/style are always removed with their content below, so never let a
    // stray or unclosed one through via the allow-list.
    $allowedTags = array_values(array_diff(array_unique($allowedTags), ['script', 'style']));

    // Remove comments and PHP blocks.
    $result = preg_replace('/<!--[\s\S]*?-->|<\?(?:php)?[\s\S]*?\?>/i', '', $str);
    if ($result === null) {
        return '';
    }

    // Remove script and style elements including their content. The closing tag may
    // carry whitespace or junk before ">" ("</script >"), which browsers accept.
    $result = preg_replace('/<(script|style)\b[\s\S]*?<\/\1\b[^>]*>/i', '', $result);
    if ($result === null) {
        return '';
    }

    // Locate every tag so that tags and the text between them can be treated separately.
    $found = preg_match_all(
        '/<\/?([a-z][a-z0-9]*)\b[^>]*>/i',
        $result,
        $tags,
        PREG_SET_ORDER | PREG_OFFSET_CAPTURE
    );
    if ($found === false) {
        return '';
    }

    $encodeText = function ($text) use ($config) {
        if (!$config['encode_unsafe_chars'] || $text === '') {
            return $text;
        }
        // double_encode=false keeps entities the author already wrote (&amp; stays &amp;).
        return htmlspecialchars($text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8', false);
    };

    // Encode only the text segments. Encoding the whole string and then un-escaping
    // "&lt;tag" would leave "&gt;"/"&quot;" inside kept tags and would also revive
    // tags that arrived entity-encoded in the input.
    $output = '';
    $cursor = 0;
    foreach ($tags as $tag) {
        $fullTag = $tag[0][0];
        $start = $tag[0][1];
        $output .= $encodeText((string)substr($result, $cursor, $start - $cursor));
        $output .= process_tag([$fullTag, $tag[1][0]], $allowedTags, $config);
        $cursor = $start + strlen($fullTag);
    }
    $output .= $encodeText((string)substr($result, $cursor));

    if ($config['remove_empty_tags']) {
        $output = preg_replace('/<([a-z][a-z0-9]*)([^>]*)\s*>\s*<\/\1>/i', '', $output);
        if ($output === null) {
            return '';
        }
    }
    return $output;
}
/**
 * Decide what happens to a single matched tag.
 *
 * @param array $matches     Regex match: [0] is the full tag text, [1] the tag name.
 * @param array $allowedTags Lower-case names of the tags that may be kept.
 * @param array $config      Sanitiser options, handed on to secure_tag().
 * @return string '' for a tag that is not allowed, otherwise the result of secure_tag().
 */
function process_tag($matches, $allowedTags, $config) {
    $fullTag = $matches[0];
    $tagName = strtolower($matches[1]);

    // Strict comparison: with loose matching a non-string entry in the allow-list
    // (true, or 0 on PHP < 8) would compare equal to every tag name.
    if (!in_array($tagName, $allowedTags, true)) {
        return '';
    }

    // Secure the allowed tag
    return secure_tag($fullTag, $tagName, $config);
}
/**
 * Sanitise the attributes of one allowed tag.
 *
 * The opening tag is tokenised into attributes and rebuilt from the ones that
 * survive, with every value re-quoted in double quotes and HTML-escaped. Working on
 * parsed attributes (instead of regex-deleting substrings) means the value that is
 * checked is exactly the value a browser will see in the output.
 *
 * When remove_javascript_attributes is on:
 *  - every attribute whose name starts with "on" is dropped, as are fscommand,
 *    seeksegmenttime and srcdoc;
 *  - URL attributes (names ending in href/src/action/background, plus data, poster,
 *    codebase) are emptied when their value - after decoding character references
 *    and removing whitespace/control characters - starts with javascript:,
 *    vbscript:, data: or livescript:;
 *  - a style attribute is emptied when it contains expression/javascript/vbscript
 *    after decoding character references, CSS comments and CSS escapes.
 * When allow_data_attributes is off, data-* attributes are dropped.
 * Attributes whose name is not a plain identifier ([a-z_:][a-z0-9_:.-]*) are dropped.
 *
 * @param string $tag     Full tag text, e.g. '<a href="x">' or '</a>'.
 * @param string $tagName Tag name; used only as a fallback when $tag cannot be parsed.
 * @param array  $config  Options; missing keys default to
 *                        remove_javascript_attributes=true, allow_data_attributes=false.
 * @return string Closing tags are returned unchanged, as is any tag when both
 *                filters are disabled; otherwise the rebuilt opening tag.
 */
function secure_tag($tag, $tagName, $config) {
    $tag = (string)$tag;

    // For closing tags, return as-is
    if (strpos($tag, '</') === 0) {
        return $tag;
    }

    $config = array_merge(
        ['remove_javascript_attributes' => true, 'allow_data_attributes' => false],
        (array)$config
    );
    $stripScripting = (bool)$config['remove_javascript_attributes'];
    $stripData = !$config['allow_data_attributes'];
    if (!$stripScripting && !$stripData) {
        return $tag; // nothing was asked to be filtered
    }

    // Split "<name ...attributes...>"; fail closed to a bare tag if the shape is unexpected.
    if (!preg_match('/^<([a-z][a-z0-9]*)\b([^>]*)>\z/i', $tag, $parts)) {
        $fallbackName = preg_replace('/[^a-z0-9]/i', '', (string)$tagName);
        return ($fallbackName === null || $fallbackName === '') ? '' : '<' . $fallbackName . '>';
    }

    // Token = attribute (bare, "double", 'single' or unquoted value) or a lone "/".
    // With PREG_SET_ORDER trailing unmatched groups are omitted, so the element count
    // identifies the form: 2 bare, 3 double-quoted, 4 single-quoted, 5 unquoted, 6 slash.
    $attrPattern = '/([^\s"\'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s>]+)))?|(\/)/';
    if (preg_match_all($attrPattern, $parts[2], $tokens, PREG_SET_ORDER) === false) {
        return '<' . $parts[1] . '>';
    }

    $dangerousProtocols = ['javascript:', 'vbscript:', 'data:', 'livescript:'];
    $blockedNames = ['fscommand', 'seeksegmenttime', 'srcdoc'];
    $urlAttrPattern = '/(?:href|src|action|background)$|^(?:data|poster|codebase)$/';

    // Decode character references the way a browser would, including numeric
    // references that lack the terminating semicolon.
    $decodeEntities = function ($value) {
        $value = preg_replace('/&#(x[0-9a-f]+|[0-9]+);?/i', '&#$1;', $value);
        return $value === null ? null : html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    };

    // True when the value resolves to one of the dangerous schemes (or cannot be analysed).
    $isDangerousUrl = function ($value) use ($decodeEntities, $dangerousProtocols) {
        $url = $decodeEntities($value);
        if ($url !== null) {
            // URL parsers ignore leading whitespace/control chars and embedded tabs/newlines.
            $url = preg_replace('/[\x00-\x20\x7F]+/', '', $url);
        }
        if ($url === null) {
            return true;
        }
        foreach ($dangerousProtocols as $protocol) {
            if (stripos($url, $protocol) === 0) {
                return true;
            }
        }
        return false;
    };

    // True when the inline style contains a scripting keyword (or cannot be analysed).
    $isDangerousCss = function ($value) use ($decodeEntities) {
        $css = $decodeEntities($value);
        if ($css !== null) {
            $css = preg_replace('#/\*.*?\*/#s', '', $css); // "exp/**/ression"
        }
        if ($css !== null) {
            // CSS hex escapes such as "\65 xpression".
            $css = preg_replace_callback('/\\\\([0-9a-f]{1,6})\s?/i', function ($escape) {
                $codePoint = hexdec($escape[1]);
                return ($codePoint > 0 && $codePoint < 128) ? chr($codePoint) : '';
            }, $css);
        }
        if ($css === null) {
            return true;
        }
        $css = str_replace('\\', '', $css);
        $css = preg_replace('/[\x00-\x1F\x7F]+/', '', $css);
        return $css === null || preg_match('/expression|javascript|vbscript/i', $css) !== 0;
    };

    $kept = [];
    $selfClosing = false;
    foreach ($tokens as $token) {
        $groups = count($token);
        if ($groups === 6) {
            // Only a "/" that is the last token marks a self-closing tag.
            $selfClosing = true;
            continue;
        }
        $selfClosing = false;

        $name = $token[1];
        $lowerName = strtolower($name);
        $hasValue = $groups > 2;
        $value = $hasValue ? $token[$groups - 1] : '';

        if (!preg_match('/^[a-z_:][a-z0-9_:.\-]*\z/i', $name)) {
            continue;
        }
        if ($stripData && strncmp($lowerName, 'data-', 5) === 0) {
            continue;
        }
        if ($stripScripting) {
            // Every event-handler attribute starts with "on"; no list can go stale.
            if (strncmp($lowerName, 'on', 2) === 0 || in_array($lowerName, $blockedNames, true)) {
                continue;
            }
            if ($hasValue && preg_match($urlAttrPattern, $lowerName) && $isDangerousUrl($value)) {
                $value = ''; // Empty the attribute
            }
            if ($hasValue && $lowerName === 'style' && $isDangerousCss($value)) {
                $value = '';
            }
        }

        $kept[] = $hasValue
            ? $name . '="' . htmlspecialchars($value, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8', false) . '"'
            : $name;
    }

    return '<' . $parts[1]
        . ($kept ? ' ' . implode(' ', $kept) : '')
        . ($selfClosing ? ' />' : '>');
}
// Basic version (similar to original JavaScript function)
/**
 * Minimal tag stripper: removes comments, PHP blocks, script/style elements and every
 * tag that is not on the allow-list. Kept tags are returned exactly as written, so
 * their attributes are NOT sanitised.
 *
 * @param mixed  $input   Value to clean; null and '' return ''.
 * @param string $allowed Allowed tags in strip_tags() notation, e.g. '<p><b>'.
 * @return string
 */
function strip_tags_basic($input, $allowed = '') {
    if ($input === null || $input === '') {
        return '';
    }
    $text = (string)$input;

    // Build a lookup of lower-cased tag names that may stay.
    $keep = [];
    if ($allowed) {
        preg_match_all('/<([a-z][a-z0-9]*)\b[^>]*>/i', $allowed, $found);
        if (!empty($found[1])) {
            foreach ($found[1] as $name) {
                $keep[strtolower($name)] = true;
            }
        }
    }

    // First comments / PHP blocks, then script and style elements (patterns run in order).
    $text = preg_replace(
        [
            '/<!--[\s\S]*?-->|<\?(?:php)?[\s\S]*?\?>/i',
            '/<script[\s\S]*?<\/script>|<style[\s\S]*?<\/style>/i',
        ],
        '',
        $text
    );

    // Every remaining tag either stays verbatim or disappears.
    return preg_replace_callback(
        '/<\/?([a-z][a-z0-9]*)\b[^>]*>/i',
        function ($hit) use ($keep) {
            return isset($keep[strtolower($hit[1])]) ? $hit[0] : '';
        },
        $text
    );
}
// Usage examples: run the same snippet through both variants.
$html = '<p onclick="alert(\'xss\')">Hello <b>world</b>! <script>alert("xss")</script></p>';

echo "Basic version:\n", strip_tags_basic($html, '<p><b>'), "\n\n";

echo "Secure version:\n", strip_tags_secure($html, '<p><b>', [
    'remove_javascript_attributes' => true,
    'encode_unsafe_chars' => true
]), "\n\n";

// A handful of classic XSS payloads, each cleaned with <p>, <img>, <a> and <div> allowed.
$xssTests = [
    '<script>alert("xss")</script>',
    '<img src="x" onerror="alert(1)">',
    '<p style="expression(alert(1))">Test</p>',
    '<a href="javascript:alert(1)">Click</a>',
    '<div onmouseover="alert(1)">Hover</div>'
];

echo "XSS Protection Tests:\n";
foreach ($xssTests as $test) {
    $secured = strip_tags_secure($test, '<p><img><a><div>', [
        'remove_javascript_attributes' => true
    ]);
    // Both strings are escaped so the raw markup is shown rather than interpreted.
    echo "Input: ", htmlspecialchars($test), "\n";
    echo "Output: ", htmlspecialchars($secured), "\n\n";
}
?>
@saeedvir
Copy link
Author

saeedvir commented Sep 27, 2025

$dangerous = '<p onclick="alert(1)" style="expression(alert(1))">Test</p>';

// Native strip_tags() filters by tag name only; attributes of an allowed tag pass through.
// Output: <p onclick="alert(1)" style="expression(alert(1))">Test</p>
print strip_tags($dangerous, '<p>');

// strip_tags_secure() also sanitises the attributes of the allowed tag.
// NOTE(review): secure_tag() blanks a dangerous style to style="" rather than removing
// the attribute, so the result is not a bare <p> - confirm the exact output by running it.
print strip_tags_secure($dangerous, '<p>');


// Edge cases: null, empty string, plain markup and a script element.
$test_cases = [
    null,
    '',
    '<p>Hello</p>',
    '<script>alert("xss")</script>'
];

foreach ($test_cases as $case) {
    // strip_tags() takes a non-nullable string; passing null is deprecated since
    // PHP 8.1, so cast explicitly (null becomes '', the printed output is unchanged).
    echo "PHP strip_tags: " . strip_tags((string)$case) . "\n";
    // strip_tags_secure() handles null itself and returns ''.
    echo "Our version: " . strip_tags_secure($case) . "\n";
}


// Performance test: 100 runs of each function over the same repeated snippet.
$html = str_repeat('<p>Test <b>content</b> with <script>alert("xss")</script></p>', 1000);

// Baseline: native strip_tags().
$start = microtime(true);
$i = 0;
while ($i < 100) {
    strip_tags($html, '<p><b>');
    $i++;
}
$php_time = microtime(true) - $start;

// Candidate: strip_tags_secure() with the same allow-list.
$start = microtime(true);
$i = 0;
while ($i < 100) {
    strip_tags_secure($html, '<p><b>');
    $i++;
}
$our_time = microtime(true) - $start;

echo "PHP strip_tags: ", $php_time, "s\n";
echo "Our version: ", $our_time, "s\n";
// Relative extra cost of the secure version, in percent of the native run time.
echo "Slowdown: ", (($our_time / $php_time) - 1) * 100, "%\n";


// Attack strings, grouped by technique.
$xss_attacks = [
    // Event handler injection
    '<img src="x" onerror="alert(1)">',

    // JavaScript URL
    '<a href="javascript:alert(1)">Click</a>',

    // CSS expression
    '<p style="width:expression(alert(1))">Test</p>',

    // Data URI with JavaScript
    '<iframe src="data:text/html,<script>alert(1)</script>"></iframe>',

    // Obfuscated attacks
    '<img src="x" oNerror="alert(1)">',
    '<a href="java&#115;cript:alert(1)">Click</a>'
];

echo "Security Test Results:\n";
foreach ($xss_attacks as $attack) {
    $php_result = strip_tags($attack, '<img><a><p><iframe>');
    $our_result = strip_tags_secure($attack, '<img><a><p><iframe>');

    echo "Attack: ", htmlspecialchars($attack), "\n";

    // Heuristic only: a result counts as vulnerable when the literal text "alert" is
    // still present, whether or not it would actually execute.
    echo "PHP strip_tags: ", htmlspecialchars($php_result), " - ";
    if (strpos($php_result, 'alert') === false) {
        echo "SAFE";
    } else {
        echo "VULNERABLE";
    }
    echo "\n";

    echo "Our version: ", htmlspecialchars($our_result), " - ";
    if (strpos($our_result, 'alert') === false) {
        echo "SAFE";
    } else {
        echo "VULNERABLE";
    }
    echo "\n\n";
}

// Simulated comment submitted through a form.
$user_comment = '
    <p>Great article!</p>
    <script>stealCookies()</script>
    <img src="x" onerror="maliciousCode()">
    <a href="javascript:phishing()">Click for prize</a>
';

// Native strip_tags(): the script tags go, but the attributes of the allowed tags are
// not filtered, so the onerror handler and the javascript: link are still in the output.
echo $php_cleaned = strip_tags($user_comment, '<p><img><a>');

// strip_tags_secure(): same allow-list, with attribute sanitising switched on.
// NOTE(review): a dangerous href is blanked to href="" rather than removed, and the
// whitespace between the elements is kept - confirm the exact output by running it.
echo $secure_cleaned = strip_tags_secure($user_comment, '<p><img><a>', [
    'remove_javascript_attributes' => true
]);

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment