Skip to content

Instantly share code, notes, and snippets.

@olsgreen
Last active April 29, 2018 07:02
Show Gist options
  • Save olsgreen/19a0b40f5851976069f897c9159fb9a0 to your computer and use it in GitHub Desktop.
Save olsgreen/19a0b40f5851976069f897c9159fb9a0 to your computer and use it in GitHub Desktop.
Strip all tags from an HTML fragment except selected tags and their attributes.
<?php
/**
 * Strip all tags from an HTML fragment except selected tags and their attributes.
 *
 * Elements that are not white-listed are removed together with everything
 * inside them, so the body of a <script> never reaches the output. On the
 * elements that remain, every attribute that is not white-listed for that
 * tag is removed.
 *
 * Example:
 *
 *   $content  = 'Hello world. <script>alert("XSS");</script>';
 *   $content .= 'But you can still click on this <a href="http://gog.gl" onclick="alert(\'XSS\')">link</a>';
 *
 *   $this->stripTags($content, ['a' => ['href']]);
 *
 *   > Hello world. But you can still click on this <a href="http://gog.gl">link</a>
 *
 * (The result may be wrapped in <p>...</p> when the fragment has no wrapper
 * element of its own, which is why <p> is always allowed.)
 *
 * NOTE(review): attribute values are not inspected, so a white-listed "href"
 * may still carry a "javascript:" URI. Validate URLs separately if the input
 * is untrusted.
 * NOTE(review): loadHTML() is given no charset hint here; non-ASCII input may
 * be mis-decoded. Confirm how callers encode $html.
 *
 * @param string $html      The HTML fragment to clean.
 * @param array  $whitelist Map of tag name => list of allowed attribute names,
 *                          e.g. ['a' => ['href']]. A tag given as a plain list
 *                          item (['b', 'i']) is allowed without attributes.
 *                          Names are matched case-insensitively.
 *
 * @return string The cleaned markup as produced by DOMDocument::saveHTML(),
 *                or an empty string when the input is empty or cannot be parsed.
 */
protected function stripTags($html, $whitelist = [])
{
    $html = (string) $html;

    // loadHTML() warns (PHP 7) or throws (PHP 8) on empty input.
    if (trim($html) === '') {
        return '';
    }

    // Normalise the white-list to [lower-case tag => [lower-case attribute, ...]].
    $allowed = [];
    foreach ((array) $whitelist as $tag => $attributes) {
        if (is_int($tag)) {
            // List form: the value is the tag name and no attributes are allowed.
            if (!is_string($attributes)) {
                continue;
            }
            $tag = $attributes;
            $attributes = [];
        }

        $allowed[strtolower((string) $tag)] = array_map('strtolower', (array) $attributes);
    }

    // <p></p> must be allowed as it's used to wrap content that has no
    // wrapper. The union operator keeps a caller-supplied 'p' entry intact
    // instead of overwriting its attribute list.
    $allowed += ['p' => []];

    $dom = new DOMDocument;

    // Collect libxml parse errors internally so that malformed or HTML5
    // markup does not surface as PHP warnings; restore the previous setting.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $loaded = $dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    if ($loaded === false) {
        return '';
    }

    $xpath = new DOMXPath($dom);

    // Remove all but the white-listed elements. Descendants of an already
    // removed element still have a parent (the detached subtree), so the
    // null check only guards against a node with no parent at all.
    foreach (iterator_to_array($xpath->query('//*')) as $element) {
        if (!isset($allowed[strtolower($element->tagName)]) && $element->parentNode !== null) {
            $element->parentNode->removeChild($element);
        }
    }

    // Remove the non-specified attributes from the remaining white-listed
    // elements. This query runs after the removal above, so it only sees
    // attributes of elements that are still in the document.
    foreach (iterator_to_array($xpath->query('//@*')) as $attribute) {
        $owner = $attribute->ownerElement;
        if ($owner === null) {
            continue;
        }

        $tag = strtolower($owner->tagName);
        $isAllowed = isset($allowed[$tag])
            && in_array(strtolower($attribute->nodeName), $allowed[$tag], true);

        if (!$isAllowed) {
            $owner->removeAttribute($attribute->nodeName);
        }
    }

    return $dom->saveHTML();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment