Skip to content

Instantly share code, notes, and snippets.

@xeoncross
Created August 6, 2011 17:32
Show Gist options
  • Save xeoncross/1129547 to your computer and use it in GitHub Desktop.
Save xeoncross/1129547 to your computer and use it in GitHub Desktop.
<?php
// Load the author's helper file from a hard-coded absolute path.
// NOTE(review): presumably this file defines ph(), which the rest of this script
// calls (ph()->Net->XML(), ph()->Dump()) — confirm. The path only exists on the
// original author's machine and must be adjusted before running elsewhere.
require_once('/home/alix/_.php');
// NOTE(review): the return value is discarded here; presumably this call
// initialises the helper before first use — confirm.
ph();
/**
 * Sanitise an HTML fragment, keeping only white-listed tags and attributes.
 *
 * <script> and <style> elements are always removed together with their content.
 * When $tags is not an array, every remaining tag is stripped.
 *
 * @param string     $html HTML fragment to clean.
 * @param array|null $tags Map of tag name => attribute rule, where the rule is one of:
 *                         - true: keep the tag and all of its attributes;
 *                         - 'id|class': pipe-separated list of attribute names to keep;
 *                         - array('id', 'class'): list of attribute names to keep;
 *                         - anything else ('', null, false, array()): keep the tag, strip every attribute.
 *                         Per-attribute value patterns (name => regex) are not supported.
 *
 * @return string The cleaned fragment.
 */
function Purify($html, $tags = null)
{
    // Drop <script>/<style> blocks including their content; an unterminated
    // block is removed up to the end of the input (\z alternative).
    $html = preg_replace('~<(script|style)[^>]*>.*?(?:</\1>|\z)~is', '', $html);

    if (is_array($tags) !== true)
    {
        return strip_tags($html);
    }

    // Keep only the white-listed tags (single pass; the allow-list is "<a><b>...").
    $html = strip_tags($html, '<' . implode('><', array_keys($tags)) . '>');

    // NOTE(review): every early return below hands back markup whose attributes
    // have NOT been filtered (same fall-back as before). Consider failing closed
    // (strip_tags($html)) if this is used on untrusted input without DOM support.
    if (function_exists('dom_import_simplexml') !== true)
    {
        return $html;
    }

    // Explicit type guard instead of "@" suppression: dom_import_simplexml()
    // only accepts a SimpleXMLElement and raises on anything else.
    $xml = ph()->Net->XML($html);

    if (($xml instanceof SimpleXMLElement) !== true)
    {
        return $html;
    }

    $element = dom_import_simplexml($xml);

    if (is_object($element) !== true)
    {
        return $html;
    }

    $dom = $element->ownerDocument;

    foreach ($tags as $tag => $attributes)
    {
        if ($attributes === true)
        {
            continue; // every attribute of this tag is allowed
        }

        // Normalise the rule ONCE per tag. Doing this inside the node loop would
        // overwrite the rule with an array and break on the second element.
        if (is_array($attributes) === true)
        {
            $allowed = array_values($attributes);
        }
        else if ((is_string($attributes) === true) && ($attributes !== ''))
        {
            $allowed = explode('|', $attributes);
        }
        else
        {
            $allowed = array(); // '', null, false, ...: nothing is allowed
        }

        foreach ($dom->getElementsByTagName($tag) as $node)
        {
            // Walk backwards so removals do not shift the indexes still to be visited.
            for ($i = $node->attributes->length - 1; $i >= 0; --$i)
            {
                $attribute = $node->attributes->item($i);

                if (in_array($attribute->name, $allowed, true) !== true)
                {
                    $node->removeAttributeNode($attribute);
                }
            }
        }
    }

    // Strip the document scaffolding from the serialised output: doctype,
    // <html>/<body> tags, and one leading <p> / trailing </p> (presumably
    // wrappers added by the parser around a bare fragment).
    return preg_replace(array('~<(?:!DOCTYPE|/?(?:html|body))[^>]*>[[:space:]]*~i', '~^<p>|</p>$~i'), '', $dom->saveHTML());
}
// Demo: push one deliberately malformed fragment through Purify() several
// times, varying only the attribute rule attached to <b>, and dump each result.
$html = 'hel<a>lo wo<b ID="one" foo="bar" class="strong">r</b>ld</p>';

foreach (array('id|class', null, false, array()) as $rule)
{
    ph()->Dump(Purify($html, array('b' => $rule)));
}
// -----------------------------------------------------------------------
// Reference notes: the shapes the $tags argument is meant to take.
// Each assignment below overwrites the previous one and none of them is
// passed to Purify() here; they only illustrate the intended rules.
// -----------------------------------------------------------------------

// Empty map: strip all tags.
$tags = array();

// Empty / falsy rules: the tag is meant to survive, its attributes are not.
$tags = array(
    'a' => '',      // keep <a>, drop every attribute
    'b' => null,    // keep <b>, drop every attribute
    'c' => false,   // keep <c>, drop every attribute
    'd' => array(), // keep <d>, drop every attribute
);

// Attribute allow-lists, as a pipe-separated string or as a list.
$tags = array(
    'a' => 'id|class',           // keep <a>; of its attributes keep only "id" and "class"
    'b' => array('id', 'class'), // keep <b>; of its attributes keep only "id" and "class"
);

// TODO: per-attribute value patterns.
$tags = array(
    // keep <b>; keep its "id" attribute only when the value matches the regex
    'b' => array(
        'id' => 'post[0-9]+|comment[0-9]+',
    ),
);
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment