Skip to content

Instantly share code, notes, and snippets.

@julp
Last active December 22, 2015 03:59
Show Gist options
  • Select an option

  • Save julp/6414365 to your computer and use it in GitHub Desktop.

Select an option

Save julp/6414365 to your computer and use it in GitHub Desktop.
[PHP] Truncate HTML text to a given length
<?php
// Demo: dump the result of html_cut() for sample fragments, each limited to 3 characters of text.
var_dump(html_cut('<b>12<a href="#">34<i>56</i></a></b><u>78</u>', 3));
// Disabled sample that uses a named HTML entity (presumably because &eacute; is not accepted by the XML parser):
// var_dump(html_cut('<b>1&eacute;<a href="#">34<i>56</i></a></b><u>78</u>', 3));
var_dump(html_cut('<b>1<![CDATA[é]]><a href="#">34<i>56</i></a></b><u>78</u>', 3));
// A literal NULL is dumped last, so the output always ends with a NULL entry.
var_dump(NULL);
/**
 * Truncate a markup fragment to at most $length characters of text while
 * keeping the surrounding tags balanced.
 *
 * The fragment is parsed as XML (DOMDocumentFragment::appendXML), so it must
 * be well-formed XML. Only the characters of text and CDATA nodes are
 * counted; tags, attributes and comments are free. The text node in which the
 * limit is reached is shortened, every node that follows it in document order
 * is removed, and its ancestors are kept so the result stays well-formed.
 * The result is serialized with DOMDocument::saveXML().
 *
 * Security note: do not html_entity_decode() the input before calling this
 * function; decoding would turn escaped markup into live markup (XSS).
 *
 * @param string $string UTF-8 encoded, well-formed XML/XHTML fragment.
 * @param int    $length Maximum number of text characters to keep. Negative
 *                       values are treated as 0.
 *
 * @return string The truncated fragment, or '' when $string is empty or
 *                cannot be parsed (libxml still reports the parse warning).
 */
function html_cut(/* UTF-8 */ $string, $length)
{
    $string = (string) $string;
    if ($string === '') {
        // Nothing to parse; also avoids appending an empty fragment below.
        return '';
    }
    // A negative limit would otherwise reach mb_substr() as a negative length
    // and trim characters from the END of the first text node.
    $length = max(0, (int) $length);

    $dom = new DomDocument;
    // Create a fake root which wraps the fragment; it is stripped again on output.
    $dom->appendChild($dom->createElement('span'));
    $root = $dom->documentElement;
    $root->setAttribute('class', 'root');
    $dom->preserveWhiteSpace = FALSE; // kept from the original; appears to have no effect on appendXML()

    // Import the markup into the document.
    $fragment = $dom->createDocumentFragment();
    if (!$fragment->appendXML($string) || !$fragment->hasChildNodes()) {
        // Malformed input: appendXML() has already emitted its warning.
        return '';
    }
    $root->appendChild($fragment);

    // Walk the tree in document order with an explicit stack.
    $count = 0;
    $pending = array($root);
    while (count($pending) > 0) {
        $node = array_pop($pending);

        if ($node->nodeType === XML_TEXT_NODE || $node->nodeType === XML_CDATA_SECTION_NODE) {
            $value = $node->nodeValue;
            $chars = mb_strlen($value, 'UTF-8');

            if ($count + $chars >= $length) {
                // Limit reached inside (or exactly at the end of) this node.
                // The encoding is passed explicitly so the cut never depends
                // on mb_internal_encoding().
                $node->nodeValue = mb_substr($value, 0, $length - $count, 'UTF-8');

                // Drop everything after the cut: the following siblings of the
                // text node and of each of its ancestors below the fake root.
                for ($n = $node; !$n->isSameNode($root); $n = $n->parentNode) {
                    while ($n->nextSibling !== null) {
                        $n->parentNode->removeChild($n->nextSibling);
                    }
                }
                break;
            }

            $count += $chars;
            continue;
        }

        // Push children last-to-first so they are popped first-to-last.
        for ($child = $node->lastChild; $child !== null; $child = $child->previousSibling) {
            $pending[] = $child;
        }
    }

    // Unwrap: serialize the children of the fake root, not the root itself.
    $output = '';
    foreach ($root->childNodes as $child) {
        $output .= $dom->saveXML($child);
    }

    return $output;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment