Last active
September 28, 2015 00:48
-
-
Save gooh/1358174 to your computer and use it in GitHub Desktop.
function for people whining that preg_match_all is less to type and concluding from it that regex must be better for parsing html than a dom parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Runs an XPath query against an HTML string and returns the results.
 *
 * The parsed document is kept in static variables, so the function can be
 * called repeatedly with different queries against the same HTML by omitting
 * the second argument after the first call.
 *
 * @param string $query The XPath query to run on $html
 * @param string $html  The HTML to parse. Uses the previously parsed string if omitted
 * @return array Two parallel lists modelled after preg_match_all(): index 0
 *               holds the serialized markup of every matched node, index 1
 *               holds the concatenated markup of that node's children.
 *               Both lists are empty when the query does not yield a node list.
 * @throws LogicException If $html is omitted before any HTML has been parsed
 */
function xpath_match_all($query, $html = '')
{
    static $dom;
    static $xpath;

    if (!$dom) {
        $dom = new DOMDocument;
    }

    if ($html !== '') {
        // Remember the caller's libxml error mode so it can be restored
        // afterwards instead of being unconditionally switched off.
        $previousErrorMode = libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        $xpath = new DOMXPath($dom);
        if (!$previousErrorMode) {
            // The parse errors were only buffered because of us; drop them so
            // they do not pile up across calls. When the caller had buffering
            // enabled already, the errors are left for the caller to inspect.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previousErrorMode);
    }

    if (!$xpath) {
        throw new LogicException(
            'xpath_match_all() needs an HTML string on its first call'
        );
    }

    $matches = array(array(), array());

    // evaluate() returns a scalar for expressions such as count(//a) and
    // false for invalid expressions; only node lists can be iterated.
    $nodes = $xpath->evaluate($query);
    if (!$nodes instanceof DOMNodeList) {
        return $matches;
    }

    // DOMDocument::saveHTML() accepts a node argument only since PHP 5.3.6.
    $save = version_compare(PHP_VERSION, '5.3.6', '<') ? 'saveXML' : 'saveHTML';

    foreach ($nodes as $resultNode) {
        $matches[0][] = $dom->$save($resultNode);

        $innerHtml = '';
        if ($resultNode->childNodes) {
            foreach ($resultNode->childNodes as $childNode) {
                $innerHtml .= $dom->$save($childNode);
            }
        }
        $matches[1][] = $innerHtml;
    }

    return $matches;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Store the current state of $user_errors in libxml_use_internal_errors()
https://gist.github.com/4546595/revisions