-
-
Save hakre/1879466 to your computer and use it in GitHub Desktop.
A function for people who complain that preg_match_all is less to type and conclude that regex must therefore be better than a DOM parser for parsing HTML.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Run an XPath query against an HTML document and return the matched nodes
 * in a layout similar to preg_match_all(): index 0 holds the full markup of
 * every matched node, index 1 holds the markup of that node's children.
 *
 * The parsed document is kept in static variables. Pass $html once; later
 * calls that omit $html (or pass '') query the most recently loaded document.
 *
 * @param string $query xpath query/expression
 * @param string $html  (optional) HTML to load; '' re-uses the document from a previous call
 * @return array|FALSE array(0 => outer markup per match, 1 => inner markup per match)
 *                     on success, FALSE on error (the query failed, or no document has
 *                     been loaded yet)
 */
function xpath_match_all($query, $html = '')
{
    static $dom;
    static $xpath;

    if (!$dom) {
        $dom = new DOMDocument;
    }

    if ($html !== '') {
        // Collect parser diagnostics internally while loading, then drop them
        // and hand the caller's own libxml error-handling setting back instead
        // of forcing it to "off".
        $previousSetting = libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        $xpath = new DOMXPath($dom);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
    }

    // Nothing has been loaded yet (first call without $html): report an error
    // instead of calling a method on null.
    if (!$xpath) {
        return FALSE;
    }

    $queryResult = $xpath->query($query);
    if (FALSE === $queryResult) {
        return FALSE;
    }

    // DOMDocument::saveHTML() only accepts a node argument from PHP 5.3.6 on;
    // older versions fall back to saveXML(). Decided once, not per node.
    $save = version_compare(PHP_VERSION, '5.3.6', '<') ? 'saveXML' : 'saveHTML';

    $matches = array(array(), array());
    foreach ($queryResult as $resultNode) {
        $matches[0][] = $dom->$save($resultNode);

        $innerHtml = '';
        if ($resultNode->childNodes) {
            foreach ($resultNode->childNodes as $childNode) {
                $innerHtml .= $dom->$save($childNode);
            }
        }
        $matches[1][] = $innerHtml;
    }

    return $matches;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment