Created
May 23, 2016 14:01
-
-
Save hussainweb/fec4da41589ded834a3085e6d17a4a9b to your computer and use it in GitHub Desktop.
HTML parsing support (primarily for Drupal migrate framework, but could be used generally)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * @file
 * HTML parsing support.
 */
/**
 * Parses an HTML string and exposes the resulting tree to XPath queries.
 *
 * The markup is loaded with DOMDocument::loadHTML() and then imported into
 * SimpleXML, so xpath() hands back SimpleXMLElement objects.
 */
class MigrateHtmlParser {

  /**
   * The value passed to the constructor, stored unmodified.
   */
  protected $html;

  /**
   * The DOMDocument built from the HTML.
   */
  protected $dom;

  /**
   * The SimpleXMLElement for the document root, imported from the DOM tree.
   */
  protected $elements;

  /**
   * Parses the given HTML.
   *
   * @param string $html
   *   The HTML markup to parse. Malformed ("soup") markup is accepted.
   *
   * @throws UnexpectedValueException
   *   If the markup is empty or no document tree could be built from it.
   */
  public function __construct($html) {
    $this->html = $html;

    // An empty document has no root element to query, and loadHTML() rejects
    // an empty string (warning, or ValueError on PHP 8), so fail early with
    // the documented exception.
    $markup = (string) $html;
    if (trim($markup) === '') {
      throw new UnexpectedValueException("The html could not be parsed");
    }

    // DOM can load HTML soup. But, HTML soup can throw warnings, suppress them.
    $old = libxml_use_internal_errors(TRUE);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($markup);
    if (!$old) {
      // Internal error collection was switched on only for this call, so
      // discard what was collected; otherwise the buffer grows with every
      // parsed document. If the caller had already enabled it, the buffer is
      // theirs to manage and is left untouched.
      libxml_clear_errors();
    }
    libxml_use_internal_errors($old);

    // The DOMDocument object itself is always truthy; the outcome of the
    // parse is reported by loadHTML() and by the presence of a root element.
    if (!$loaded || $dom->documentElement === NULL) {
      throw new UnexpectedValueException("The html could not be parsed");
    }

    // It's much easier to work with simplexml than DOM, luckily enough
    // we can just simply import our DOM tree.
    $elements = simplexml_import_dom($dom);
    // Use instanceof rather than a truthiness test: a SimpleXMLElement for an
    // element without children or attributes evaluates to FALSE.
    if (!($elements instanceof SimpleXMLElement)) {
      throw new UnexpectedValueException("The html could not be parsed");
    }

    $this->dom = $dom;
    $this->elements = $elements;
  }

  /**
   * Runs an XPath query against the parsed document.
   *
   * @param string $xpath
   *   The XPath expression to evaluate.
   *
   * @return array
   *   The matching SimpleXMLElement objects, or an empty array if nothing
   *   matched or the query failed.
   */
  public function xpath($xpath) {
    $result = $this->elements->xpath($xpath);
    // Depending on the PHP / libxml version a query without matches yields
    // either FALSE or an empty array. Normalize every falsish value to an
    // empty array so callers can always use foreach(...) on the result.
    return $result ? $result : array();
  }

}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment