-
-
Save dj9090/ed1e740c274eb69e142d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php

declare(strict_types=1);

/**
 * An example of how to read huge XML files relatively quickly and efficiently
 * using a few core PHP libraries (XMLReader for streaming, SimpleXML for
 * convenient access to each extracted element).
 *
 * The script prints every child of each matching element as "name: value",
 * followed by the element's Dublin Core children prefixed with their
 * namespace URI in braces.
 *
 * Throws \RuntimeException when the file cannot be opened or a matching
 * element cannot be serialized.
 */

// Assume your file is very large, 140MB or something like that.
$fn = __DIR__ . '/some_file.xml';

// The tag we want to extract from the file.
$tag = 'item';

// Namespace whose children are printed in addition to the un-namespaced ones.
$dcNamespace = 'http://purl.org/dc/elements/1.1/';

// We use XMLReader to "parse" the large XML file directly because it doesn't
// load the entire tree into memory; it just tokenizes enough to walk the
// document node by node.
$reader = new \XMLReader();

// Now open our file.
if (!$reader->open($fn)) {
    throw new \RuntimeException("Could not open {$fn} with XMLReader");
}

try {
    // Loop through the file; read() just advances to the next node. Nodes are
    // visited in the order they appear in the file, so we simply read until
    // the end of the document.
    while ($reader->read()) {
        // XMLReader::$name holds the current tag name. Only opening element
        // nodes are accepted: next($tag) matches on local name and may stop on
        // a node that is not an element start, which must not be serialized
        // as an item.
        while ($reader->nodeType === \XMLReader::ELEMENT && $tag === $reader->name) {
            // XMLReader doesn't supply much of a usable API for the node's
            // content, so convert the current node into a SimpleXMLElement.
            $xml = $reader->readOuterXml();
            if ($xml === '') {
                // Fail with a descriptive message instead of letting
                // SimpleXMLElement complain about an unparsable empty string.
                throw new \RuntimeException("Could not read <{$tag}> element from {$fn}");
            }
            $elem = new \SimpleXMLElement($xml);

            // Now use SimpleXMLElement as you normally would.
            foreach ($elem->children() as $child) {
                echo $child->getName(), ': ', $child, PHP_EOL;
            }

            // Children in a certain namespace, even.
            foreach ($elem->children($dcNamespace) as $child) {
                echo '{', $dcNamespace, '}', $child->getName(), ': ', $child, PHP_EOL;
            }

            // Jump to the next matching node, skipping this element's subtree.
            // When none is left the reader reaches the end of the document,
            // so both loops terminate.
            $reader->next($tag);
        }
    }
} finally {
    // Always release the underlying stream, even if processing threw.
    $reader->close();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment