chrisguitarguy · February 27, 2013 04:15 · dearsina · Nov 20, 2022
diff --git a/big-xml.php b/big-xml.php
 <?php
 /**
 * An example of how to read huge XML files relatively quickly and efficiently
 * using a few core PHP libraries: XMLReader streams through the file, and
 * SimpleXML gives convenient access to each extracted element.
 */

 // Assume your file is very large, 140MB or something like that.
 $fn = __DIR__ . '/some_file.xml';

 // The tag we want to extract from the file.
 $tag = 'item';

 // We use XMLReader to "parse" the large XML file directly because it doesn't
 // load the entire tree into memory; it only tokenizes the input as it goes.
 $reader = new \XMLReader();

 // Now open our file.
 if (!$reader->open($fn)) {
    throw new \RuntimeException("Could not open {$fn} with XMLReader");
 }

 // From here on the reader holds the input open, so make sure it is closed
 // again even when building a SimpleXMLElement below throws.
 try {
    // Loop through the file; read() just advances to the next node.
    // XMLReader isn't aware of the document tree, so nodes get iterated
    // over as they appear in the file. We simply read until the end.
    while ($reader->read()) {

        // XMLReader::$name contains the current tag name; check whether it
        // matches the tag we're looking for. Closing tags carry the same name
        // as their opening tag, so only opening element nodes are accepted.
        // Once positioned on a match we can walk the remaining matches with
        // XMLReader::next().
        while ($reader->nodeType === \XMLReader::ELEMENT && $tag === $reader->name) {

            // XMLReader doesn't supply much of a usable API for the node
            // content, so convert the current node (markup included) to an
            // instance of `SimpleXMLElement`.
            $elem = new \SimpleXMLElement($reader->readOuterXML());

            // Now use SimpleXMLElement as you normally would.
            foreach ($elem->children() as $child) {
                echo $child->getName(), ': ', $child, PHP_EOL;
            }

            // Children in a certain namespace, even.
            foreach ($elem->children('http://purl.org/dc/elements/1.1/') as $child) {
                echo "{http://purl.org/dc/elements/1.1/}", $child->getName(), ': ', $child, PHP_EOL;
            }

            // Move on to the next one; next() skips the subtree just handled.
            $reader->next($tag);
        }
    }
 } finally {
    // Release the input file as soon as we are done with it.
    $reader->close();
 }
	<?php
	/**
	* An example of how to read huge XML files relatively quickly and efficiently
	* using a few core PHP libraries: XMLReader streams through the file, and
	* SimpleXML gives convenient access to each extracted element.
	*/

	// Assume your file is very large, 140MB or something like that.
	$fn = __DIR__ . '/some_file.xml';

	// The tag we want to extract from the file.
	$tag = 'item';

	// We use XMLReader to "parse" the large XML file directly because it doesn't
	// load the entire tree into memory; it only tokenizes the input as it goes.
	$reader = new \XMLReader();

	// Now open our file.
	if (!$reader->open($fn)) {
		throw new \RuntimeException("Could not open {$fn} with XMLReader");
	}

	// From here on the reader holds the input open, so make sure it is closed
	// again even when building a SimpleXMLElement below throws.
	try {
		// Loop through the file; read() just advances to the next node.
		// XMLReader isn't aware of the document tree, so nodes get iterated
		// over as they appear in the file. We simply read until the end.
		while ($reader->read()) {

			// XMLReader::$name contains the current tag name; check whether it
			// matches the tag we're looking for. Closing tags carry the same name
			// as their opening tag, so only opening element nodes are accepted.
			// Once positioned on a match we can walk the remaining matches with
			// XMLReader::next().
			while ($reader->nodeType === \XMLReader::ELEMENT && $tag === $reader->name) {

				// XMLReader doesn't supply much of a usable API for the node
				// content, so convert the current node (markup included) to an
				// instance of `SimpleXMLElement`.
				$elem = new \SimpleXMLElement($reader->readOuterXML());

				// Now use SimpleXMLElement as you normally would.
				foreach ($elem->children() as $child) {
					echo $child->getName(), ': ', $child, PHP_EOL;
				}

				// Children in a certain namespace, even.
				foreach ($elem->children('http://purl.org/dc/elements/1.1/') as $child) {
					echo "{http://purl.org/dc/elements/1.1/}", $child->getName(), ': ', $child, PHP_EOL;
				}

				// Move on to the next one; next() skips the subtree just handled.
				$reader->next($tag);
			}
		}
	} finally {
		// Release the input file as soon as we are done with it.
		$reader->close();
	}