-
-
Save soyuka/4468eab47aceb6abd1bf to your computer and use it in GitHub Desktop.
| <?php | |
| namespace Fry; | |
| use JsonStreamingParser\Listener; | |
/**
 * Streaming JSON listener that rebuilds the parsed data as nested PHP arrays
 * and fires a callback every time an object located directly inside the root
 * array (array level 1, object level 1) has been fully parsed.
 *
 * The callback receives `$this->stack[0]`, i.e. the root array as built so
 * far; the first entry of that array is removed right after the callback
 * returns so the processed object does not stay in memory.
 *
 * Objects are always appended to the array of the current array level; a
 * pending key is not used as the index of an object (see the todo on
 * end_array()).
 */
class ObjectListener implements Listener
{
    /** @var string|null Key announced by key() that has not been consumed yet */
    private $_key;

    /** @var int Array depth level */
    private $array_level = 0;

    /** @var int Object depth level */
    private $object_level = 0;

    /** @var array|null Reference to the array currently being filled (a JSON object or a JSON array) */
    private $pointer;

    /** @var array References to the arrays being built, indexed by array depth level */
    private $array_pointers = array();

    /** @var array References to the objects being built, indexed by object depth level */
    private $object_pointers = array();

    /** @var array Main array that stores the structure currently being built */
    private $stack = array();

    /** @var callable Called when an object of the root array has been fully parsed */
    private $callback;

    /** @var callable|null Called at the end of the document when it is callable */
    private $end_callback;

    /**
     * @param callable      $callback     called with the root array content each time
     *                                    a root-level object has been fully parsed
     * @param callable|null $end_callback called without arguments at the end of the
     *                                    document; ignored when it is not callable
     *
     * @throws \InvalidArgumentException if $callback isn't callable
     */
    public function __construct($callback, $end_callback = null)
    {
        if (!is_callable($callback)) {
            throw new \InvalidArgumentException("Callback should be a callable function");
        }

        $this->callback = $callback;
        $this->end_callback = $end_callback;
    }

    /**
     * Position notifications are not used by this listener.
     *
     * @param int $line
     * @param int $char
     *
     * @return void
     */
    public function file_position($line, $char)
    {
    }

    /**
     * Document start
     * Resets the whole state and places the pointer on the stack.
     *
     * @return void
     */
    public function start_document()
    {
        $this->stack = array();
        $this->array_pointers = array();
        $this->array_level = 0;
        $this->object_level = 0;
        $this->object_pointers = array();
        $this->_key = null;
        $this->pointer =& $this->stack;
    }

    /**
     * Document end (EOF)
     * Resets the state first, then notifies the end callback when one is callable.
     *
     * @return void
     */
    public function end_document()
    {
        // Release memory before handing control to user code.
        $this->start_document();

        if (is_callable($this->end_callback)) {
            call_user_func($this->end_callback);
        }
    }

    /**
     * Start object
     * Appends a new empty array (the object) to the array of the current
     * array level and makes it the current pointer.
     *
     * @return void
     */
    public function start_object()
    {
        $this->object_level++;

        // The object is placed by position, never by key. Drop any pending key
        // so it cannot leak onto a later keyless value or array.
        $this->_key = null;

        // Point on the array of the current level.
        $this->pointer =& $this->array_pointers[$this->array_level];

        // Next free index of that array.
        $array_index = isset($this->pointer) ? count($this->pointer) : 0;

        // Build the object there and move the pointer onto it.
        $this->pointer[$array_index] = array();
        $this->pointer =& $this->pointer[$array_index];

        // Remember it so end_array() can come back to it.
        $this->object_pointers[$this->object_level] =& $this->pointer;
    }

    /**
     * End object
     * Moves the pointer back to the array of the current level and, when the
     * object that just ended sits directly in the root array, runs the callback.
     *
     * @return void
     */
    public function end_object()
    {
        $this->pointer =& $this->array_pointers[$this->array_level];

        // A full object of the root array has been parsed: hand it over.
        if ($this->array_level == 1 && $this->object_level == 1) {
            call_user_func($this->callback, $this->stack[0]);
            array_shift($this->stack[0]); // release this item from memory
        }

        $this->object_level--;
    }

    /**
     * Start array
     * Creates a new array under the pending key, or at the next free index
     * when there is no pending key, and makes it the current pointer.
     *
     * @return void
     */
    public function start_array()
    {
        $this->array_level++;

        // Compare against null: "0" and "" are valid JSON keys but falsy in PHP.
        if ($this->_key !== null) {
            $index = $this->_key;
            $this->_key = null;
        } else {
            $index = isset($this->pointer) ? count($this->pointer) : 0;
        }

        // This is our array, point on it.
        $this->pointer[$index] = array();
        $this->pointer =& $this->pointer[$index];

        // Store the pointer for this depth.
        $this->array_pointers[$this->array_level] =& $this->pointer;
    }

    /**
     * End array
     * Moves the pointer back to the last known object.
     *
     * @todo according to both levels, point to the nearest array or object
     *
     * @return void
     */
    public function end_array()
    {
        $this->pointer =& $this->object_pointers[$this->object_level];
        $this->array_level--;
    }

    /**
     * Called when a key is found; the key is kept until the next value or array uses it.
     *
     * @param string $key
     *
     * @return void
     */
    public function key($key)
    {
        $this->_key = $key;
    }

    /**
     * Called when a value is found.
     * The value is stored under the pending key, or appended when there is none.
     *
     * @param mixed $value may be a string, integer, boolean, null
     *
     * @return void
     */
    public function value($value)
    {
        if ($this->_key !== null) {
            $this->pointer[$this->_key] = $value;
            // The key is consumed; keeping it would mislabel the next keyless value.
            $this->_key = null;
            return;
        }

        $this->pointer[] = $value;
    }

    /**
     * Whitespace is ignored by this listener.
     *
     * @param string $whitespace
     *
     * @return void
     */
    public function whitespace($whitespace)
    {
    }
}
<?php
// Example usage: stream example.json through an ObjectListener and dump
// whatever the listener passes to the callback.
$testfile = __DIR__.'/example.json'; //https://gist.github.com/soyuka/a1d83ff9ff1a6c5cc269

$listener = new ObjectListener(function($obj) {
    var_dump($obj);
});

$stream = fopen($testfile, 'r');
if ($stream === false) {
    throw new RuntimeException('Unable to open ' . $testfile . ' for reading');
}

try {
    // NOTE(review): the parser class name depends on the installed
    // jsonstreamingparser version — confirm it matches the Listener interface in use.
    $parser = new JsonStreamingParser_Parser($stream, $listener);
    $parser->parse();
} finally {
    // Close the stream on success as well as on failure; exceptions still propagate.
    fclose($stream);
}
For anyone wondering where "JsonStreamingParser\Listener" comes from, you can find the package in Salsify's JsonStreamingParser repo. However, there is no interface JsonStreamingParser\Listener, which is used in the code above. Instead, I ended up using JsonStreamingParser\Listener\GeoJsonListener in place of ObjectListener. My code ended up looking like this:
// Stream sample.json through the library's GeoJsonListener and dump each item.
$testfile = __DIR__.'/sample.json';

$listener = new \JsonStreamingParser\Listener\GeoJsonListener(function($obj) {
    var_dump($obj);
});

$stream = fopen($testfile, 'r');
if ($stream === false) {
    throw new \RuntimeException('Unable to open ' . $testfile . ' for reading');
}

try {
    $parser = new \JsonStreamingParser\Parser($stream, $listener);
    $parser->parse();
} finally {
    // Close the stream on success as well as on failure; exceptions still propagate.
    fclose($stream);
}
You can generate sample JSON from this website. It generates properly formatted JSON. I don't know if GeoJsonListener resolves edge cases that the code above claims to handle, but in my personal use cases, GeoJsonListener works fine.
Actually, @MAXakaWIZARD hosts an improved version of something that looks like my code at https://github.com/MAXakaWIZARD/JsonCollectionParser. Everything else can be found at https://github.com/salsify/jsonstreamingparser.
This project, https://github.com/halaxa/json-machine, simplifies parsing big JSON so much that the only thing you need to parse a big JSON stream or file is a simple foreach. It doesn't get easier than that :)
Forked and updated here