-
-
Save soyuka/4468eab47aceb6abd1bf to your computer and use it in GitHub Desktop.
<?php | |
namespace Fry; | |
use JsonStreamingParser\Listener; | |
/**
 * Streaming listener that rebuilds every object found directly inside a
 * top-level JSON array and hands it to a callback as soon as that object has
 * been fully parsed.
 *
 * Nested objects and arrays are attached to their real parent container:
 * under the pending key when the parent is an object, at the next index when
 * the parent is an array.
 */
class ObjectListener implements Listener
{
    /** @var string|null Key announced by key() that has not been consumed yet **/
    private $_key;

    /** @var int Number of arrays currently open **/
    private $array_level = 0;

    /** @var int Number of objects currently open **/
    private $object_level = 0;

    /** @var array Alias (PHP reference) of the container currently being filled **/
    private $pointer;

    /**
     * @var array References to the enclosing containers, indexed by nesting
     *            depth (array_level + object_level). Used to step back to the
     *            parent when a container closes.
     */
    private $parents = array();

    /** @var array Root storage in which the document is (partially) rebuilt **/
    private $stack = array();

    private $callback, $end_callback;

    /**
     * @param callable      $callback     called each time an object that is a direct child of the
     *                                    root array has been fully parsed; it receives the buffered
     *                                    content of the root array (a one-element list holding the
     *                                    object when the root array contains only objects)
     * @param callable|null $end_callback optional, called without arguments at the end of the document
     *
     * @throws \InvalidArgumentException if $callback isn't callable
     */
    public function __construct($callback, $end_callback = null)
    {
        if (!is_callable($callback)) {
            throw new \InvalidArgumentException("Callback should be a callable function");
        }

        $this->callback = $callback;
        $this->end_callback = $end_callback;
    }

    /**
     * Position notifications are not used by this listener.
     */
    public function file_position($line, $char)
    {
    }

    /**
     * Document start
     * Reset the whole state and place the pointer on the root storage
     *
     * @return void
     */
    public function start_document()
    {
        $this->stack = array();
        $this->parents = array();
        $this->array_level = 0;
        $this->object_level = 0;
        $this->_key = null;
        $this->pointer =& $this->stack;
    }

    /**
     * Document end (EOF)
     * Drops the buffered data, then invokes the end callback when one was given
     *
     * @return void
     */
    public function end_document()
    {
        // release memory
        $this->start_document();

        if (is_callable($this->end_callback)) {
            call_user_func($this->end_callback);
        }
    }

    /**
     * Start object
     * Creates a new container for the object inside the current container
     *
     * @return void
     */
    public function start_object()
    {
        $this->object_level++;
        $this->openContainer();
    }

    /**
     * End object
     * Steps back to the parent container and, when the object that just closed
     * is a direct child of the root array, passes it on to the callback
     *
     * @return void
     */
    public function end_object()
    {
        $this->closeContainer();

        // One array and one object open: the pointer is back on the root array
        // and the object that just ended is one of its direct children.
        if ($this->array_level === 1 && $this->object_level === 1) {
            call_user_func_array($this->callback, [$this->pointer]);
            // Release everything buffered so far so memory use stays flat.
            $this->pointer = array();
        }

        $this->object_level--;
    }

    /**
     * Start array
     * Creates a new container for the array inside the current container
     *
     * @return void
     */
    public function start_array()
    {
        $this->array_level++;
        $this->openContainer();
    }

    /**
     * End array
     * Steps back to the parent container
     *
     * @return void
     */
    public function end_array()
    {
        $this->closeContainer();
        $this->array_level--;
    }

    /**
     * Called when a key is found
     *
     * @param string $key
     * @return void
     */
    public function key($key)
    {
        $this->_key = $key;
    }

    /**
     * Called when a scalar value is found
     * Stores it under the pending key, or appends it when there is none
     *
     * @param mixed $value may be a string, integer, boolean, null
     * @return void
     */
    public function value($value)
    {
        // Compare against null: "0" and "" are legitimate JSON keys.
        if ($this->_key !== null) {
            $this->pointer[$this->_key] = $value;
            // A key belongs to exactly one value; never let it leak to the next one.
            $this->_key = null;
            return;
        }

        $this->pointer[] = $value;
    }

    /**
     * Whitespace notifications are not used by this listener.
     */
    public function whitespace($whitespace)
    {
    }

    /**
     * Creates an empty container in the current one and moves the pointer onto it.
     * Must be called after the matching level counter has been incremented.
     *
     * @return void
     */
    private function openContainer()
    {
        if ($this->_key !== null) {
            $index = $this->_key;
            $this->_key = null;
        } else {
            $index = isset($this->pointer) ? count($this->pointer) : 0;
        }

        $this->pointer[$index] = array();

        // Remember the parent before moving down, so closeContainer() can return to it.
        $this->parents[$this->array_level + $this->object_level] =& $this->pointer;
        $this->pointer =& $this->pointer[$index];
    }

    /**
     * Moves the pointer back onto the parent of the container that just ended.
     * Must be called before the matching level counter is decremented.
     *
     * @return void
     */
    private function closeContainer()
    {
        $depth = $this->array_level + $this->object_level;

        $this->pointer =& $this->parents[$depth];
        unset($this->parents[$depth]);
    }
}
<?php
// Example: dump every object of a big JSON array, one at a time.
$testfile = __DIR__.'/example.json'; //https://gist.github.com/soyuka/a1d83ff9ff1a6c5cc269

// The listener class is declared in the Fry namespace, so reference it fully qualified.
$listener = new \Fry\ObjectListener(function($obj) {
    var_dump($obj);
});

$stream = fopen($testfile, 'r');
if ($stream === false) {
    throw new \RuntimeException('Unable to open '.$testfile);
}

try {
    // NOTE(review): recent releases of the parser library expose this class as
    // \JsonStreamingParser\Parser - confirm against the installed version.
    $parser = new JsonStreamingParser_Parser($stream, $listener);
    $parser->parse();
} catch (\Exception $e) {
    fclose($stream);
    throw $e;
}

// Release the handle on the success path too, not only when parsing fails.
fclose($stream);
For anyone wondering where "JsonStreamingParser\Listener" comes from, you can find the package in Salsify's JsonStreamingParser repo. However, there is no interface `JsonStreamingParser\Listener`, which is used in the code above. Instead, I ended up using `JsonStreamingParser\Listener\GeoJsonListener` in place of `ObjectListener`. My code ended up looking like this:
$testfile = __DIR__.'/sample.json';
$listener = new \JsonStreamingParser\Listener\GeoJsonListener(function($obj) {
    var_dump($obj);
});
$stream = fopen($testfile, 'r');
if ($stream === false) {
    throw new \RuntimeException('Unable to open '.$testfile);
}
try {
    $parser = new \JsonStreamingParser\Parser($stream, $listener);
    $parser->parse();
} catch (\Exception $e) {
    fclose($stream);
    throw $e;
}
// Release the handle on the success path too, not only when parsing fails.
fclose($stream);
You can generate sample JSON from this website. It generates properly formatted JSON. I don't know if GeoJsonListener resolves edge cases that the code above claims to handle, but in my personal use cases, GeoJsonListener works fine.
Actually, @MAXakaWIZARD hosts something that looks like an improved version of my code at https://github.com/MAXakaWIZARD/JsonCollectionParser. Everything else can be found at https://github.com/salsify/jsonstreamingparser.
This project, https://github.com/halaxa/json-machine, simplifies parsing big JSON so much that the only thing you need to parse a big JSON stream or file is a simple `foreach`. It doesn't get easier than that :)
Forked and updated here