-
-
Save siberex/4111285 to your computer and use it in GitHub Desktop.
A coroutine example: Streaming XML parsing using xml_parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
error_reporting(E_ALL); | |
/* Data can be sent to coroutines using `$coroutine->send($data)`. The sent data will then
 * be the result of the `yield` expression. Thus it can be received using code like
 * `$data = yield;`.
 */
/* What we're building in this script is a coroutine-based streaming XML parser. The PHP
 * extension for parsing streamed XML is the XML Parser extension (the xml_* functions).
 * It is used by defining a set of callback functions for various events (like start tag,
 * end tag, content).
 *
 * This event model makes the parsing process very complicated, because you basically
 * have to implement your own state machine (which takes more and more boilerplate code
 * the more complicated the XML gets).
 *
 * To solve this problem, we build a wrapper (the following function), which redirects
 * the events to a coroutine ($target). This is done simply using
 * `$target->send([$eventName, $data])`.
 */
/**
 * Coroutine that feeds XML chunks into an XML parser and forwards the parser
 * events to $target as [$eventName, $data] pairs:
 *
 *   ['start', [$name, $attributes]]  - opening tag
 *   ['end', $name]                   - closing tag
 *   ['text', $text]                  - character data
 *
 * Chunks are supplied with `send($chunk)`. Sending an empty value (null, false,
 * '') ends the coroutine. The parser is never told that the document is
 * complete (there is no final xml_parse() call), so ending the stream in the
 * middle of a document is not reported as an error.
 *
 * @param Generator $target coroutine that receives the parser events
 *
 * @throws Exception when xml_parse() reports an error for a chunk
 */
function streamingXMLParser($target) {
    $xmlParser = xml_parser_create();

    xml_set_element_handler(
        $xmlParser,
        function ($xmlParser, $name, array $attributes) use ($target) {
            $target->send(['start', [$name, $attributes]]);
        },
        function ($xmlParser, $name) use ($target) {
            $target->send(['end', $name]);
        }
    );
    xml_set_character_data_handler(
        $xmlParser,
        function ($xmlParser, $text) use ($target) {
            $target->send(['text', $text]);
        }
    );

    try {
        while (true) {
            $data = yield;

            // An empty value means "no more data". The string "0" is falsy in
            // PHP but is a perfectly valid chunk of XML text, so it must not
            // end the stream.
            if (!$data && $data !== '0') {
                break;
            }

            if (!xml_parse($xmlParser, $data)) {
                throw new Exception(sprintf(
                    'XML error "%s" on line %d',
                    xml_error_string(xml_get_error_code($xmlParser)),
                    xml_get_current_line_number($xmlParser)
                ));
            }
        }
    } finally {
        // Release the parser on every exit path, including the parse-error
        // exception above. Since PHP 8.0 the parser is an object and
        // xml_parser_free() has no effect, so it is only called for resources.
        if (is_resource($xmlParser)) {
            xml_parser_free($xmlParser);
        }
    }
}
/* Inside the target coroutine the actual parsing happens. The events are received | |
* using `list($event, $data) = yield`. The main advantage that coroutines bring | |
* here is that you can fetch the events in nested loops. This way you are implicitly | |
* building a state machine (but the state is managed by PHP, not you!) | |
* | |
* This particular coroutine parses bus location data (for samples scroll down). The | |
* result is passed to another $target coroutine. | |
*/ | |
/**
 * Coroutine that turns a stream of [$event, $data] parser events into one
 * dictionary per BUS element and sends each dictionary to $target.
 *
 * Events are ignored until a 'start' event for BUS arrives. Inside a BUS
 * element the text buffer is cleared on every 'start' event, extended on every
 * 'text' event, and stored under the lowercased element name on every 'end'
 * event. The 'end' event for BUS itself sends the collected dictionary to
 * $target and resumes waiting for the next BUS.
 *
 * @param Generator $target coroutine that receives the per-bus dictionaries
 */
function busXMLParser($target) {
    for (;;) {
        list($type, $payload) = yield;

        // Skip everything that is not the opening tag of a BUS element.
        if ($type != 'start' || $payload[0] != 'BUS') {
            continue;
        }

        $fields = [];
        $buffer = '';

        for (;;) {
            list($type, $payload) = yield;

            switch ($type) {
                case 'start':
                    // A new child element begins: discard any text seen so far.
                    $buffer = '';
                    break;

                case 'text':
                    // Text may arrive in several pieces, so accumulate it.
                    $buffer .= $payload;
                    break;

                case 'end':
                    if ($payload == 'BUS') {
                        $target->send($fields);
                        // Leave both the switch and the inner loop.
                        break 2;
                    }
                    $fields[strtolower($payload)] = $buffer;
                    break;
            }
        }
    }
}
/* This coroutine prints out the info it receives from the bus XML parser. */ | |
/**
 * Coroutine that prints one line for every bus dictionary it is sent.
 *
 * Each received value is read at the keys 'id', 'latitude' and 'longitude'.
 */
function busLocationPrinter() {
    for (;;) {
        $bus = yield;
        echo 'Bus ' . $bus['id'] . ' is currently at ' . $bus['latitude'] . '/' . $bus['longitude'] . "\n";
    }
}
/* Here we are building up a coroutine pipeline. You should read this as: | |
* The streaming XML parser is passing data to the bus XML parser, which | |
* is passing data to the bus location printer. | |
*/ | |
/* Build the pipeline: XML chunks go into the streaming XML parser, its events
 * go to the bus XML parser, and the resulting bus dictionaries go to the
 * bus location printer. */
$parser = streamingXMLParser(busXMLParser(busLocationPrinter()));

/* I don't have access to a real bus location API, so I'll just stream some
 * fictional sample data */
$parser->send('<?xml version="1.0"?><buses>');
while (true) {
    sleep(1);
    // mt_rand() / mt_getrandmax() yields a float between 0 and 1. It replaces
    // lcg_value(), which is deprecated as of PHP 8.4 and would raise a
    // deprecation notice on every iteration under error_reporting(E_ALL).
    $parser->send(sprintf(
        '<bus><id>%d</id><latitude>%f</latitude><longitude>%f</longitude></bus>',
        mt_rand(1, 1000),
        mt_rand() / mt_getrandmax(),
        mt_rand() / mt_getrandmax()
    ));
}
/* If your head is buzzing now, that's a good thing :P */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment