Skip to content

Instantly share code, notes, and snippets.

@beyondlimits
Created April 10, 2019 18:42
Show Gist options
  • Select an option

  • Save beyondlimits/414b99f9f34e14b92d058826b5678ac3 to your computer and use it in GitHub Desktop.

Select an option

Save beyondlimits/414b99f9f34e14b92d058826b5678ac3 to your computer and use it in GitHub Desktop.
<?php
require_once 'EasyXMLParser.php';
/**
 * Loads an Amazon S3 bucket listing (a <ListBucketResult> XML document).
 *
 * The text of every direct child of the root element other than <Contents>
 * is collected into a property map keyed by lower-cased tag name, and every
 * <Contents> element becomes one associative array (lower-cased child tag
 * name => text) in the contents list.
 *
 * Both are exposed read-only through __get(): `$listing->contents` returns
 * the item list, any other name is looked up (case-insensitively) in the
 * property map.
 */
class AmazonS3XMLParser
{
    /** @var array Text of the root's direct children, keyed by tag name. */
    protected $properties = array();

    /** @var array One associative array per <Contents> element. */
    protected $contents = array();

    /** @var array Fields of the <Contents> element currently being read. */
    protected $currentItem = array();

    /**
     * Parses the given file immediately.
     *
     * @param string $filename Path of the XML file to read.
     */
    function __construct($filename) {
        $root = 'listbucketresult';
        $parser = new EasyXMLParser;

        // The rules are registered with separate calls instead of a fluent
        // chain so that this class does not depend on rule() returning the
        // parser instance.
        $parser->rule("$root ?", array(
            'cdata' => function ($data, $parser) {
                $this->properties = $this->appendText($this->properties, $parser, $data);
            }
        ));

        $parser->rule("$root contents", array(
            'start' => function () {
                $this->currentItem = array();
            },
            'end' => function () {
                $this->contents[] = $this->currentItem;
                // Reset instead of unset(): removing a declared property
                // would route later reads through __get().
                $this->currentItem = array();
            }
        ));

        $parser->rule("$root contents ?", array(
            'cdata' => function ($data, $parser) {
                $this->currentItem = $this->appendText($this->currentItem, $parser, $data);
            }
        ));

        $parser->parse_file($filename);
    }

    /**
     * Read-only access to the parsed data.
     *
     * @param string $name 'contents' or a root-level tag name (any case).
     * @return mixed The contents list, the tag's text, or null when unknown.
     */
    function __get($name) {
        $name = strtolower($name);

        if ($name === 'contents') {
            return $this->contents;
        }

        if (array_key_exists($name, $this->properties)) {
            return $this->properties[$name];
        }

        return null;
    }

    /**
     * Appends a chunk of character data to the entry named after the
     * innermost open tag and returns the updated map.
     *
     * Character data may arrive in several chunks per element, hence the
     * append. The missing-key case is handled explicitly rather than with
     * the @ operator, because @ does not stop a custom error handler from
     * being invoked.
     *
     * @param array  $bucket Map to update.
     * @param object $parser Parser passed to the callback; must offer stack().
     * @param string $data   Text chunk reported by the parser.
     * @return array
     */
    private function appendText(array $bucket, $parser, $data)
    {
        $stack = $parser->stack();
        $tag = end($stack);
        $name = $tag['name'];

        $bucket[$name] = (isset($bucket[$name]) ? $bucket[$name] : '') . $data;

        return $bucket;
    }
}
<?php
require_once 'EasyXMLParserException.php';
require_once 'Finalizer.php';
/**
 * Rule-driven streaming XML parser built on PHP's xml_* (expat) functions.
 *
 * Rules are registered with rule() as space-separated paths of tag names
 * (matched case-insensitively). Two special path segments exist:
 *   '?'  matches any tag that has no entry of its own at that level;
 *   '+'  is remembered while its parent is open and is used for any
 *        descendant that matches neither a named entry nor '?'.
 *
 * Callbacks are stored under '@'-prefixed keys and invoked as:
 *   start:   ($attr, $name, $parser)
 *   end:     ($attr, $name, $parser)
 *   cdata:   ($data, $parser)
 *   default: ($data, $parser)
 *
 * An element that matches no rule is skipped together with its whole
 * subtree; no callback fires until it is closed.
 */
class EasyXMLParser
{
    protected $stack = array(); # stack of tags and attributes
    protected $hstack = array(); # stack of tag handlers
    protected $wstack = array(); # '+' handlers
    protected $ruleset = array(); # ruleset
    protected $miss = 0; # current depth of missed tags
    public $bufferSize = 4096;

    /**
     * Parses a file, firing the registered callbacks as elements are read.
     *
     * While parsing, PHP errors covered by the current error_reporting()
     * level are converted to ErrorException; the previous error handler is
     * restored when the local Finalizer is destroyed.
     *
     * @param string $filename File to read.
     * @return bool True on success, false if the file could not be opened
     *              and the failure was not reported as an exception.
     * @throws EasyXMLParserException When the XML is not well-formed or a
     *         callback/PHP error interrupts parsing; the cause is attached
     *         as the previous exception.
     */
    public function parse_file($filename)
    {
        $this->stack = array();
        $this->hstack = array($this->ruleset);
        $this->wstack = array();
        $this->miss = 0;

        $fin = new Finalizer('set_error_handler', set_error_handler([$this, 'error_handler']));

        $fh = fopen($filename, 'r');
        if (!$fh) {
            return false;
        }
        $fin('fclose', $fh);

        $parser = xml_parser_create();
        $fin('xml_parser_free', $parser);

        // Handlers are passed as [object, method] callables instead of going
        // through xml_set_object(), which is deprecated as of PHP 8.4 and
        // whose deprecation notice would be turned into an exception by the
        // error handler installed above.
        xml_set_character_data_handler($parser, [$this, 'character_data_handler']);
        xml_set_default_handler($parser, [$this, 'default_handler']);
        xml_set_element_handler(
            $parser,
            [$this, 'start_element_handler'],
            [$this, 'end_element_handler']
        );

        do {
            try {
                $buffer = fread($fh, $this->bufferSize);
                $last = feof($fh);

                // xml_parse() reports malformed input through its return
                // value only, so it has to be checked explicitly.
                if (!xml_parse($parser, $buffer, $last)) {
                    $errorCode = xml_get_error_code($parser);
                    throw new RuntimeException((string) xml_error_string($errorCode), $errorCode);
                }
            } catch (Exception $e) {
                $line = xml_get_current_line_number($parser);
                $column = xml_get_current_column_number($parser);
                $message = "Parse error on line $line, column $column";
                $code = $e->getCode();
                throw new EasyXMLParserException($message, $code, $line, $column, $e);
            }
        } while (!$last);

        return true;
    }

    /**
     * Registers callbacks for one or more tag paths.
     *
     * @param string|array $tagset One path, or a list of paths, each a
     *                             space-separated sequence of tag names
     *                             (or '?' / '+').
     * @param array        $rules  Map of event name ('start', 'end',
     *                             'cdata', 'default') to callback.
     * @return $this Allows chained rule() calls.
     */
    public function rule($tagset, $rules)
    {
        foreach ((array) $tagset as $tags) {
            $handler = &$this->ruleset;

            foreach (explode(' ', $tags) as $tag) {
                $tag = strtolower($tag);
                if ($tag === '') {
                    continue; // tolerate repeated spaces
                }
                if (!array_key_exists($tag, $handler) || !is_array($handler[$tag])) {
                    $handler[$tag] = array();
                }
                $handler = &$handler[$tag];
            }

            foreach ($rules as $key => $callback) {
                $handler["@$key"] = $callback;
            }

            // Drop the reference so it cannot leak into the next iteration.
            unset($handler);
        }

        return $this;
    }

    /**
     * Returns the currently open matched elements, outermost first.
     *
     * @return array List of array('name' => string, 'attr' => array).
     */
    public function stack()
    {
        return $this->stack;
    }

    /**
     * Returns the names of the currently open matched elements joined by '/'.
     *
     * @return string
     */
    public function path()
    {
        return implode('/', array_map(function ($tag) {
            return $tag['name'];
        }, $this->stack()));
    }

    /**
     * Forwards character data to the '@cdata' callback of the innermost
     * matched element, unless a skipped subtree is being read.
     */
    protected function character_data_handler($parser, $data)
    {
        if (!$this->miss) {
            $handler = end($this->hstack);
            if (array_key_exists('@cdata', $handler) && is_callable($handler['@cdata'])) {
                $handler['@cdata']($data, $this);
            }
        }
    }

    /**
     * Forwards data not covered by another handler to the '@default'
     * callback of the innermost matched element.
     */
    protected function default_handler($parser, $data)
    {
        if (!$this->miss) {
            $handler = end($this->hstack);
            if ($handler && array_key_exists('@default', $handler) && is_callable($handler['@default'])) {
                $handler['@default']($data, $this);
            }
        }
    }

    /**
     * Selects the rule node for an opening tag and fires its '@start'.
     *
     * Lookup order below the current node: exact tag name, then '?', then
     * the innermost remembered '+' node. If nothing matches, the element
     * starts a skipped subtree tracked by the miss counter.
     */
    protected function start_element_handler($parser, $name, $attr)
    {
        if ($this->miss) {
            // Already inside a skipped subtree: only track nesting depth.
            $this->miss++;
        } else {
            $attr = array_change_key_case($attr);
            $name = strtolower($name);
            $handler = end($this->hstack);

            if (array_key_exists('+', $handler) && is_array($handler['+'])) {
                $this->wstack[] = $handler['+'];
            }

            if (array_key_exists($name, $handler) && is_array($handler[$name])) {
                $handler = $handler[$name];
            } elseif (array_key_exists('?', $handler) && is_array($handler['?'])) {
                $handler = $handler['?'];
            } elseif ($this->wstack) {
                $handler = end($this->wstack);
            } else {
                $this->miss++;
            }

            if (!$this->miss) {
                $this->stack[] = array('name' => $name, 'attr' => $attr);
                $this->hstack[] = $handler;
                if (array_key_exists('@start', $handler) && is_callable($handler['@start'])) {
                    $handler['@start']($attr, $name, $this);
                }
            }
        }
    }

    /**
     * Fires '@end' for a closing tag and unwinds the stacks.
     */
    protected function end_element_handler($parser, $name)
    {
        if ($this->miss) {
            $this->miss--;
        } else {
            $handler = array_pop($this->hstack);
            $tag = array_pop($this->stack);

            if (array_key_exists('@end', $handler) && is_callable($handler['@end'])) {
                $handler['@end']($tag['attr'], $tag['name'], $this);
            }

            // Mirror of the push in start_element_handler(): the parent
            // node contributed a '+' entry when this element was opened.
            $handler = end($this->hstack);
            if (array_key_exists('+', $handler) && is_array($handler['+'])) {
                array_pop($this->wstack);
            }
        }
    }

    /**
     * Error handler active during parse_file(): turns PHP errors into
     * ErrorException so they surface as parse failures.
     *
     * Errors excluded by error_reporting() (including those silenced with
     * the @ operator) are handed back to PHP's default handling.
     */
    protected function error_handler($errno, $errstr, $errfile, $errline)
    {
        if (!(error_reporting() & $errno)) {
            return false;
        }

        throw new ErrorException($errstr, $errno, E_ERROR, $errfile, $errline);
    }
}
<?php
/**
 * Exception carrying the position in the XML input at which parsing failed.
 *
 * The XML position is stored separately from Exception's own line/file
 * information, which describes the PHP source location of the throw.
 */
class EasyXMLParserException extends Exception
{
    /** @var int Line of the XML input. */
    protected $xmlLine;

    /** @var int Column of the XML input. */
    protected $xmlColumn;

    /**
     * @param string         $message  Human-readable description.
     * @param int            $code     Error code forwarded to Exception.
     * @param int            $line     Line number within the XML input.
     * @param int            $column   Column number within the XML input.
     * @param Throwable|null $previous Underlying cause, if any.
     */
    public function __construct($message, $code, $line, $column, ?Throwable $previous = null)
    {
        parent::__construct($message, $code, $previous);
        $this->xmlLine = $line;
        $this->xmlColumn = $column;
    }

    /**
     * @return int Line number within the XML input.
     */
    public function getXMLLine()
    {
        return $this->xmlLine;
    }

    /**
     * @return int Column number within the XML input.
     */
    public function getXMLColumn()
    {
        return $this->xmlColumn;
    }
}
<?php
/**
 * Scope guard: collects callbacks and runs them when the object is destroyed.
 *
 * Each registration is a callable followed by the arguments to pass to it.
 * Callbacks run in the order in which they were registered.
 */
class Finalizer
{
    /** @var array[] Each entry is array(callable, arg1, arg2, ...). */
    protected $callbacks = array();

    /**
     * Optionally registers a first callback; extra arguments are passed to
     * it when it runs.
     */
    public function __construct(callable $callback = null)
    {
        if (!$callback) {
            return;
        }

        $this->remember(func_get_args());
    }

    /**
     * Runs every registered callback with its stored arguments.
     */
    public function __destruct()
    {
        foreach ($this->callbacks as $invocation) {
            call_user_func(...$invocation);
        }
    }

    /**
     * Registers another callback; extra arguments are passed to it when it
     * runs.
     */
    public function __invoke(callable $callback)
    {
        $this->remember(func_get_args());
    }

    /**
     * Appends one callable-plus-arguments list to the queue.
     */
    private function remember(array $invocation)
    {
        $this->callbacks[] = $invocation;
    }
}
<?php
require_once 'EasyXMLParser.inc';
/**
 * Streaming reader for a <nexto-products> catalogue.
 *
 * A handler table describing the accepted document structure is installed
 * on an EasyXMLParser instance. Every completed <product> is handed to the
 * callable stored in $product_handler as
 *   array('attr' => array, 'data' => array(field => ...)).
 * Any element that the table does not list raises an Exception.
 *
 * NOTE(review): this class writes to `$parser->handlers`, uses '*' as the
 * wildcard key and declares callbacks as ($parser, $name, $attr). The
 * EasyXMLParser class visible in this file stores rules via rule(), uses '?'
 * and calls start callbacks as ($attr, $name, $parser) - presumably this
 * class targets the different parser version loaded from
 * 'EasyXMLParser.inc'; confirm before relying on it.
 */
class NextoXMLParser
{
    protected $parser;
    protected $product;      # product being assembled
    protected $tag;          # name of the field/list currently receiving data
    protected $obj;          # issue or subscription being assembled
    protected $subscription; # not used by any method of this class
    protected $value;        # list item being assembled
    public $product_handler; # callable invoked with each finished product

    /**
     * Builds the handler table and installs it on a new EasyXMLParser.
     */
    public function __construct()
    {
        $this->parser = new EasyXMLParser;

        $priceTags = array(
            'price-original',
            'price-normal',
            'price-club',
        );

        // <product> and its plain text fields.
        $product = $this->element_handlers('product');
        $productFields = array(
            'name',
            'type',
            'url',
            'category',
            'issn',
            'manufacturer',
            'language',
            'description',
            'review-note',
        );
        foreach ($productFields as $tag) {
            $product[$tag] = $this->text_handlers('product_field');
        }
        $product['issues'] = $this->element_handlers('issues');
        $product['subscriptions'] = $this->element_handlers('subscriptions');

        // <issue>: text fields, prices and lists of repeated values.
        $issue = $this->element_handlers('issue');
        $issueFields = array(
            'title',
            'publication-date',
            'activation-date',
            'formats-all',
            'description',
            'pages-count',
            'audiobook-length',
        );
        foreach ($issueFields as $tag) {
            $issue[$tag] = $this->text_handlers('issue_field');
        }
        foreach ($priceTags as $tag) {
            $issue['price'][$tag] = $this->text_handlers('issue_field');
        }
        $issue['price']['*'] = $this->unknown_handlers();

        $issueLists = array(
            'formats' => 'format',
            'covers' => 'cover',
            'tocs' => 'table-of-content',
            'authors' => 'author',
            'lectors' => 'lector',
            'free-fragments' => 'free-fragment',
        );
        foreach ($issueLists as $list => $item) {
            $issue[$list] = $this->element_handlers('array');
            $issue[$list][$item] = $this->text_handlers('value');
        }

        // <subscription>: text fields and prices.
        $subscription = $this->element_handlers('subscription');
        $subscriptionFields = array(
            'name',
            'quantity',
            'format',
            'cyclic',
        );
        foreach ($subscriptionFields as $tag) {
            $subscription[$tag] = $this->text_handlers('subscription_field');
        }
        foreach ($priceTags as $tag) {
            $subscription['price'][$tag] = $this->text_handlers('subscription_field');
        }
        $subscription['price']['*'] = $this->unknown_handlers();

        $product['issues']['issue'] = $issue;
        $product['subscriptions']['subscription'] = $subscription;

        $this->parser->handlers['nexto-products']['product'] = $product;
        // The '@end' entry here used to point at unknown_start by mistake.
        $this->parser->handlers['nexto-products']['*'] = $this->unknown_handlers();
        $this->parser->handlers['*'] = $this->unknown_handlers();
    }

    /**
     * Handler pair that rejects an unexpected element.
     */
    private function unknown_handlers()
    {
        return array(
            '@start' => array($this, 'unknown_start'),
            '@end' => array($this, 'unknown_end'),
        );
    }

    /**
     * Handlers for a container element: "{$prefix}_start" and
     * "{$prefix}_end" on this object, plus rejection of unlisted children.
     */
    private function element_handlers($prefix)
    {
        return array(
            '@start' => array($this, $prefix . '_start'),
            '@end' => array($this, $prefix . '_end'),
            '*' => $this->unknown_handlers(),
        );
    }

    /**
     * Handlers for a text element: like element_handlers() with an
     * additional "{$prefix}_cdata" callback.
     */
    private function text_handlers($prefix)
    {
        return array(
            '@start' => array($this, $prefix . '_start'),
            '@end' => array($this, $prefix . '_end'),
            '@cdata' => array($this, $prefix . '_cdata'),
            '*' => $this->unknown_handlers(),
        );
    }

    /**
     * Parses the given file with the configured parser.
     *
     * @param string $filename File to read.
     * @return mixed Whatever the underlying parser's parse_file() returns.
     */
    public function parse_file($filename)
    {
        return $this->parser->parse_file($filename);
    }

    /** Starts a new product record. */
    public function product_start($parser, $name, $attr)
    {
        $this->product = array('attr' => $attr, 'data' => array());
    }

    /** Passes the finished product to $product_handler and clears it. */
    public function product_end($parser, $name)
    {
        call_user_func($this->product_handler, $this->product);
        $this->product = null;
    }

    /**
     * Opens a text field of the product.
     *
     * @throws Exception If the field already occurred in this product.
     */
    public function product_field_start($parser, $name, $attr)
    {
        if (isset($this->product['data'][$name])) {
            throw new Exception("Duplicate $name field in <product>");
        }

        $this->tag = $name;
        $this->product['data'][$name] = array('attr' => $attr, 'value' => '');
    }

    public function product_field_end($parser, $name)
    {
        $this->tag = null;
    }

    /** Appends a text chunk to the product field currently open. */
    public function product_field_cdata($parser, $data)
    {
        $this->product['data'][$this->tag]['value'] .= $data;
    }

    /**
     * Opens the product's issue list.
     *
     * @throws Exception If the product already has an <issues> element.
     */
    public function issues_start($parser, $name, $attr)
    {
        if (isset($this->product['data']['issues'])) {
            throw new Exception('<issues> has been already defined');
        }

        $this->product['data']['issues'] = array('attr' => $attr, 'list' => array());
    }

    public function issues_end($parser, $name)
    {
        # nothing
    }

    /** Starts a new issue record. */
    public function issue_start($parser, $name, $attr)
    {
        $this->obj = array();
    }

    /** Adds the finished issue to the product's issue list. */
    public function issue_end($parser, $name)
    {
        $this->product['data']['issues']['list'][] = $this->obj;
        $this->obj = null;
    }

    /**
     * Opens a text field of the current issue.
     *
     * @throws Exception If the field already occurred in this issue.
     */
    public function issue_field_start($parser, $name, $attr)
    {
        if (isset($this->obj[$name])) {
            throw new Exception("Duplicate $name field in <issue>");
        }

        $this->tag = $name;
        $this->obj[$name] = array('attr' => $attr, 'value' => '');
    }

    public function issue_field_end($parser, $name)
    {
        $this->tag = null;
    }

    /** Appends a text chunk to the issue field currently open. */
    public function issue_field_cdata($parser, $data)
    {
        $this->obj[$this->tag]['value'] .= $data;
    }

    /**
     * Opens the product's subscription list.
     *
     * @throws Exception If the product already has a <subscriptions> element.
     */
    public function subscriptions_start($parser, $name, $attr)
    {
        if (isset($this->product['data']['subscriptions'])) {
            throw new Exception('<subscriptions> has been already defined');
        }

        $this->product['data']['subscriptions'] = array('attr' => $attr, 'list' => array());
    }

    public function subscriptions_end($parser, $name)
    {
        # nothing
    }

    /** Starts a new subscription record. */
    public function subscription_start($parser, $name, $attr)
    {
        $this->obj = array();
    }

    /** Adds the finished subscription to the product's subscription list. */
    public function subscription_end($parser, $name)
    {
        $this->product['data']['subscriptions']['list'][] = $this->obj;
        $this->obj = null;
    }

    /**
     * Opens a text field of the current subscription.
     *
     * @throws Exception If the field already occurred in this subscription.
     */
    public function subscription_field_start($parser, $name, $attr)
    {
        if (isset($this->obj[$name])) {
            throw new Exception("Duplicate $name field in <subscription>");
        }

        $this->tag = $name;
        $this->obj[$name] = array('attr' => $attr, 'value' => '');
    }

    public function subscription_field_end($parser, $name)
    {
        $this->tag = null;
    }

    /** Appends a text chunk to the subscription field currently open. */
    public function subscription_field_cdata($parser, $data)
    {
        $this->obj[$this->tag]['value'] .= $data;
    }

    /**
     * Opens a list element (e.g. <formats>) of the current record.
     *
     * The list name is kept in $tag so value_end() knows where to append;
     * array_end() deliberately leaves it in place.
     *
     * @throws Exception If the list already occurred in this record.
     */
    public function array_start($parser, $name, $attr)
    {
        if (isset($this->obj[$name])) {
            throw new Exception("<$name> has been already defined");
        }

        $this->obj[$name] = array('attr' => $attr, 'list' => array());
        $this->tag = $name;
    }

    public function array_end($parser, $name)
    {
        # nothing
    }

    /** Starts a new list item. */
    public function value_start($parser, $name, $attr)
    {
        $this->value = array('attr' => $attr, 'value' => '');
    }

    /** Appends the finished item to the list opened by array_start(). */
    public function value_end($parser, $name)
    {
        $this->obj[$this->tag]['list'][] = $this->value;
        $this->value = null;
    }

    /** Appends a text chunk to the list item currently open. */
    public function value_cdata($parser, $data)
    {
        $this->value['value'] .= $data;
    }

    /**
     * Rejects an opening tag the handler table does not list.
     *
     * @throws Exception Always; the message holds the parser's tag path.
     */
    public function unknown_start($parser, $name, $attr)
    {
        $tags = array();
        foreach ($parser->stack() as $tag) {
            $tags[] = $tag['name'];
        }

        throw new Exception('Unknown start tag: ' . implode('/', $tags));
    }

    /**
     * Rejects a closing tag the handler table does not list.
     *
     * @throws Exception Always; the message holds the parser's tag path.
     */
    public function unknown_end($parser, $name)
    {
        $tags = array();
        foreach ($parser->stack() as $tag) {
            $tags[] = $tag['name'];
        }

        throw new Exception('Unknown end tag: ' . implode('/', $tags));
    }
}
<?php
require_once 'EasyXMLParser.php';
/**
 * Collects the entries of a Subversion XML index listing.
 *
 * For every element found directly below <svn><index>, the element's
 * attribute array is appended to $items under the element's tag name.
 */
class SubversionXMLParser
{
    /** @var array Tag name => list of attribute arrays. */
    public $items;

    /**
     * Parses the given file immediately.
     */
    function __construct ($filename) {
        // Records one entry per opening tag matched by the rule below.
        $collect = function ($attr, $name) {
            $this->items[$name][] = $attr;
        };

        $this->items = array();

        $xml = new EasyXMLParser;
        $xml->rule("svn index ?", array('start' => $collect));
        $xml->parse_file($filename);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment