Skip to content

Instantly share code, notes, and snippets.

@theraot
Last active December 18, 2021 23:38
Show Gist options
  • Save theraot/4b99abf20f3995cc374090487a5c521b to your computer and use it in GitHub Desktop.
Save theraot/4b99abf20f3995cc374090487a5c521b to your computer and use it in GitHub Desktop.
<?php
/**
* CC-BY 3.0 Alfonso J. Ramos (theraot)
* Parser
*/
/**
 * Cursor-based parser over a string document.
 *
 * The parser keeps a byte position into the document and exposes "consume"
 * operations that return a piece of the text ahead of the cursor and advance
 * past it. All positions and lengths are byte counts (strlen/substr/strpos).
 *
 * Single characters are read through UTF8::CharacterAt / UTF8::CharactersAt,
 * which are defined outside this file. NOTE(review): this class assumes they
 * return the character(s) starting at the given byte offset and an empty
 * string at the end of the input - confirm against the UTF8 class.
 */
final class Parser
{
    //------------------------------------------------------------
    // Private (Class)
    //------------------------------------------------------------

    /**
     * Returns the text between $offset and $position.
     *
     * @param string $input          Text to read from.
     * @param int    $offset         Byte offset where reading starts.
     * @param int    $length         Byte length of $input (unused, kept for signature parity).
     * @param int    $position       Byte offset where reading stops (exclusive).
     * @param int    $consumedLength Receives the number of bytes consumed.
     *
     * @return string|null The text, '' when nothing lies in between, null when $position precedes $offset.
     */
    private static function _ConsumeToPosition(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*int*/ $position, /*int*/ &$consumedLength)
    {
        if ($position < $offset)
        {
            $consumedLength = 0;
            return null;
        }
        $consumedLength = $position - $offset;
        if ($consumedLength === 0)
        {
            // Return '' explicitly: old PHP versions return false from substr() at the end of the string.
            return '';
        }
        return substr($input, $offset, $consumedLength);
    }

    /**
     * Matches $what at $offset without moving any cursor.
     *
     * The result depends on the type of $what:
     *  - null: the next character, or null at the end of the input.
     *  - string: true when the input continues with exactly $what, false otherwise.
     *  - numeric (non-string): that many characters, or null when they do not fit.
     *  - array|\Iterator: the first string item the input continues with, or null.
     *  - anything else: null.
     *
     * $what has no default value here: an optional parameter placed before the
     * required by-reference parameter is deprecated since PHP 8.0, and every
     * caller passes it explicitly.
     *
     * @param string $input          Text to read from.
     * @param int    $offset         Byte offset to match at.
     * @param int    $length         Byte length of $input.
     * @param mixed  $what           What to match (see above).
     * @param int    $consumedLength Receives the number of bytes matched (0 on failure).
     *
     * @return string|bool|null
     */
    private static function _Consume(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*mixed*/ $what, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        if ($what === null)
        {
            $result = UTF8::CharacterAt($input, $offset);
            $len = strlen($result);
            if ($len > 0)
            {
                $consumedLength = $len;
                return $result;
            }
            return null;
        }
        if (is_string($what))
        {
            $len = strlen($what);
            if ($offset + $len <= $length && substr($input, $offset, $len) === $what)
            {
                $consumedLength = $len;
                return true;
            }
            return false;
        }
        if (is_numeric($what))
        {
            $result = UTF8::CharactersAt($input, $offset, $what);
            $len = strlen($result);
            if ($offset + $len <= $length)
            {
                $consumedLength = $len;
                return $result;
            }
            return null;
        }
        if (is_array($what) || ($what instanceof \Iterator))
        {
            if ($offset < $length)
            {
                foreach ($what as $item)
                {
                    // Non-string items can never be identical to a substring; skipping them avoids strlen() on arrays/objects.
                    if (!is_string($item))
                    {
                        continue;
                    }
                    $len = strlen($item);
                    if ($offset + $len <= $length && substr($input, $offset, $len) === $item)
                    {
                        $consumedLength = $len;
                        return $item;
                    }
                }
            }
            return null;
        }
        return null;
    }

    /**
     * Matches the next character when $callback accepts it.
     *
     * @param string   $input          Text to read from.
     * @param int      $offset         Byte offset to match at.
     * @param int      $length         Byte length of $input (unused).
     * @param callable $callback       Receives the character; a truthy result accepts it.
     * @param int      $consumedLength Receives the number of bytes matched (0 on failure).
     *
     * @return string|null The accepted character; null when rejected, at the end of the input, or when $callback is not callable.
     */
    private static function _ConsumeCallback(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*function*/ $callback, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        if (!is_callable($callback))
        {
            trigger_error('Invalid callback');
            return null;
        }
        $result = UTF8::CharacterAt($input, $offset);
        $len = strlen($result);
        if ($len > 0 && call_user_func($callback, $result))
        {
            $consumedLength = $len;
            return $result;
        }
        return null;
    }

    /**
     * Shared implementation of _ConsumeUntil and _ConsumeUntilLast.
     *
     * Finds the delimiter(s) in $what at or after $offset and returns the text
     * up to the chosen occurrence: the earliest one, or the latest one when
     * $last is true. Empty strings and non-string items are ignored.
     *
     * When no delimiter is found, $greedy decides the outcome: true consumes
     * the rest of the input, false consumes nothing.
     *
     * @param string $input          Text to read from.
     * @param bool   $greedy         Whether to consume everything when nothing is found.
     * @param int    $offset         Byte offset where the search starts.
     * @param int    $length         Byte length of $input.
     * @param mixed  $what           A delimiter string, or an array/\Iterator of delimiter strings.
     * @param bool   $last           Stop at the last occurrence instead of the first.
     * @param int    $consumedLength Receives the number of bytes consumed.
     *
     * @return string The consumed text ('' when nothing was consumed).
     */
    private static function _ConsumeUntilMatch(/*string*/ $input, /*bool*/ $greedy, /*int*/ $offset, /*int*/ $length, /*mixed*/ $what, /*bool*/ $last, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        if ($offset >= $length)
        {
            return '';
        }
        if (is_string($what))
        {
            $candidates = array($what);
        }
        else if (is_array($what) || ($what instanceof \Iterator))
        {
            $candidates = $what;
        }
        else
        {
            return '';
        }
        // null means "nothing found yet". A numeric sentinel such as 0 cannot
        // be told apart from a real position and made non-greedy searches fail.
        $bestPosition = null;
        foreach ($candidates as $item)
        {
            if (!is_string($item) || $item === '')
            {
                continue;
            }
            $position = $last ? strrpos($input, $item, $offset) : strpos($input, $item, $offset);
            if ($position === false)
            {
                continue;
            }
            if ($bestPosition === null)
            {
                $bestPosition = $position;
            }
            else if ($last ? ($position > $bestPosition) : ($position < $bestPosition))
            {
                $bestPosition = $position;
            }
        }
        if ($bestPosition !== null)
        {
            return self::_ConsumeToPosition($input, $offset, $length, $bestPosition, $consumedLength);
        }
        if ($greedy)
        {
            $consumedLength = $length - $offset;
            return substr($input, $offset);
        }
        return '';
    }

    /**
     * Returns the text from $offset up to the first occurrence of $what.
     * See _ConsumeUntilMatch for parameters and the meaning of $greedy.
     */
    private static function _ConsumeUntil(/*string*/ $input, /*bool*/ $greedy, /*int*/ $offset, /*int*/ $length, /*mixed*/ $what, /*int*/ &$consumedLength)
    {
        return self::_ConsumeUntilMatch($input, $greedy, $offset, $length, $what, false, $consumedLength);
    }

    /**
     * Returns the text from $offset up to the last occurrence of $what.
     * See _ConsumeUntilMatch for parameters and the meaning of $greedy.
     */
    private static function _ConsumeUntilLast(/*string*/ $input, /*bool*/ $greedy, /*int*/ $offset, /*int*/ $length, /*mixed*/ $what, /*int*/ &$consumedLength)
    {
        return self::_ConsumeUntilMatch($input, $greedy, $offset, $length, $what, true, $consumedLength);
    }

    /**
     * Shared implementation of _ConsumeUntilCallback and _ConsumeWhileCallback.
     *
     * Reads characters one at a time and keeps them for as long as the
     * truthiness of $callback's result equals $continueOn.
     *
     * @param string   $input          Text to read from.
     * @param int      $offset         Byte offset where reading starts.
     * @param int      $length         Byte length of $input.
     * @param callable $callback       Receives each character.
     * @param bool     $continueOn     Callback truthiness that keeps the loop going.
     * @param int      $consumedLength Receives the number of bytes consumed.
     *
     * @return string|null The consumed text; null when $callback is not callable.
     */
    private static function _ConsumeByCallback(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*function*/ $callback, /*bool*/ $continueOn, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        if (!is_callable($callback))
        {
            trigger_error('Invalid callback');
            return null;
        }
        $result = '';
        // Bounding by $length guarantees termination whatever the character reader returns at the end.
        while ($offset < $length)
        {
            $character = UTF8::CharacterAt($input, $offset);
            if (!is_string($character) || $character === '')
            {
                break;
            }
            if (((bool) call_user_func($callback, $character)) !== $continueOn)
            {
                break;
            }
            $len = strlen($character);
            $consumedLength += $len;
            $offset += $len;
            $result .= $character;
        }
        return $result;
    }

    /**
     * Returns the characters from $offset up to the first one $callback accepts.
     * See _ConsumeByCallback for parameters.
     */
    private static function _ConsumeUntilCallback(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*function*/ $callback, /*int*/ &$consumedLength)
    {
        return self::_ConsumeByCallback($input, $offset, $length, $callback, false, $consumedLength);
    }

    /**
     * Returns the run of characters starting at $offset that equal $what (string)
     * or that are contained in $what (array/\Iterator).
     *
     * Comparison is per character, so a string $what longer than one character
     * never matches.
     *
     * @param string $input          Text to read from.
     * @param int    $offset         Byte offset where reading starts.
     * @param int    $length         Byte length of $input.
     * @param mixed  $what           A character, or an array/\Iterator of characters.
     * @param int    $consumedLength Receives the number of bytes consumed.
     *
     * @return string The consumed text ('' when nothing matched or $what has an unsupported type).
     */
    private static function _ConsumeWhile(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*mixed*/ $what, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        $result = '';
        $single = is_string($what);
        if (!$single)
        {
            if ($what instanceof \Iterator)
            {
                $what = iterator_to_array($what, false);
            }
            else if (!is_array($what))
            {
                return '';
            }
        }
        // Stopping at $length (and on an empty character) prevents an endless
        // loop when $what is '' or contains a value loosely equal to ''.
        while ($offset < $length)
        {
            $character = UTF8::CharacterAt($input, $offset);
            if (!is_string($character) || $character === '')
            {
                break;
            }
            // The array form keeps the original loose comparison so that
            // numeric items such as 1 still match the character '1'.
            $matches = $single ? ($character === $what) : in_array($character, $what);
            if (!$matches)
            {
                break;
            }
            $len = strlen($character);
            $consumedLength += $len;
            $offset += $len;
            $result .= $character;
        }
        return $result;
    }

    /**
     * Returns the run of characters starting at $offset that $callback accepts.
     * See _ConsumeByCallback for parameters.
     */
    private static function _ConsumeWhileCallback(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*function*/ $callback, /*int*/ &$consumedLength)
    {
        return self::_ConsumeByCallback($input, $offset, $length, $callback, true, $consumedLength);
    }

    //------------------------------------------------------------
    // Private (Instance)
    //------------------------------------------------------------

    // Defaults keep the instance usable (as an empty document) even when the
    // constructor rejects its argument.
    private $document = '';
    private $documentPosition = 0;
    private $documentSize = 0;

    /**
     * Moves the cursor forward after a successful static consume call.
     *
     * @param mixed $result         Result of the consume call; null means failure.
     * @param int   $consumedLength Number of bytes reported as consumed.
     *
     * @return mixed $result, unchanged.
     */
    private function _Advance($result, $consumedLength)
    {
        if ($result !== null)
        {
            $this->documentPosition += (int) $consumedLength;
        }
        return $result;
    }

    //------------------------------------------------------------
    // Public (Instance)
    //------------------------------------------------------------

    /**
     * Returns whether or not there is more to consume
     *
     * @return bool
     */
    public function CanConsume()
    {
        return $this->documentPosition < $this->documentSize;
    }

    /**
     * Returns what has been consumed
     *
     * @return string
     */
    public function Consumed()
    {
        return substr($this->document, 0, $this->documentPosition);
    }

    /**
     * Returns the current position in the document, in bytes
     *
     * @return int
     */
    public function DocumentPosition()
    {
        return $this->documentPosition;
    }

    /**
     * Disposes everything that has been consumed.
     * This is equivalent to creating a new Parser with what is yet to be consumed
     *
     * @return void
     */
    public function Flush()
    {
        $remaining = $this->NotConsumed();
        $this->document = $remaining;
        $this->documentSize = strlen($remaining);
        $this->documentPosition = 0;
    }

    /**
     * Returns what is yet to be consumed
     *
     * @return string
     */
    public function NotConsumed()
    {
        return substr($this->document, $this->documentPosition);
    }

    /**
     * Advances if what comes ahead matches $what.
     *
     * - null: consumes and returns the next character (null at the end).
     * - string: consumes $what when it comes next; returns true or false.
     * - numeric (non-string): consumes and returns that many characters (null if unavailable).
     * - array|\Iterator: consumes and returns the first item that comes next (null if none).
     *
     * @return string|bool|null
     */
    public function Consume(/*mixed*/ $what = null)
    {
        $result = self::_Consume($this->document, $this->documentPosition, $this->documentSize, $what, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances to the end of the document and returns what was consumed
     *
     * @return string
     */
    public function ConsumeAll()
    {
        $position = $this->documentPosition;
        $this->documentPosition = $this->documentSize;
        return substr($this->document, $position);
    }

    /**
     * Advances if what is ahead satisfies $callback and returns what was consumed
     *
     * @return string|null The consumed character, or null when nothing was consumed.
     */
    public function ConsumeCallback(/*mixed*/ $callback = null)
    {
        // Pass $callback on; the previous code passed an undefined variable, so every call failed.
        $result = self::_ConsumeCallback($this->document, $this->documentPosition, $this->documentSize, $callback, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances until the first occurrence of $what and returns what was consumed.
     * $what itself is not consumed. When $what is not found, a greedy call
     * consumes the rest of the document and a non-greedy call consumes nothing.
     *
     * @return string
     */
    public function ConsumeUntil(/*mixed*/ $what, /*bool*/ $greedy = true)
    {
        $result = self::_ConsumeUntil($this->document, $greedy, $this->documentPosition, $this->documentSize, $what, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances until the last occurrence of $what and returns what was consumed.
     * $what itself is not consumed. When $what is not found, a greedy call
     * consumes the rest of the document and a non-greedy call consumes nothing.
     *
     * @return string
     */
    public function ConsumeUntilLast(/*mixed*/ $what, /*bool*/ $greedy = true)
    {
        $result = self::_ConsumeUntilLast($this->document, $greedy, $this->documentPosition, $this->documentSize, $what, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances until $callback yields true and returns what was consumed
     *
     * @return string|null null when $callback is not callable.
     */
    public function ConsumeUntilCallback(/*function*/ $callback)
    {
        $result = self::_ConsumeUntilCallback($this->document, $this->documentPosition, $this->documentSize, $callback, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances while the next character matches $what and returns what was consumed
     *
     * @return string
     */
    public function ConsumeWhile(/*mixed*/ $what)
    {
        $result = self::_ConsumeWhile($this->document, $this->documentPosition, $this->documentSize, $what, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances while $callback yields true and returns what was consumed
     *
     * @return string|null null when $callback is not callable.
     */
    public function ConsumeWhileCallback(/*function*/ $callback)
    {
        $result = self::_ConsumeWhileCallback($this->document, $this->documentPosition, $this->documentSize, $callback, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Looks ahead in the document without advancing. Returns the same values
     * as Consume() would for the same $what.
     *
     * @return string|bool|null
     */
    public function Peek(/*mixed*/ $what = null)
    {
        return self::_Consume($this->document, $this->documentPosition, $this->documentSize, $what, $consumedLength);
    }

    /**
     * Returns the Parser to the start of the document
     *
     * @return void
     */
    public function Reset()
    {
        $this->documentPosition = 0;
    }

    //------------------------------------------------------------
    // Public (Constructor)
    //------------------------------------------------------------

    /**
     * Creates a parser positioned at the start of $document.
     *
     * A non-string argument raises a user-level error and leaves the parser
     * holding an empty document.
     *
     * @param string $document Text to parse.
     */
    public function __construct(/*string*/ $document)
    {
        if (is_string($document))
        {
            $this->document = $document;
            $this->documentSize = strlen($document);
            $this->documentPosition = 0;
        }
        else
        {
            trigger_error('document must be string');
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment