Last active
December 18, 2021 23:38
-
-
Save theraot/4b99abf20f3995cc374090487a5c521b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * CC-BY 3.0 Alfonso J. Ramos (theraot)
 * Parser
 */
/**
 * Cursor-based reader over a string document.
 *
 * The parser keeps a byte position into the document. Each Consume* method
 * tries to match something at that position, advances the position by the
 * number of bytes matched, and returns what was matched.
 *
 * Single characters are read through UTF8::CharacterAt / UTF8::CharactersAt,
 * which are defined outside this file. NOTE(review): this class assumes
 * UTF8::CharacterAt returns the character starting at the given byte offset
 * and an empty string once the end of the input is reached - confirm against
 * the UTF8 class.
 */
final class Parser
{
    //------------------------------------------------------------
    // Private (Class)
    //------------------------------------------------------------

    /**
     * Tells whether $what is a list of candidates usable with foreach.
     *
     * @param mixed $what
     *
     * @return bool
     */
    private static function _IsCollection(/*mixed*/ $what)
    {
        return is_array($what) || ($what instanceof \Traversable);
    }

    /**
     * Returns everything from $offset to the end of $input.
     *
     * @param string $input
     * @param int    $offset
     * @param int    $length         total length of $input in bytes
     * @param int    $consumedLength receives the number of bytes returned
     *
     * @return string
     */
    private static function _ConsumeRest(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*int*/ &$consumedLength)
    {
        $consumedLength = $length - $offset;
        // substr() yields false at the very end of the input on PHP < 8.
        return (string) substr($input, $offset);
    }

    /**
     * Returns the bytes between $offset and $position.
     *
     * @param string $input
     * @param int    $offset
     * @param int    $length         unused, kept so every helper shares the same shape
     * @param int    $position       absolute position to stop at (exclusive)
     * @param int    $consumedLength receives the number of bytes returned
     *
     * @return string|null null when $position lies before $offset
     */
    private static function _ConsumeToPosition(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*int*/ $position, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        if ($position === $offset)
        {
            return '';
        }
        if ($position < $offset)
        {
            return null;
        }
        $consumedLength = $position - $offset;
        return substr($input, $offset, $consumedLength);
    }

    /**
     * Matches $what at $offset.
     *
     * - null:       one character (via UTF8::CharacterAt); returns it, or null at the end.
     * - string:     returns true when $input continues with that exact string, false otherwise.
     * - number:     that many characters (via UTF8::CharactersAt); returns them, or null.
     * - collection: returns the first string item that $input continues with, or null.
     * - other:      returns null.
     *
     * $what used to default to null here, but an optional parameter placed
     * before the required $consumedLength is deprecated since PHP 8.0 and was
     * never usable as optional; every caller passes it explicitly.
     *
     * @param string $input
     * @param int    $offset
     * @param int    $length         total length of $input in bytes
     * @param mixed  $what
     * @param int    $consumedLength receives the number of bytes matched (0 on failure)
     *
     * @return mixed
     */
    private static function _Consume(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*mixed*/ $what, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        if ($what === null)
        {
            $result = UTF8::CharacterAt($input, $offset);
            $len = strlen((string) $result);
            if ($len > 0)
            {
                $consumedLength = $len;
                return $result;
            }
            return null;
        }
        if (is_string($what))
        {
            $len = strlen($what);
            if ($offset + $len <= $length && substr($input, $offset, $len) === $what)
            {
                $consumedLength = $len;
                return true;
            }
            return false;
        }
        if (is_numeric($what))
        {
            $result = UTF8::CharactersAt($input, $offset, $what);
            $len = strlen((string) $result);
            if ($offset + $len <= $length)
            {
                $consumedLength = $len;
                return $result;
            }
            return null;
        }
        if (self::_IsCollection($what) && $offset < $length)
        {
            foreach ($what as $item)
            {
                // Non-string items could never compare identical to a substring.
                if (!is_string($item))
                {
                    continue;
                }
                $len = strlen($item);
                if ($offset + $len <= $length && substr($input, $offset, $len) === $item)
                {
                    $consumedLength = $len;
                    return $item;
                }
            }
        }
        return null;
    }

    /**
     * Matches one character at $offset if $callback accepts it.
     *
     * Triggers an 'Invalid callback' error and returns null when $callback is
     * not callable.
     *
     * @param string   $input
     * @param int      $offset
     * @param int      $length         unused, kept so every helper shares the same shape
     * @param callable $callback       receives the character, returns truthy to accept it
     * @param int      $consumedLength receives the number of bytes matched (0 on failure)
     *
     * @return string|null
     */
    private static function _ConsumeCallback(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*function*/ $callback, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        if (!is_callable($callback))
        {
            trigger_error('Invalid callback');
            return null;
        }
        $result = UTF8::CharacterAt($input, $offset);
        $len = strlen((string) $result);
        if ($len > 0 && call_user_func($callback, $result))
        {
            $consumedLength = $len;
            return $result;
        }
        return null;
    }

    /**
     * Shared implementation of _ConsumeUntil and _ConsumeUntilLast.
     *
     * Looks for the candidate strings in $input starting at $offset and returns
     * the text before the chosen occurrence: the earliest one, or the latest one
     * when $last is true. When nothing is found the rest of the input is returned
     * if $greedy is true, otherwise an empty string and nothing is consumed.
     *
     * @param string $input
     * @param bool   $greedy
     * @param int    $offset
     * @param int    $length         total length of $input in bytes
     * @param mixed  $what           a string or a collection of strings; anything else matches nothing
     * @param bool   $last           true to stop at the last occurrence instead of the first
     * @param int    $consumedLength receives the number of bytes returned
     *
     * @return string|null
     */
    private static function _ConsumeUntilMatch(/*string*/ $input, /*bool*/ $greedy, /*int*/ $offset, /*int*/ $length, /*mixed*/ $what, /*bool*/ $last, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        if ($offset >= $length)
        {
            return '';
        }
        if (is_string($what))
        {
            $candidates = array($what);
        }
        else if (self::_IsCollection($what))
        {
            $candidates = $what;
        }
        else
        {
            return '';
        }
        // null means "nothing found yet". A numeric sentinel would be
        // indistinguishable from a real position.
        $best = null;
        foreach ($candidates as $item)
        {
            if (!is_string($item) || $item === '')
            {
                continue;
            }
            // With a non-negative offset both functions only report
            // occurrences starting at or after $offset.
            $position = $last ? strrpos($input, $item, $offset) : strpos($input, $item, $offset);
            if ($position === false)
            {
                continue;
            }
            if ($best === null || ($last ? $position > $best : $position < $best))
            {
                $best = $position;
            }
        }
        if ($best !== null)
        {
            return self::_ConsumeToPosition($input, $offset, $length, $best, $consumedLength);
        }
        if ($greedy)
        {
            return self::_ConsumeRest($input, $offset, $length, $consumedLength);
        }
        return '';
    }

    /**
     * Returns the text between $offset and the first occurrence of $what.
     *
     * @see Parser::_ConsumeUntilMatch()
     *
     * @return string|null
     */
    private static function _ConsumeUntil(/*string*/ $input, /*bool*/ $greedy, /*int*/ $offset, /*int*/ $length, /*mixed*/ $what, /*int*/ &$consumedLength)
    {
        return self::_ConsumeUntilMatch($input, $greedy, $offset, $length, $what, false, $consumedLength);
    }

    /**
     * Returns the text between $offset and the last occurrence of $what.
     *
     * @see Parser::_ConsumeUntilMatch()
     *
     * @return string|null
     */
    private static function _ConsumeUntilLast(/*string*/ $input, /*bool*/ $greedy, /*int*/ $offset, /*int*/ $length, /*mixed*/ $what, /*int*/ &$consumedLength)
    {
        return self::_ConsumeUntilMatch($input, $greedy, $offset, $length, $what, true, $consumedLength);
    }

    /**
     * Shared implementation of _ConsumeUntilCallback and _ConsumeWhileCallback.
     *
     * Collects characters for as long as the truthiness of $callback's answer
     * equals $expected. Triggers an 'Invalid callback' error and returns null
     * when $callback is not callable.
     *
     * @param string   $input
     * @param int      $offset
     * @param int      $length         total length of $input in bytes
     * @param callable $callback       receives one character at a time
     * @param bool     $expected       answer that keeps the run going
     * @param int      $consumedLength receives the number of bytes returned
     *
     * @return string|null
     */
    private static function _ConsumeRun(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*function*/ $callback, /*bool*/ $expected, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        if (!is_callable($callback))
        {
            trigger_error('Invalid callback');
            return null;
        }
        $result = '';
        while ($offset < $length)
        {
            $character = UTF8::CharacterAt($input, $offset);
            // A zero-length read cannot advance the offset; stop instead of spinning.
            if (!is_string($character) || $character === '')
            {
                break;
            }
            if ((bool) call_user_func($callback, $character) !== $expected)
            {
                break;
            }
            $len = strlen($character);
            $consumedLength += $len;
            $offset += $len;
            $result .= $character;
        }
        return $result;
    }

    /**
     * Collects characters until $callback accepts one.
     *
     * @see Parser::_ConsumeRun()
     *
     * @return string|null
     */
    private static function _ConsumeUntilCallback(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*function*/ $callback, /*int*/ &$consumedLength)
    {
        return self::_ConsumeRun($input, $offset, $length, $callback, false, $consumedLength);
    }

    /**
     * Collects characters for as long as each one is identical to $what (string)
     * or is identical to one of the items of $what (collection).
     *
     * Membership is tested strictly, in line with the === comparisons used by
     * the other matchers of this class.
     *
     * @param string $input
     * @param int    $offset
     * @param int    $length         total length of $input in bytes
     * @param mixed  $what           a character or a collection of characters
     * @param int    $consumedLength receives the number of bytes returned
     *
     * @return string
     */
    private static function _ConsumeWhile(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*mixed*/ $what, /*int*/ &$consumedLength)
    {
        $consumedLength = 0;
        if (is_string($what))
        {
            $accepted = array($what);
        }
        else if (is_array($what))
        {
            $accepted = $what;
        }
        else if ($what instanceof \Traversable)
        {
            // The candidates are checked once per character, so materialize them.
            $accepted = iterator_to_array($what, false);
        }
        else
        {
            return '';
        }
        $result = '';
        while ($offset < $length)
        {
            $character = UTF8::CharacterAt($input, $offset);
            // Guard against a zero-length read: an empty candidate would
            // otherwise match it forever without advancing.
            if (!is_string($character) || $character === '')
            {
                break;
            }
            if (!in_array($character, $accepted, true))
            {
                break;
            }
            $len = strlen($character);
            $consumedLength += $len;
            $offset += $len;
            $result .= $character;
        }
        return $result;
    }

    /**
     * Collects characters for as long as $callback accepts them.
     *
     * @see Parser::_ConsumeRun()
     *
     * @return string|null
     */
    private static function _ConsumeWhileCallback(/*string*/ $input, /*int*/ $offset, /*int*/ $length, /*function*/ $callback, /*int*/ &$consumedLength)
    {
        return self::_ConsumeRun($input, $offset, $length, $callback, true, $consumedLength);
    }

    //------------------------------------------------------------
    // Private (Instance)
    //------------------------------------------------------------

    // Defaults keep the instance usable even when the constructor rejects its argument.
    private $document = '';
    private $documentPosition = 0;
    private $documentSize = 0;

    /**
     * Moves the position forward after a helper call and hands back its result.
     * A null result means the helper matched nothing, so the position stays put.
     *
     * @param mixed $result
     * @param int   $consumedLength
     *
     * @return mixed $result, unchanged
     */
    private function _Advance(/*mixed*/ $result, /*int*/ $consumedLength)
    {
        if ($result !== null)
        {
            $this->documentPosition += (int) $consumedLength;
        }
        return $result;
    }

    //------------------------------------------------------------
    // Public (Instance)
    //------------------------------------------------------------

    /**
     * Returns whether or not there is more to consume
     *
     * @return bool
     */
    public function CanConsume()
    {
        return $this->documentPosition < $this->documentSize;
    }

    /**
     * Returns what has been consumed
     *
     * @return string
     */
    public function Consumed()
    {
        return (string) substr($this->document, 0, $this->documentPosition);
    }

    /**
     * Returns the current position, in bytes from the start of the document
     *
     * @return int
     */
    public function DocumentPosition()
    {
        return $this->documentPosition;
    }

    /**
     * Disposes everything that has been consumed.
     * This is equivalent to creating a new Parser with what is yet to be consumed
     *
     * @return void
     */
    public function Flush()
    {
        $remaining = $this->NotConsumed();
        $this->document = $remaining;
        $this->documentSize = strlen($remaining);
        $this->documentPosition = 0;
    }

    /**
     * Returns what is yet to be consumed
     *
     * @return string
     */
    public function NotConsumed()
    {
        // substr() yields false at the very end of the document on PHP < 8.
        return (string) substr($this->document, $this->documentPosition);
    }

    /**
     * Advances if what comes ahead matches $what and returns what was consumed.
     *
     * With no argument one character is consumed. With a string the return
     * value is true or false instead of the text. With a number that many
     * characters are consumed. With a collection the first matching item is
     * consumed and returned. null is returned when nothing could be consumed.
     *
     * @param mixed $what
     *
     * @return mixed
     */
    public function Consume(/*mixed*/ $what = null)
    {
        $result = self::_Consume($this->document, $this->documentPosition, $this->documentSize, $what, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances to the end of the document and returns what was consumed
     *
     * @return string
     */
    public function ConsumeAll()
    {
        $remaining = $this->NotConsumed();
        $this->documentPosition = $this->documentSize;
        return $remaining;
    }

    /**
     * Advances if the character ahead satisfies $callback and returns what was consumed
     *
     * @param callable $callback
     *
     * @return string|null
     */
    public function ConsumeCallback(/*mixed*/ $callback = null)
    {
        // The callback itself must be forwarded; an undefined variable here
        // would make every call fail with 'Invalid callback'.
        $result = self::_ConsumeCallback($this->document, $this->documentPosition, $this->documentSize, $callback, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances until the first occurrence of $what and returns what was consumed.
     * When $what does not occur, the rest of the document is consumed if $greedy
     * is true; otherwise nothing is consumed and '' is returned.
     *
     * @param mixed $what   a string or a collection of strings
     * @param bool  $greedy
     *
     * @return string
     */
    public function ConsumeUntil(/*mixed*/ $what, /*bool*/ $greedy = true)
    {
        $result = self::_ConsumeUntil($this->document, $greedy, $this->documentPosition, $this->documentSize, $what, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances until the last occurrence of $what and returns what was consumed.
     * When $what does not occur, the rest of the document is consumed if $greedy
     * is true; otherwise nothing is consumed and '' is returned.
     *
     * @param mixed $what   a string or a collection of strings
     * @param bool  $greedy
     *
     * @return string
     */
    public function ConsumeUntilLast(/*mixed*/ $what, /*bool*/ $greedy = true)
    {
        $result = self::_ConsumeUntilLast($this->document, $greedy, $this->documentPosition, $this->documentSize, $what, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances until $callback yields true and returns what was consumed
     *
     * @param callable $callback
     *
     * @return string|null
     */
    public function ConsumeUntilCallback(/*function*/ $callback)
    {
        $result = self::_ConsumeUntilCallback($this->document, $this->documentPosition, $this->documentSize, $callback, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances while the character ahead matches $what and returns what was consumed
     *
     * @param mixed $what a character or a collection of characters
     *
     * @return string
     */
    public function ConsumeWhile(/*mixed*/ $what)
    {
        $result = self::_ConsumeWhile($this->document, $this->documentPosition, $this->documentSize, $what, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Advances while $callback yields true and returns what was consumed
     *
     * @param callable $callback
     *
     * @return string|null
     */
    public function ConsumeWhileCallback(/*function*/ $callback)
    {
        $result = self::_ConsumeWhileCallback($this->document, $this->documentPosition, $this->documentSize, $callback, $consumedLength);
        return $this->_Advance($result, $consumedLength);
    }

    /**
     * Looks ahead in the document without advancing.
     * Returns exactly what Consume($what) would return.
     *
     * @param mixed $what
     *
     * @return mixed
     */
    public function Peek(/*mixed*/ $what = null)
    {
        return self::_Consume($this->document, $this->documentPosition, $this->documentSize, $what, $consumedLength);
    }

    /**
     * Returns the Parser to the start of the document
     *
     * @return void
     */
    public function Reset()
    {
        $this->documentPosition = 0;
    }

    //------------------------------------------------------------
    // Public (Constructor)
    //------------------------------------------------------------

    /**
     * Creates a Parser positioned at the start of $document.
     * A non-string argument triggers an error and leaves the document empty.
     *
     * @param string $document
     */
    public function __construct(/*string*/ $document)
    {
        if (is_string($document))
        {
            $this->document = $document;
            $this->documentSize = strlen($document);
            $this->documentPosition = 0;
        }
        else
        {
            trigger_error('document must be string');
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment