Skip to content

Instantly share code, notes, and snippets.

@paranoiq
Created December 18, 2019 20:24
Show Gist options
  • Save paranoiq/6f98e8ef174721c0af6d02ad9085d742 to your computer and use it in GitHub Desktop.
Save paranoiq/6f98e8ef174721c0af6d02ad9085d742 to your computer and use it in GitHub Desktop.
<?php declare(strict_types = 1);
/**
* This file is part of the SqlFtw library (https://github.com/sqlftw)
*
* Copyright (c) 2017 Vlasta Neubauer (@paranoiq)
*
* For the full copyright and license information read the file 'license.md', distributed with this source code
*/
// phpcs:disable Squiz.Arrays.ArrayDeclaration.ValueNoNewline
namespace SqlFtw\Parser\Lexer;
use Dogma\StrictBehaviorMixin;
use Generator;
use SqlFtw\Parser\ParserException;
use SqlFtw\Parser\Token;
use SqlFtw\Parser\TokenType;
use SqlFtw\Platform\Mode;
use SqlFtw\Platform\PlatformSettings;
use function array_flip;
use function array_keys;
use function array_merge;
use function array_values;
use function implode;
use function ltrim;
use function ord;
use function preg_match;
use function rtrim;
use function str_replace;
use function strlen;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function substr;
use function trim;
/**
* todo:
* - Date and Time Literals?
* - Mysql string charset declaration (_utf* & N)
* - \N is synonym for NULL (until 8.0)
*/
class Lexer
{
use StrictBehaviorMixin;
/** Decimal digit characters; flipped into self::$numbersKey by the constructor. */
private const NUMBERS = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
/** ASCII letters; together with NUMBERS, '$' and '_' they form the unquoted-name character set. */
private const LETTERS = [
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
];
/** Characters that tokenize() glues together into a single operator token (and accepts in a DELIMITER definition). */
private const OPERATOR_SYMBOLS = ['!', '%', '&', '*', '+', '-', '/', ':', '<', '=', '>', '\\', '^', '|', '~'];
/** Case-insensitive pattern for a 36 character UUID in 8-4-4-4-12 hexadecimal form. */
public const UUID_REGEXP = '/^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$/i';
/** @var int[] lookup table (character => index) of decimal digits, used via isset(); built in the constructor */
private static $numbersKey;
/** @var int[] lookup table (character => index) of hexadecimal digits in both cases; built in the constructor */
private static $hexadecKey;
/** @var int[] lookup table (character => index) of characters allowed in unquoted names; built in the constructor */
private static $nameCharsKey;
/** @var int[] lookup table (character => index) of OPERATOR_SYMBOLS; built in the constructor */
private static $operatorSymbolsKey;
/** @var \SqlFtw\Platform\PlatformSettings source of platform features, SQL mode and the current delimiter */
private $settings;
/** @var bool when false, "#", "//" and plain block comments are not yielded by tokenize() */
private $withComments;
/** @var bool when true, tokenize() yields whitespace tokens as well */
private $withWhitespace;
/**
 * Stores the configuration and (re)builds the shared character lookup tables.
 *
 * The tables are static, so they are shared by all Lexer instances; they are rebuilt
 * from the class constants on every construction.
 *
 * @param \SqlFtw\Platform\PlatformSettings $settings platform, mode and delimiter settings
 * @param bool $withComments whether tokenize() should yield plain comment tokens
 * @param bool $withWhitespace whether tokenize() should yield whitespace tokens
 */
public function __construct(
    PlatformSettings $settings,
    bool $withComments = true,
    bool $withWhitespace = false
) {
    $this->settings = $settings;
    $this->withComments = $withComments;
    $this->withWhitespace = $withWhitespace;

    // characters are flipped into keys so that membership can be tested with isset()
    $hexLetters = ['A', 'a', 'B', 'b', 'C', 'c', 'D', 'd', 'E', 'e', 'F', 'f'];
    $nameExtras = ['$', '_'];

    self::$numbersKey = array_flip(self::NUMBERS);
    self::$hexadecKey = array_flip(array_merge(self::NUMBERS, $hexLetters));
    self::$nameCharsKey = array_flip(array_merge(self::LETTERS, self::NUMBERS, $nameExtras));
    self::$operatorSymbolsKey = array_flip(self::OPERATOR_SYMBOLS);
}
/**
 * Eager variant of tokenize(): runs the generator to its end and collects every token in a list.
 * Expects line endings to be converted to "\n" and UTF-8 encoding.
 *
 * @param string $string SQL code
 * @return \SqlFtw\Parser\Token[] tokens in the order they were produced
 */
public function tokenizeAll(string $string): array
{
    $collected = [];
    // drive the generator by hand; same sequence as iterating it with foreach
    for ($stream = $this->tokenize($string); $stream->valid(); $stream->next()) {
        $collected[] = $stream->current();
    }

    return $collected;
}
/**
 * Tokenize SQL code lazily. Expects line endings to be converted to "\n" and UTF-8 encoding.
 *
 * Scans the input byte by byte and yields a Token for every recognized lexeme. Whitespace tokens
 * are yielded only when $withWhitespace is set. Plain comments ("#", "//", "--" and ordinary block
 * comments) are yielded only when $withComments is set; optional ("!") and hint ("+") block
 * comments are always yielded.
 *
 * Side effect: when a DELIMITER definition is found (and the platform supports user delimiters),
 * the new delimiter is stored back into the settings via setDelimiter().
 *
 * @param string $string SQL code
 * @return \SqlFtw\Parser\Token[]|\Generator
 * @throws \SqlFtw\Parser\ParserException when a block comment starts inside a conditional comment
 * @throws EndOfCommentNotFoundException when a block comment is not terminated
 * @throws EndOfStringNotFoundException when a quoted string is not terminated
 * @throws ExpectedTokenNotFoundException on malformed bit/hex literals, exponents or delimiter definitions
 * @throws InvalidCharacterException on an unexpected control character
 */
public function tokenize(string $string): Generator
{
    $length = strlen($string);
    $position = 0;
    $row = 1;
    $column = 1;

    $features = $this->settings->getPlatform()->getFeatures();
    $reservedKey = array_flip($features->getReservedWords());
    $keywordsKey = array_flip($features->getNonReservedWords());
    $operatorKeywordsKey = array_flip($features->getOperatorKeywords());
    $delimiter = $this->settings->getDelimiter();

    /** @var \SqlFtw\Parser\Token|null $previous */
    $previous = null;
    // non-null while inside an accepted conditional comment; attached to the tokens produced meanwhile
    $condition = null;

    while ($position < $length) {
        // set once the UUID test has been done for this lexeme, so the fall-through cases do not repeat it
        $uuidCheck = false;
        $char = $string[$position];
        $startPosition = $position;
        $position++;
        $column++;

        // statement delimiter; may be longer than one character, so compare the whole
        // delimiter starting at the current lexeme and consume all of it on a match
        if ($delimiter !== '' && $char === $delimiter[0]) {
            $delimiterLength = strlen($delimiter);
            if ($delimiterLength === 1 || substr($string, $startPosition, $delimiterLength) === $delimiter) {
                $position = $startPosition + $delimiterLength;
                $column += $delimiterLength - 1;
                yield new Token(TokenType::SYMBOL | TokenType::DELIMITER, $startPosition, $delimiter, null, $condition);
                continue;
            }
        }

        switch ($char) {
            case ' ':
            case "\t":
            case "\r":
            case "\n":
                if ($char === "\n") {
                    // the run itself starts with a line break
                    $column = 1;
                    $row++;
                }
                $value = $char;
                while ($position < $length) {
                    $next = $string[$position];
                    if ($next === ' ' || $next === "\t" || $next === "\r") {
                        $value .= $next;
                        $position++;
                        $column++;
                    } elseif ($next === "\n") {
                        $value .= $next;
                        $position++;
                        $column = 1;
                        $row++;
                    } else {
                        break;
                    }
                }
                if ($this->withWhitespace) {
                    yield new Token(TokenType::WHITESPACE, $startPosition, $value, null, $condition);
                }
                break;
            case '(':
                yield $previous = new Token(TokenType::SYMBOL | TokenType::LEFT_PARENTHESIS, $startPosition, $char, null, $condition);
                break;
            case ')':
                yield $previous = new Token(TokenType::SYMBOL | TokenType::RIGHT_PARENTHESIS, $startPosition, $char, null, $condition);
                break;
            case '[':
                yield $previous = new Token(TokenType::SYMBOL | TokenType::LEFT_SQUARE_BRACKET, $startPosition, $char, null, $condition);
                break;
            case ']':
                yield $previous = new Token(TokenType::SYMBOL | TokenType::RIGHT_SQUARE_BRACKET, $startPosition, $char, null, $condition);
                break;
            case '{':
                yield $previous = new Token(TokenType::SYMBOL | TokenType::LEFT_CURLY_BRACKET, $startPosition, $char, null, $condition);
                break;
            case '}':
                yield $previous = new Token(TokenType::SYMBOL | TokenType::RIGHT_CURLY_BRACKET, $startPosition, $char, null, $condition);
                break;
            case ',':
                yield $previous = new Token(TokenType::SYMBOL | TokenType::COMMA, $startPosition, $char, null, $condition);
                break;
            case ';':
                yield $previous = new Token(TokenType::SYMBOL | TokenType::SEMICOLON, $startPosition, $char, null, $condition);
                break;
            case ':':
                $value = $char;
                while ($position < $length) {
                    $next = $string[$position];
                    if (isset(self::$operatorSymbolsKey[$next])) {
                        $value .= $next;
                        $position++;
                        $column++;
                    } else {
                        break;
                    }
                }
                if ($value !== ':') {
                    // ":" followed by other operator symbols, e.g. ":="
                    yield $previous = new Token(TokenType::SYMBOL | TokenType::OPERATOR, $startPosition, $value, null, $condition);
                } else {
                    yield $previous = new Token(TokenType::SYMBOL | TokenType::DOUBLE_COLON, $startPosition, $char, null, $condition);
                }
                break;
            case '*':
                // end of an accepted conditional comment: /*!12345 ... */
                if ($position < $length && $condition && $string[$position] === '/') {
                    $condition = null;
                    $position++;
                    $column++;
                    break;
                }
                // continue
            case '!':
            case '%':
            case '&':
            case '<':
            case '=':
            case '>':
            case '\\':
            case '^':
            case '|':
            case '~':
                $value = $char;
                while ($position < $length) {
                    $next = $string[$position];
                    if (isset(self::$operatorSymbolsKey[$next])) {
                        $value .= $next;
                        $position++;
                        $column++;
                    } else {
                        break;
                    }
                }
                yield $previous = new Token(TokenType::SYMBOL | TokenType::OPERATOR, $startPosition, $value, null, $condition);
                break;
            case '?':
                yield $previous = new Token(TokenType::SYMBOL | TokenType::PLACEHOLDER, $startPosition, $char, null, $condition);
                break;
            case '@':
                if ($previous !== null && ($previous->type & (TokenType::STRING | TokenType::NAME))) {
                    // user @ host
                    yield $previous = new Token(TokenType::SYMBOL | TokenType::OPERATOR, $startPosition, $char, null, $condition);
                    break;
                }
                // @variable
                $value = $char;
                while ($position < $length) {
                    $next = $string[$position];
                    if ($next === '@' || isset(self::$nameCharsKey[$next]) || ord($next) > 127) {
                        $value .= $next;
                        $position++;
                        $column++;
                    } else {
                        break;
                    }
                }
                yield new Token(TokenType::NAME | TokenType::AT_VARIABLE, $startPosition, $value, null, $condition);
                break;
            case '#':
                // # comment (up to and including the line break)
                $value = $char;
                while ($position < $length) {
                    $next = $string[$position];
                    if ($next === "\n") {
                        $value .= $next;
                        $position++;
                        $column = 1;
                        $row++;
                        break;
                    } else {
                        $value .= $next;
                        $position++;
                        $column++;
                    }
                }
                if ($this->withComments) {
                    yield $previous = new Token(TokenType::COMMENT | TokenType::HASH_COMMENT, $startPosition, $value, null, $condition);
                }
                break;
            case '/':
                $next = $position < $length ? $string[$position] : '';
                if ($next === '/') {
                    // // comment (up to and including the line break)
                    $position++;
                    $column++;
                    $value = $char . $next;
                    while ($position < $length) {
                        $next = $string[$position];
                        if ($next === "\n") {
                            $value .= $next;
                            $position++;
                            $column = 1;
                            $row++;
                            break;
                        } else {
                            $value .= $next;
                            $position++;
                            $column++;
                        }
                    }
                    if ($this->withComments) {
                        yield $previous = new Token(TokenType::COMMENT | TokenType::DOUBLE_SLASH_COMMENT, $startPosition, $value, null, $condition);
                    }
                } elseif ($next === '*') {
                    $position++;
                    $column++;
                    if ($condition !== null) {
                        throw new ParserException('Comment inside conditional comment');
                    }
                    $value = $char . $next;
                    $ok = false;
                    while ($position < $length) {
                        $next = $string[$position];
                        if ($next === '*' && ($position + 1 < $length) && $string[$position + 1] === '/') {
                            $value .= $next . $string[$position + 1];
                            $position += 2;
                            $column += 2;
                            $ok = true;
                            break;
                        } elseif ($next === "\n") {
                            $value .= $next;
                            $position++;
                            $column = 1;
                            $row++;
                        } else {
                            $value .= $next;
                            $position++;
                            $column++;
                        }
                    }
                    if (!$ok) {
                        throw new EndOfCommentNotFoundException(''); // todo
                    }
                    if ($value[2] === '!') {
                        // /*!12345 comment */
                        // the version digits start right after "/*!"; a missing version casts to 0
                        $versionId = (int) trim(substr($value, 3, 5));
                        if ($this->settings->getPlatform()->hasOptionalComments()
                            && ($versionId === 0 || $versionId <= $this->settings->getPlatform()->getVersion()->getId())
                        ) {
                            // todo: conditional comments
                            $condition = 'todo';
                        } else {
                            yield new Token(TokenType::COMMENT | TokenType::BLOCK_COMMENT | TokenType::OPTIONAL_COMMENT, $startPosition, $value);
                        }
                    } elseif ($value[2] === '+') {
                        // /*+ comment */
                        yield new Token(TokenType::COMMENT | TokenType::BLOCK_COMMENT | TokenType::HINT_COMMENT, $startPosition, $value);
                    } else {
                        // /* comment */
                        if ($this->withComments) {
                            yield new Token(TokenType::COMMENT | TokenType::BLOCK_COMMENT, $startPosition, $value);
                        }
                    }
                } else {
                    yield $previous = new Token(TokenType::SYMBOL | TokenType::OPERATOR, $startPosition, $char, null, $condition);
                }
                break;
            case '"':
                [$value, $orig] = $this->parseString($string, $position, $column, $row, $char);
                if ($this->settings->getMode()->contains(Mode::ANSI_QUOTES)) {
                    // in ANSI_QUOTES mode a double quoted string is a name
                    yield $previous = new Token(TokenType::NAME | TokenType::DOUBLE_QUOTED_STRING, $startPosition, $value, $orig, $condition);
                } else {
                    yield $previous = new Token(TokenType::VALUE | TokenType::STRING | TokenType::DOUBLE_QUOTED_STRING, $startPosition, $value, $orig, $condition);
                }
                break;
            case '\'':
                [$value, $orig] = $this->parseString($string, $position, $column, $row, $char);
                yield $previous = new Token(TokenType::VALUE | TokenType::STRING | TokenType::SINGLE_QUOTED_STRING, $startPosition, $value, $orig, $condition);
                break;
            case '`':
                [$value, $orig] = $this->parseString($string, $position, $column, $row, $char);
                yield $previous = new Token(TokenType::NAME | TokenType::BACKTICK_QUOTED_STRING, $startPosition, $value, $orig, $condition);
                break;
            case '.':
                $next = $position < $length ? $string[$position] : '';
                if (isset(self::$numbersKey[$next])) {
                    [$value, $orig] = $this->parseNumber($string, $position, $column, $row, '.');
                    if ($value !== null) {
                        yield $previous = new Token(TokenType::VALUE | TokenType::NUMBER, $startPosition, $value, $orig, $condition);
                        break;
                    }
                }
                yield $previous = new Token(TokenType::SYMBOL | TokenType::DOT, $startPosition, $char, null, $condition);
                break;
            case '-':
                $next = $position < $length ? $string[$position] : '';
                if ($next === '-') {
                    // -- comment (up to and including the line break)
                    $position++;
                    $column++;
                    $value = $char . $next;
                    while ($position < $length) {
                        $next = $string[$position];
                        if ($next === "\n") {
                            $value .= $next;
                            $position++;
                            $column = 1;
                            $row++;
                            break;
                        } else {
                            $value .= $next;
                            $position++;
                            $column++;
                        }
                    }
                    // honour $withComments the same way "#" and "//" comments do
                    if ($this->withComments) {
                        yield $previous = new Token(TokenType::COMMENT | TokenType::DOUBLE_HYPHEN_COMMENT, $startPosition, $value, null, $condition);
                    }
                    break;
                }
                if (isset(self::$numbersKey[$next])) {
                    [$value, $orig] = $this->parseNumber($string, $position, $column, $row, '-');
                    if ($value !== null) {
                        yield $previous = new Token(TokenType::VALUE | TokenType::NUMBER, $startPosition, $value, $orig, $condition);
                        break;
                    }
                }
                $value = $char;
                while ($position < $length) {
                    $next = $string[$position];
                    if (isset(self::$operatorSymbolsKey[$next])) {
                        $value .= $next;
                        $position++;
                        $column++;
                    } else {
                        break;
                    }
                }
                yield $previous = new Token(TokenType::SYMBOL | TokenType::OPERATOR, $startPosition, $value, null, $condition);
                break;
            case '+':
                $next = $position < $length ? $string[$position] : '';
                if (isset(self::$numbersKey[$next])) {
                    [$value, $orig] = $this->parseNumber($string, $position, $column, $row, '+');
                    if ($value !== null) {
                        yield $previous = new Token(TokenType::VALUE | TokenType::NUMBER, $startPosition, $value, $orig, $condition);
                        break;
                    }
                }
                $value = $char;
                while ($position < $length) {
                    $next = $string[$position];
                    if (isset(self::$operatorSymbolsKey[$next])) {
                        $value .= $next;
                        $position++;
                        $column++;
                    } else {
                        break;
                    }
                }
                yield $previous = new Token(TokenType::SYMBOL | TokenType::OPERATOR, $startPosition, $value, null, $condition);
                break;
            case '0':
                $next = $position < $length ? $string[$position] : '';
                if ($next === 'b') {
                    // 0b01
                    $position++;
                    $column++;
                    $bits = '';
                    while ($position < $length) {
                        $next = $string[$position];
                        if ($next !== '0' && $next !== '1') {
                            break;
                        }
                        $bits .= $next;
                        $position++;
                        $column++;
                    }
                    // yield after the loop, so a literal that ends the input is not lost
                    $orig = $char . 'b' . $bits;
                    yield $previous = new Token(TokenType::VALUE | TokenType::BINARY_LITERAL, $startPosition, $bits, $orig, $condition);
                    break;
                } elseif ($next === 'x') {
                    // 0x1f
                    $position++;
                    $column++;
                    $bits = '';
                    while ($position < $length) {
                        $next = $string[$position];
                        if (!isset(self::$hexadecKey[$next])) {
                            break;
                        }
                        $bits .= $next;
                        $position++;
                        $column++;
                    }
                    // yield after the loop, so a literal that ends the input is not lost
                    $orig = $char . 'x' . $bits;
                    yield $previous = new Token(TokenType::VALUE | TokenType::HEXADECIMAL_LITERAL, $startPosition, strtolower($bits), $orig, $condition);
                    break;
                }
                // continue
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                $uuidCheck = true;
                $value = substr($string, $position - 1, 36);
                // UUID
                if (strlen($value) === 36 && preg_match(self::UUID_REGEXP, $value)) {
                    $position += 35;
                    $column += 35;
                    yield $previous = new Token(TokenType::VALUE | TokenType::UUID, $startPosition, $value, null, $condition);
                    break;
                }
                [$value, $orig] = $this->parseNumber($string, $position, $column, $row, $char);
                if ($value !== null) {
                    yield $previous = new Token(TokenType::VALUE | TokenType::NUMBER, $startPosition, $value, $orig, $condition);
                    break;
                }
                // not a number (digits followed by name characters): continue and read it as a name
            case 'B':
            case 'b':
                // b'01'
                // B'01'
                if (($char === 'B' || $char === 'b') && $position < $length && $string[$position] === '\'') {
                    $position++;
                    $column++;
                    $bits = '';
                    $closed = false;
                    while ($position < $length) {
                        /** @var string $next */
                        $next = $string[$position];
                        if ($next === '0' || $next === '1') {
                            $bits .= $next;
                            $position++;
                            $column++;
                        } elseif ($next === '\'') {
                            $position++;
                            $column++;
                            $closed = true;
                            break;
                        } else {
                            throw new ExpectedTokenNotFoundException(''); // todo
                        }
                    }
                    if (!$closed) {
                        // input ended before the closing quote
                        throw new ExpectedTokenNotFoundException(''); // todo
                    }
                    $orig = $char . '\'' . $bits . '\'';
                    yield $previous = new Token(TokenType::VALUE | TokenType::BINARY_LITERAL, $startPosition, $bits, $orig, $condition);
                    break;
                }
                // continue
            case 'A':
            case 'a':
            case 'C':
            case 'c':
            case 'D':
            case 'd':
            case 'E':
            case 'e':
            case 'F':
            case 'f':
                if (!$uuidCheck) {
                    $value = substr($string, $position - 1, 36);
                    // UUID
                    if (strlen($value) === 36 && preg_match(self::UUID_REGEXP, $value)) {
                        $position += 35;
                        $column += 35;
                        yield $previous = new Token(TokenType::VALUE | TokenType::UUID, $startPosition, $value, null, $condition);
                        break;
                    }
                }
                // continue
            case 'X':
            case 'x':
                // x'1f'
                // X'1F'
                if (($char === 'X' || $char === 'x') && $position < $length && $string[$position] === '\'') {
                    $position++;
                    $column++;
                    $bits = '';
                    $closed = false;
                    while ($position < $length) {
                        $next = $string[$position];
                        if (isset(self::$hexadecKey[$next])) {
                            $bits .= $next;
                            $position++;
                            $column++;
                        } elseif ($next === '\'') {
                            $position++;
                            $column++;
                            $closed = true;
                            break;
                        } else {
                            throw new ExpectedTokenNotFoundException(''); // todo
                        }
                    }
                    if (!$closed) {
                        // input ended before the closing quote
                        throw new ExpectedTokenNotFoundException(''); // todo
                    }
                    if ((strlen($bits) % 2) === 1) {
                        // a quoted hexadecimal literal needs an even number of digits
                        throw new ExpectedTokenNotFoundException(''); // todo
                    }
                    $orig = $char . '\'' . $bits . '\'';
                    yield $previous = new Token(TokenType::VALUE | TokenType::HEXADECIMAL_LITERAL, $startPosition, strtolower($bits), $orig, $condition);
                    break;
                }
                // continue
            case 'G':
            case 'g':
            case 'H':
            case 'h':
            case 'I':
            case 'i':
            case 'J':
            case 'j':
            case 'K':
            case 'k':
            case 'L':
            case 'l':
            case 'M':
            case 'm':
            case 'N':
                // todo: charset declaration
            case 'n':
            case 'O':
            case 'o':
            case 'P':
            case 'p':
            case 'Q':
            case 'q':
            case 'R':
            case 'r':
            case 'S':
            case 's':
            case 'T':
            case 't':
            case 'U':
            case 'u':
            case 'V':
            case 'v':
            case 'W':
            case 'w':
            case 'Y':
            case 'y':
            case 'Z':
            case 'z':
            case '$':
                // keyword or unquoted name
                $value = $char;
                while ($position < $length) {
                    $next = $string[$position];
                    if (isset(self::$nameCharsKey[$next]) || ord($next) > 127) {
                        $value .= $next;
                        $position++;
                        $column++;
                    } else {
                        break;
                    }
                }
                $upper = strtoupper($value);
                if ($upper === 'NULL') {
                    yield $previous = new Token(TokenType::KEYWORD | TokenType::VALUE, $startPosition, 'NULL', $value, $condition);
                } elseif ($upper === 'TRUE') {
                    yield $previous = new Token(TokenType::KEYWORD | TokenType::VALUE, $startPosition, 'TRUE', $value, $condition);
                } elseif ($upper === 'FALSE') {
                    yield $previous = new Token(TokenType::KEYWORD | TokenType::VALUE, $startPosition, 'FALSE', $value, $condition);
                } elseif (isset($reservedKey[$upper])) {
                    if (isset($operatorKeywordsKey[$upper])) {
                        yield $previous = new Token(TokenType::KEYWORD | TokenType::RESERVED | TokenType::OPERATOR, $startPosition, $upper, $value, $condition);
                    } else {
                        yield $previous = new Token(TokenType::KEYWORD | TokenType::RESERVED, $startPosition, $upper, $value, $condition);
                    }
                } elseif (isset($keywordsKey[$upper])) {
                    yield $previous = new Token(TokenType::KEYWORD | TokenType::NAME | TokenType::UNQUOTED_NAME, $startPosition, $upper, $value, $condition);
                } elseif ($upper === 'DELIMITER' && $this->settings->getPlatform()->hasUserDelimiter()) {
                    // DELIMITER <symbols>: switches the statement delimiter for the rest of the input
                    yield new Token(TokenType::KEYWORD, $startPosition, $upper, $value, $condition);
                    $startPosition = $position;
                    $whitespace = $this->parseWhitespace($string, $position, $column, $row);
                    if ($this->withWhitespace) {
                        yield new Token(TokenType::WHITESPACE, $startPosition, $whitespace, null, $condition);
                    }
                    $startPosition = $position;
                    $del = '';
                    while ($position < $length) {
                        $next = $string[$position];
                        if ($next === ';' || isset(self::$operatorSymbolsKey[$next])) {
                            $del .= $next;
                            $position++;
                            $column++;
                        } else {
                            break;
                        }
                    }
                    if ($del === '') {
                        throw new ExpectedTokenNotFoundException(''); // todo
                    }
                    $delimiter = $del;
                    $this->settings->setDelimiter($delimiter);
                    // no original text for this token; $condition goes into the condition slot
                    yield $previous = new Token(TokenType::SYMBOL | TokenType::DELIMITER_DEFINITION, $startPosition, $delimiter, null, $condition);
                } else {
                    yield $previous = new Token(TokenType::NAME | TokenType::UNQUOTED_NAME, $startPosition, $value, $value, $condition);
                }
                break;
            case '_':
                // todo: charset declaration
            default:
                if (ord($char) < 32) {
                    throw new InvalidCharacterException($char, $startPosition, ''); // todo
                }
                // anything else (including bytes above 127) starts an unquoted name
                $value = $char;
                while ($position < $length) {
                    $next = $string[$position];
                    if (isset(self::$nameCharsKey[$next]) || ord($next) > 127) {
                        $value .= $next;
                        $position++;
                        $column++;
                    } else {
                        break;
                    }
                }
                yield $previous = new Token(TokenType::NAME | TokenType::UNQUOTED_NAME, $startPosition, $value, $value, $condition);
        }
    }
}
/**
 * Consumes a run of spaces, tabs, carriage returns and line feeds starting at $position.
 *
 * $position, $column and $row are advanced in place; a line feed resets $column to 1
 * and increments $row.
 *
 * @param string $string whole input (passed by reference, not modified)
 * @param int $position offset of the first character to inspect
 * @param int $column
 * @param int $row
 * @return string the consumed whitespace (empty when there is none)
 */
private function parseWhitespace(string &$string, int &$position, int &$column, int &$row): string
{
    $end = strlen($string);
    $begin = $position;

    for (; $position < $end; $position++) {
        $current = $string[$position];
        if ($current === "\n") {
            $column = 1;
            $row++;
            continue;
        }
        if ($current !== ' ' && $current !== "\t" && $current !== "\r") {
            break;
        }
        $column++;
    }

    return substr($string, $begin, $position - $begin);
}
/**
 * Reads a quoted string. $position must point right behind the opening quote; on return it
 * points right behind the closing quote. $column and $row are updated along the way.
 *
 * A doubled quote inside the string never terminates it. A backslash escapes the following
 * character unless the NO_BACKSLASH_ESCAPES mode is active.
 *
 * @param string $string whole input (passed by reference, not modified)
 * @param int $position
 * @param int $column
 * @param int $row
 * @param string $quote the opening quote character, which is also the closing one
 * @return string[] ($value, $orig) unescaped content and the original text including quotes
 * @throws EndOfStringNotFoundException when the input ends before the closing quote
 */
private function parseString(string &$string, int &$position, int &$column, int &$row, string $quote): array
{
    $length = strlen($string);
    $backslashes = !$this->settings->getMode()->contains(Mode::NO_BACKSLASH_ESCAPES);

    $orig = $quote;
    // true when the next character is escaped (by a backslash or as second half of a doubled quote)
    $escaped = false;
    $finished = false;
    while ($position < $length) {
        $next = $string[$position];
        $orig .= $next;
        $position++;

        if ($next === "\n") {
            $column = 1;
            $row++;
            // a backslash before a line break is used up by the line break
            // and must not escape the quote on the next line
            $escaped = false;
            continue;
        }
        $column++;

        if ($next === $quote) {
            if ($escaped) {
                $escaped = false;
            } elseif ($position < $length && $string[$position] === $quote) {
                // doubled quote: the following quote belongs to the content
                $escaped = true;
            } else {
                $finished = true;
                break;
            }
        } elseif ($backslashes && $next === '\\') {
            // an escaped backslash does not escape anything itself
            $escaped = !$escaped;
        } else {
            $escaped = false;
        }
    }
    if (!$finished) {
        throw new EndOfStringNotFoundException(''); // todo
    }

    $value = $this->unescapeString($orig, $quote);

    return [$value, $orig];
}
/**
 * Strips the surrounding quotes and translates escape sequences in a single left-to-right pass,
 * so the output of one replacement is never re-read as the start of another escape sequence
 * (an escaped backslash followed by "n" stays a backslash and an "n").
 *
 * NO_BACKSLASH_ESCAPES mode:
 * Disable the use of the backslash character (\) as an escape character within strings.
 * With this mode enabled, backslash becomes an ordinary character like any other.
 *
 * \0 An ASCII NUL (X'00') character
 * \' A single quote (') character
 * \" A double quote (") character
 * \b A backspace character
 * \n A newline (linefeed) character
 * \r A carriage return character
 * \t A tab character
 * \Z ASCII 26 (Control+Z)
 * \\ A backslash (\) character
 *
 * (do not unescape. keep original for LIKE)
 * \% A % character
 * \_ A _ character
 *
 * A ' inside a string quoted with ' may be written as ''.
 * A " inside a string quoted with " may be written as "".
 *
 * @param string $string original text including the surrounding quotes
 * @param string $quote the quote character the string is enclosed in
 * @return string unescaped content
 */
private function unescapeString(string $string, string $quote): string
{
    // the cast covers PHP versions where substr() returns false for an empty result
    $string = (string) substr($string, 1, -1);

    // doubled quote is always collapsed, regardless of mode
    $translations = [$quote . $quote => $quote];
    if (!$this->settings->getMode()->contains(Mode::NO_BACKSLASH_ESCAPES)) {
        $translations += [
            '\\0' => "\x00",
            '\\\'' => '\'',
            '\\"' => '"',
            '\\b' => "\x08",
            '\\n' => "\n",
            '\\r' => "\r",
            '\\t' => "\t",
            '\\Z' => "\x1A",
            '\\\\' => '\\',
        ];
        // todo: ???
    }

    // strtr() with an array scans once and never re-scans replaced text
    return strtr($string, $translations);
}
/**
 * Tries to read a numeric literal whose first character ($start: a digit, sign or dot) has
 * already been consumed; $position points right behind it.
 *
 * Nothing is consumed until the literal is accepted. On success $position and $column are
 * advanced behind the literal. When the digits are directly followed by a name character,
 * the text is not a number and [null, null] is returned with $position untouched.
 *
 * The value is returned as int or float when it round-trips through that type unchanged,
 * otherwise as the normalized string.
 *
 * @param string $string whole input (passed by reference, not modified)
 * @param int $position
 * @param int $column
 * @param int $row
 * @param string $start first character of the literal
 * @return int[]|float[]|string[]|null[] (int|float|string|null $value, string|null $orig)
 * @throws ExpectedTokenNotFoundException when an exponent marker is not followed by any digit
 */
private function parseNumber(string &$string, int &$position, int &$column, int &$row, string $start): array
{
    $length = strlen($string);
    $offset = 0;
    $num = isset(self::$numbersKey[$start]);
    $base = $start;
    // initialized up front: several exits below leave the loop before an exponent is looked for
    $exp = '';
    do {
        // integer
        $next = '';
        while ($position + $offset < $length) {
            $next = $string[$position + $offset];
            if (!isset(self::$numbersKey[$next])) {
                break;
            }
            $base .= $next;
            $offset++;
            $num = true;
        }
        if ($position + $offset >= $length) {
            break;
        }

        // decimal part
        if ($next === '.') {
            if ($start === '.') {
                // the literal started with a dot, so a second dot cannot belong to it
                break;
            }
            $base .= $next;
            $offset++;
            while ($position + $offset < $length) {
                $next = $string[$position + $offset];
                if (!isset(self::$numbersKey[$next])) {
                    break;
                }
                $base .= $next;
                $offset++;
                $num = true;
            }
        }
        if ($position + $offset >= $length) {
            break;
        }

        // exponent
        $next = $string[$position + $offset];
        if ($next === 'e' || $next === 'E') {
            $exp = $next;
            $offset++;
            $next = $position + $offset < $length ? $string[$position + $offset] : '';
            $expComplete = false;
            if ($next === '+' || $next === '-' || isset(self::$numbersKey[$next])) {
                $exp .= $next;
                $offset++;
                if (isset(self::$numbersKey[$next])) {
                    $expComplete = true;
                }
            }
            while ($position + $offset < $length) {
                $next = $string[$position + $offset];
                if (!isset(self::$numbersKey[$next])) {
                    break;
                }
                $exp .= $next;
                $offset++;
                $expComplete = true;
            }
            if (!$expComplete) {
                // exponent marker (and possibly a sign) without digits
                throw new ExpectedTokenNotFoundException(''); // todo
            }
        } elseif (isset(self::$nameCharsKey[$next]) || ord($next) > 127) {
            // digits directly followed by a name character: not a number
            $num = false;
        }
    } while (false);

    if (!$num) {
        return [null, null];
    }

    $orig = $base;
    // normalize: no leading plus, no trailing dot, always a digit before the decimal point
    $value = strtolower(rtrim(ltrim($base, '+'), '.'));
    if ($value[0] === '.') {
        $value = '0' . $value;
    }
    if ($exp !== '') {
        $orig .= $exp;
        $value = (string) (float) ($value . $exp);
    }
    if ($value === (string) (int) $value) {
        $value = (int) $value;
    } elseif ($value === (string) (float) $value) {
        $value = (float) $value;
    }

    // the first character has already been consumed by the caller
    $len = strlen($orig) - 1;
    $position += $len;
    $column += $len;

    return [$value, $orig];
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment