Skip to content

Instantly share code, notes, and snippets.

@pyldin601
Last active October 22, 2016 09:53
Show Gist options
  • Save pyldin601/cf857fcbcdacbf180719818717f1aa4f to your computer and use it in GitHub Desktop.
Save pyldin601/cf857fcbcdacbf180719818717f1aa4f to your computer and use it in GitHub Desktop.
/**
 * Splits source text into a flat list of lexeme tokens.
 *
 * The tokenizer is a small state machine built from mutually recursive
 * closures. Every state receives the characters still to be read plus the
 * tokens collected so far, and hands over to the next state:
 *
 *  - base:    dispatches on the current character;
 *  - symbol:  accumulates a run of symbol characters;
 *  - string:  accumulates characters up to the closing double quote;
 *  - escape:  takes the character following a backslash literally;
 *  - comment: accumulates characters up to (not including) a new line.
 *
 * Each character is consumed by one nested closure call, so the call depth
 * grows with the length of $code.
 *
 * @param string $code Source text to tokenize.
 *
 * @return array The accumulator built up through append(), starting from an
 *               empty array; empty when $code is empty.
 *
 * @throws TokenizerException When $code ends inside a string literal or
 *                            directly after an escape character.
 */
public static function tokenize($code)
{
    // Before PHP 8.2 str_split('') returns [''] rather than [], which would
    // feed a bogus empty "character" into the state machine. Empty input
    // simply has no tokens.
    if ($code === null || $code === '') {
        return [];
    }

    // Initial state of parser
    $baseIter = function ($rest, $acc) use (&$baseIter, &$symbolIter, &$stringIter, &$commentIter) {
        if (count($rest) === 0) {
            return $acc;
        }
        list($head, $tail) = toHeadTail($rest);
        switch ($head) {
            // We got '(', so we just add it to list of lexemes.
            case self::TOKEN_OPEN_BRACKET:
                return $baseIter($tail, append($acc, new OpenBracketToken));
            // We got ')' and doing the same as in previous case.
            case self::TOKEN_CLOSE_BRACKET:
                return $baseIter($tail, append($acc, new CloseBracketToken));
            // We got '"'! It means that we are at the beginning of the string
            // and must switch our state to stringIter.
            case self::TOKEN_DOUBLE_QUOTE:
                return $stringIter($tail, '', $acc);
            // We got ';'. It means that comment is starting here. So we
            // change our state to commentIter.
            case self::TOKEN_SEMICOLON:
                return $commentIter($tail, '', $acc);
            default:
                // If current char is a delimiter, we just ignore it.
                if (self::isDelimiter($head)) {
                    return $baseIter($tail, $acc);
                }
                // In all other cases we interpret current char as start
                // of symbol and change our state to symbolIter
                return $symbolIter($tail, $head, $acc);
        }
    };

    // State when parser parses any symbol
    $symbolIter = function ($rest, $buffer, $acc) use (&$symbolIter, &$baseIter) {
        if (count($rest) > 0) {
            list($head, $tail) = toHeadTail($rest);
            if (self::isSymbol($head)) {
                return $symbolIter($tail, $buffer . $head, $acc);
            }
        }
        // End of input or a non-symbol character: emit the symbol and let
        // the base state look at the unconsumed character (if any).
        $symbolToken = new SymbolToken($buffer);
        return $baseIter($rest, append($acc, $symbolToken));
    };

    // State when parser parses string
    $stringIter = function ($rest, $buffer, $acc) use (&$stringIter, &$baseIter, &$escapeIter) {
        if (count($rest) === 0) {
            // Input ran out before the closing double quote was seen.
            throw new TokenizerException("Unexpected end of string");
        }
        list($head, $tail) = toHeadTail($rest);
        if ($head == self::TOKEN_DOUBLE_QUOTE) {
            return $baseIter($tail, append($acc, new StringToken($buffer)));
        }
        if ($head == Tokenizer::TOKEN_BACK_SLASH) {
            // The backslash itself is dropped; the escape state decides
            // what to do with the character after it.
            return $escapeIter($tail, $buffer, $acc);
        }
        return $stringIter($tail, $buffer . $head, $acc);
    };

    // State when parser parses escaped symbol
    $escapeIter = function ($rest, $buffer, $acc) use (&$stringIter) {
        if (count($rest) === 0) {
            throw new TokenizerException("Unused escape character");
        }
        list($head, $tail) = toHeadTail($rest);
        // The escaped character is appended as-is, so an escaped double
        // quote does not terminate the string.
        return $stringIter($tail, $buffer . $head, $acc);
    };

    // State when parser reads a comment
    $commentIter = function ($rest, $buffer, $acc) use (&$commentIter, &$baseIter) {
        if (count($rest) > 0) {
            list($head, $tail) = toHeadTail($rest);
            if ($head != Tokenizer::TOKEN_NEW_LINE) {
                return $commentIter($tail, $buffer . $head, $acc);
            }
        }
        // End of input or a new line: emit the comment; the new line itself
        // stays in $rest for the base state.
        return $baseIter($rest, append($acc, new CommentToken($buffer)));
    };

    return $baseIter(str_split($code), []);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment