Skip to content

Instantly share code, notes, and snippets.

@jm42
Created September 21, 2014 15:33
Show Gist options
  • Save jm42/b5380d71adc114ad73fa to your computer and use it in GitHub Desktop.
Save jm42/b5380d71adc114ad73fa to your computer and use it in GitHub Desktop.
Feature-like Language
<?php
/*
 * Demo script: parses a sample program and dumps the resulting tree.
 *
 * NOTE(review): no require/autoload is visible in this script, so the Lexer
 * and Parser classes are assumed to be loaded before this file runs.
 */

/*
 * Sample input. It starts with a free-form title beginning with "This",
 * followed by a "Play <game> as <user>" line and two "Spec" sections made of
 * Given / When / Then / And steps.
 */
$example = <<<EOS
This is the title, a free-form text
that can have as many lines
of text as I wish.
Play Wikipedia as jm
Spec main
Given the article
When the title is 'Paris'
Then continue to next link
And click
Spec weight
Given a word
Then set ret to count word
And return ret
EOS;
/* The lexer splits the text into tokens; the parser consumes them. */
$lexer = new Lexer($example);
$parser = new Parser($lexer);
/* Print the structure returned by parse() for inspection. */
print_r($parser->parse());
<?php
class Lexer {
    /*
     * Read and transform the source into tokens.
     *
     * The whole input is split eagerly in the constructor. This keeps every
     * Lexer instance independent of strtok()'s global state, so several lexers
     * can be used side by side and arbitrary look-ahead is a simple index read.
     */

    const TOKEN = " \t\n"; /* Characters used to split the input into tokens */

    const T_NONE = 0; /* Not a word (end of input, literal, punctuation, ...) */
    const T_IDEN = 1; /* Any word that is not a keyword */

    /* 1x base */
    const T_THIS = 10;
    const T_PLAY = 11;
    const T_AS = 12;
    const T_SPEC = 13;
    const T_AND = 14;
    const T_GIVEN = 15;
    const T_WHEN = 16;
    const T_THEN = 17;

    /* 2x objects */
    const T_A = 20;
    const T_THE = 21;

    /* 3x positions */
    const T_FIRST = 30;
    const T_PREVIOUS = 31;
    const T_NEXT = 32;
    const T_LAST = 33;

    /* 4x conditionals */
    const T_IS = 40;
    const T_NOT = 41;

    /* 5x actions */
    const T_TO = 50;

    private $tokens = array(); /* Every token of the input, in order */
    private $position = 0;     /* Index of the current token in $tokens */

    /**
     * Split the input into tokens.
     *
     * @param string $input Source text to tokenize.
     */
    public function __construct($input) {
        /* strtok() is drained right away so its shared state is never relied
           upon after construction; it also skips empty tokens for us. */
        $token = strtok((string) $input, static::TOKEN);
        while (false !== $token) {
            $this->tokens[] = $token;
            $token = strtok(static::TOKEN);
        }
    }

    /**
     * Look ahead without consuming anything.
     *
     * @param int $n How many tokens after the current one to look at.
     * @return string|false The token, or false when past the end of input.
     */
    public function peek($n=1) {
        $index = $this->position + $n;
        return isset($this->tokens[$index]) ? $this->tokens[$index] : false;
    }

    /**
     * @return string|false The current token, or false at end of input.
     */
    public function current() {
        return isset($this->tokens[$this->position])
            ? $this->tokens[$this->position]
            : false;
    }

    /**
     * Advance to the next token.
     */
    public function next() {
        $this->position++;
    }

    /**
     * Classify the current token.
     *
     * @return int One of the T_* constants: the keyword id when the token
     *             (case-insensitively) names a T_* constant, T_IDEN for any
     *             other token starting with a letter, T_NONE otherwise
     *             (including end of input).
     */
    public function key() {
        $value = $this->current();

        /* End of input has no first character to inspect. */
        if (false === $value || '' === $value || !ctype_alpha($value[0])) {
            return static::T_NONE;
        }

        $name = 'static::T_' . strtoupper($value);
        if (defined($name)) {
            $id = constant($name);
            /* The word "none" would otherwise resolve to T_NONE and stop
               being usable as an ordinary identifier. */
            if ($id !== static::T_NONE) {
                return $id;
            }
        }

        return self::T_IDEN;
    }

    /**
     * @return bool True while there is a current token to read.
     */
    public function valid() {
        return false !== $this->current();
    }
}
class Parser {
    /*
     * Recursive-descent parser: each parseXxx() method consumes the tokens of
     * one grammar rule from the lexer and returns an array node of the form
     * array('type' => <rule name>, 'data' => ...).
     */

    private $lexer; /* It feeds us tokens */

    /**
     * @param Lexer $lexer Token source, positioned on the first token.
     */
    public function __construct(Lexer $lexer) {
        $this->lexer = $lexer;
    }

    /**
     * Parses the input and returns a tree structure.
     *
     * @return array List of nodes: This, Play, then zero or more Spec.
     * @throws \RuntimeException On any syntax error.
     */
    public function parse() {
        return $this->parseTame();
    }

    /**
     * Abort parsing with a message describing the offending token.
     *
     * @param string|null $expected Description of what was expected, if known.
     * @param int         $n        Report the n-th look-ahead token instead of
     *                              the current one when greater than zero.
     * @throws \RuntimeException Always.
     */
    protected function throwSyntaxError($expected=null, $n=0) {
        $curr = $n > 0
            ? $this->lexer->peek($n)
            : $this->lexer->current();

        $message = 'Error: ';
        $message .= (null !== $expected && '' !== $expected)
            ? "Expected {$expected}, got"
            : 'Unexpected';
        /* The offending token itself decides between a quoted token and EOS;
           what follows it is irrelevant. */
        $message .= (false !== $curr && null !== $curr) ? " '{$curr}'." : ' EOS.';

        throw new \RuntimeException($message);
    }

    /**
     * Consume the current token, which must be of the given kind.
     *
     * @param int $token One of the Lexer::T_* constants.
     * @throws \RuntimeException When the current token is of another kind.
     */
    protected function match($token) {
        if ($this->lexer->key() !== $token)
            $this->throwSyntaxError($this->tokenName($token));

        $this->lexer->next();
    }

    /**
     * Translate a token id into a readable name for error messages.
     *
     * @param int $token One of the lexer's T_* constants.
     * @return string Constant name without the "T_" prefix, or the id itself
     *                when no such constant exists.
     */
    private function tokenName($token) {
        $class = new \ReflectionClass($this->lexer);

        foreach ($class->getConstants() as $name => $value) {
            if ($value === $token && 0 === strpos($name, 'T_'))
                return substr($name, 2);
        }

        return (string) $token;
    }

    /**
     * Tame ::= This Play [Section]
     */
    private function parseTame() {
        $ast = array(
            $this->parseThis(),
            $this->parsePlay()
        );

        while ($this->lexer->key() === Lexer::T_SPEC) {
            $ast[] = $this->parseSpec();
        }

        /* Anything left over after the last Spec is an error. */
        if ($this->lexer->valid())
            $this->throwSyntaxError('END');

        return $ast;
    }

    /**
     * This ::= "This" [data]*
     */
    private function parseThis() {
        $data = 'This';
        $this->match(Lexer::T_THIS);

        /* Stop at end of input as well; otherwise a missing "Play" would loop
           forever. parsePlay() reports the missing keyword afterwards. */
        while ($this->lexer->valid() && $this->lexer->key() !== Lexer::T_PLAY) {
            $data .= ' ' . $this->lexer->current();
            $this->lexer->next();
        }

        return array('type' => 'This', 'data' => $data);
    }

    /**
     * Play ::= "Play" <game> "as" <user>
     */
    private function parsePlay() {
        $this->match(Lexer::T_PLAY);

        $game = $this->lexer->current();
        $this->match(Lexer::T_IDEN);

        $this->match(Lexer::T_AS);

        $user = $this->lexer->current();
        $this->match(Lexer::T_IDEN);

        return array('type' => 'Play', 'data' => array(
            'game' => $game,
            'user' => $user,
        ));
    }

    /**
     * Spec ::= "Spec" <name> [Given]* [When]* {Then}*
     */
    private function parseSpec() {
        $this->match(Lexer::T_SPEC);

        $name = $this->lexer->current();
        $this->match(Lexer::T_IDEN);

        $given = array();
        $when = array();
        $then = array();
        $last = null; /* Kind of the previous step, so "And" can repeat it */

        while ($this->lexer->key() !== Lexer::T_SPEC && $this->lexer->valid()) {
            $token = $this->lexer->key();

            if (Lexer::T_AND === $token) {
                if (null === $last)
                    $this->throwSyntaxError('GIVEN, WHEN or THEN');
                $token = $last;
            }

            switch ($token) {
                case Lexer::T_GIVEN:
                    $given[] = $this->parseGiven();
                    break;
                case Lexer::T_WHEN:
                    $when[] = $this->parseWhen();
                    break;
                case Lexer::T_THEN:
                    $then[] = $this->parseThen();
                    break;
                default:
                    $this->throwSyntaxError('GIVEN, WHEN, THEN or AND');
            }

            $last = $token;
        }

        return array('type' => 'Spec', 'data' => array(
            'name' => $name,
            'given' => $given,
            'when' => $when,
            'then' => $then,
        ));
    }

    /**
     * Given ::= ( "Given" | "And" ) Object
     */
    private function parseGiven() {
        $token = $this->lexer->key();
        if (!in_array($token, array(Lexer::T_GIVEN, Lexer::T_AND), true))
            $this->throwSyntaxError('GIVEN or AND');
        $this->match($token);

        $data = $this->parseObject();

        return array('type' => 'Given', 'data' => array(
            'object' => $data,
        ));
    }

    /**
     * When ::= ( "When" | "And" ) ( Action | Object ) Condition
     */
    private function parseWhen() {
        $token = $this->lexer->key();
        if (!in_array($token, array(Lexer::T_WHEN, Lexer::T_AND), true))
            $this->throwSyntaxError('WHEN or AND');
        $this->match($token);

        $object = null;
        $action = null;

        /* A plain word starts an action; an article/position starts an object. */
        if (Lexer::T_IDEN === $this->lexer->key())
            $action = $this->parseAction();
        else
            $object = $this->parseObject();

        $cond = $this->parseCondition();

        return array('type' => 'When', 'data' => array(
            'action' => $action,
            'object' => $object,
            'condition' => $cond,
        ));
    }

    /**
     * Then ::= ( "Then" | "And" ) Action
     */
    private function parseThen() {
        $token = $this->lexer->key();
        if (!in_array($token, array(Lexer::T_THEN, Lexer::T_AND), true))
            $this->throwSyntaxError('THEN or AND');
        $this->match($token);

        $action = $this->parseAction();

        return array('type' => 'Then', 'data' => array(
            'action' => $action,
        ));
    }

    /**
     * Action ::= <name> [<param>]* ["TO" ( Action | Object )]
     */
    private function parseAction() {
        $name = $this->lexer->current();
        $this->match(Lexer::T_IDEN);

        $params = array();
        while (Lexer::T_IDEN === $this->lexer->key()) {
            $params[] = $this->lexer->current();
            $this->lexer->next();
        }

        $object = null;
        $action = null;

        if (Lexer::T_TO === $this->lexer->key()) {
            $this->match(Lexer::T_TO);

            if (Lexer::T_IDEN === $this->lexer->key())
                $action = $this->parseAction();
            else
                $object = $this->parseObject();
        }

        return array('type' => 'Action', 'data' => array(
            'name' => $name,
            'params' => $params,
            'object' => $object,
            'action' => $action,
        ));
    }

    /**
     * Object ::= ( "A" | "THE" | Position ) <name>
     * Position ::= ( "FIRST" | "PREVIOUS" | "NEXT" | "LAST" )
     */
    private function parseObject() {
        $token = $this->lexer->key();
        $expected = array(Lexer::T_A, Lexer::T_THE,
                          Lexer::T_FIRST, Lexer::T_PREVIOUS,
                          Lexer::T_NEXT, Lexer::T_LAST);

        if (!in_array($token, $expected, true))
            $this->throwSyntaxError('A, THE, FIRST, PREVIOUS, NEXT or LAST');
        $this->match($token);

        $name = $this->lexer->current();
        $this->match(Lexer::T_IDEN);

        return array('type' => 'Object', 'data' => array(
            'type' => $token, // FIXME make me a literal
            'name' => $name,
        ));
    }

    /**
     * Condition ::= "IS" ["NOT"] <literal>
     */
    private function parseCondition() {
        $this->match(Lexer::T_IS);

        $nega = Lexer::T_NOT === $this->lexer->key();
        if ($nega)
            $this->lexer->next();

        $data = $this->parseLiteral();

        return array('type' => 'Condition', 'data' => array(
            'not' => $nega,
            'cond' => $data,
        ));
    }

    /**
     * Literal ::= "'" [data] "'"
     *
     * The lexer splits on whitespace, so a literal containing spaces arrives
     * as several tokens; they are joined back with single spaces.
     */
    private function parseLiteral() {
        $data = $this->lexer->current();
        if (false === $data || '\'' !== $data[0])
            $this->throwSyntaxError('LITERAL');
        $this->lexer->next();

        /* Drop the opening quote. */
        $data = (string) substr($data, 1);

        /* Single-token literal: the closing quote is on the same token. */
        if ('\'' === substr($data, -1))
            return array('type' => 'Literal', 'data' => (string) substr($data, 0, -1));

        while ($this->lexer->valid()) {
            $tmp = $this->lexer->current();
            /* Consume every token of the literal, the closing one included,
               so the caller resumes right after it. */
            $this->lexer->next();

            if ('\'' === substr($tmp, -1)) {
                $data .= ' ' . substr($tmp, 0, -1);
                return array('type' => 'Literal', 'data' => $data);
            }

            $data .= ' ' . $tmp;
        }

        /* Ran out of input before finding the closing quote. */
        $this->throwSyntaxError('closing quote');
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment