Created
September 21, 2014 15:33
-
-
Save jm42/b5380d71adc114ad73fa to your computer and use it in GitHub Desktop.
Feature-like Language
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
/*
 * Demo script: feed a sample feature description to the lexer and parser
 * and dump the resulting tree.
 *
 * The text between "This" and "Play" is the free-form title; each "Spec"
 * section then lists its Given / When / Then steps.
 */
$example = <<<EOS
This is the title, a free-form text
that can have as many lines
of text as I wish.
Play Wikipedia as jm
Spec main
Given the article
When the title is 'Paris'
Then continue to next link
And click
Spec weight
Given a word
Then set ret to count word
And return ret
EOS;

/* The parser pulls its tokens from the lexer it is constructed with. */
$parser = new Parser(new Lexer($example));
$tree = $parser->parse();

print_r($tree);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
/**
 * Splits a source text into delimiter-separated tokens and classifies them.
 *
 * The whole input is tokenised once in the constructor, so an instance never
 * depends on the process-global state of strtok() afterwards: several lexers
 * can be used side by side and arbitrary look-ahead is a plain array read.
 *
 * A token whose upper-cased text matches a T_* keyword constant is reported
 * as that keyword by key(); any other token starting with a letter is an
 * identifier (T_IDEN); everything else (quoted literals, end of input) is
 * T_NONE.
 */
class Lexer {
    const TOKEN = " \t\n"; /* Characters that separate tokens in the input */

    const T_NONE = 0;
    const T_IDEN = 1;

    /* 1x base */
    const T_THIS = 10;
    const T_PLAY = 11;
    const T_AS = 12;
    const T_SPEC = 13;
    const T_AND = 14;
    const T_GIVEN = 15;
    const T_WHEN = 16;
    const T_THEN = 17;

    /* 2x objects */
    const T_A = 20;
    const T_THE = 21;

    /* 3x positions */
    const T_FIRST = 30;
    const T_PREVIOUS = 31;
    const T_NEXT = 32;
    const T_LAST = 33;

    /* 4x conditionals */
    const T_IS = 40;
    const T_NOT = 41;

    /* 5x actions */
    const T_TO = 50;

    private $tokens = array(); /* Every token of the input, in order */
    private $position = 0;     /* Index of the current token in $tokens */

    /**
     * @param string $input Source text to tokenise.
     */
    public function __construct($input) {
        /* Drain strtok() right away so no global tokenizer state is kept
           alive between method calls. */
        $token = strtok($input, static::TOKEN);
        while (false !== $token) {
            $this->tokens[] = $token;
            $token = strtok(static::TOKEN);
        }
    }

    /**
     * Looks ahead without consuming anything.
     *
     * @param int $n How many tokens past the current one to look (1 = next).
     * @return string|false The token text, or false past the end of input.
     */
    public function peek($n=1) {
        $index = $this->position + $n;
        return isset($this->tokens[$index]) ? $this->tokens[$index] : false;
    }

    /**
     * @return string|false Text of the current token, or false at end of input.
     */
    public function current() {
        return isset($this->tokens[$this->position])
            ? $this->tokens[$this->position]
            : false;
    }

    /**
     * Advances to the following token.
     */
    public function next() {
        $this->position++;
    }

    /**
     * Classifies the current token.
     *
     * @return int One of the T_* constants.
     */
    public function key() {
        $value = $this->current();

        /* End of input, or a token that does not start with a letter. */
        if (false === $value || !ctype_alpha($value[0])) {
            return static::T_NONE;
        }

        $word = strtoupper($value);

        /* T_NONE and T_IDEN are classification markers, not keywords: the
           plain words "none" and "iden" must stay ordinary identifiers. */
        if ('NONE' !== $word && 'IDEN' !== $word) {
            $name = get_called_class() . '::T_' . $word;
            if (defined($name)) {
                return constant($name);
            }
        }

        return self::T_IDEN;
    }

    /**
     * @return bool True while there is a current token left to read.
     */
    public function valid() {
        return false !== $this->current();
    }
}
/**
 * Recursive-descent parser for the feature-like language.
 *
 * It pulls tokens from a Lexer and builds a tree of nested arrays; every
 * node has the shape array('type' => <node name>, 'data' => <payload>).
 * Any syntax problem is reported by throwing a \RuntimeException.
 */
class Parser {
    private $lexer; /* It feeds us tokens */

    public function __construct(Lexer $lexer) {
        $this->lexer = $lexer;
    }

    /**
     * Parses the input and returns a tree structure.
     *
     * @return array List of nodes: the This node, the Play node, then one
     *               node per Spec section.
     * @throws \RuntimeException On a syntax error.
     */
    public function parse() {
        return $this->parseTame();
    }

    /**
     * Raises a syntax error describing the offending token.
     *
     * @param string|int|null $expected What was expected: a description, or
     *                                  a Lexer::T_* value (translated to its
     *                                  name). Null or '' gives a plain
     *                                  "Unexpected ..." message.
     * @param int             $n        0 reports the current token, a
     *                                  positive value the n-th token ahead.
     * @throws \RuntimeException Always.
     */
    protected function throwSyntaxError($expected=null, $n=0) {
        $curr = $n > 0 ? $this->lexer->peek($n) : $this->lexer->current();

        if (is_int($expected)) {
            $expected = $this->tokenName($expected);
        }

        $message = 'Error: ';
        $message .= (null !== $expected && '' !== $expected)
            ? "Expected {$expected}, got"
            : 'Unexpected';
        /* The offending token itself decides between its text and EOS. */
        $message .= false !== $curr ? " '{$curr}'." : ' EOS.';

        throw new \RuntimeException($message);
    }

    /**
     * Consumes the current token if it is of the given type.
     *
     * @param int $token Expected Lexer::T_* value.
     * @throws \RuntimeException When the current token is of another type.
     */
    protected function match($token) {
        if ($this->lexer->key() !== $token) {
            $this->throwSyntaxError($token);
        }
        $this->lexer->next();
    }

    /**
     * Translates a token type into a readable name for error messages.
     *
     * @param int $token A Lexer::T_* value.
     * @return string Constant name without its "T_" prefix, or the number
     *                itself when no such constant exists.
     */
    private function tokenName($token) {
        $reflection = new \ReflectionClass($this->lexer);
        foreach ($reflection->getConstants() as $name => $value) {
            if ($value === $token && 0 === strpos($name, 'T_')) {
                return substr($name, 2);
            }
        }
        return (string) $token;
    }

    /**
     * Consumes the keyword that opens a step: the given keyword, or "And".
     *
     * @param int    $keyword  Lexer::T_GIVEN, T_WHEN or T_THEN.
     * @param string $expected Description used in the error message.
     * @throws \RuntimeException When the current token is neither.
     */
    private function matchLeading($keyword, $expected) {
        $token = $this->lexer->key();
        if (!in_array($token, array($keyword, Lexer::T_AND), true)) {
            $this->throwSyntaxError($expected);
        }
        $this->match($token);
    }

    /**
     * Tame ::= This Play [Section]
     */
    private function parseTame() {
        $ast = array(
            $this->parseThis(),
            $this->parsePlay()
        );

        while ($this->lexer->key() === Lexer::T_SPEC) {
            $ast[] = $this->parseSpec();
        }

        /* Anything left after the last Spec is garbage. */
        if ($this->lexer->valid()) {
            $this->throwSyntaxError('END');
        }

        return $ast;
    }

    /**
     * This ::= "This" [data]*
     */
    private function parseThis() {
        $data = 'This';
        $this->match(Lexer::T_THIS);

        while ($this->lexer->key() !== Lexer::T_PLAY) {
            /* Without this guard, input that ends before "Play" would
               loop forever appending empty words. */
            if (!$this->lexer->valid()) {
                $this->throwSyntaxError('PLAY');
            }
            $data .= ' ' . $this->lexer->current();
            $this->lexer->next();
        }

        return array('type' => 'This', 'data' => $data);
    }

    /**
     * Play ::= "Play" <game> "as" <user>
     */
    private function parsePlay() {
        $this->match(Lexer::T_PLAY);

        $game = $this->lexer->current();
        $this->match(Lexer::T_IDEN);

        $this->match(Lexer::T_AS);

        $user = $this->lexer->current();
        $this->match(Lexer::T_IDEN);

        return array('type' => 'Play', 'data' => array(
            'game' => $game,
            'user' => $user,
        ));
    }

    /**
     * Spec ::= "Spec" <name> [Given]* [When]* {Then}*
     */
    private function parseSpec() {
        $this->match(Lexer::T_SPEC);

        $name = $this->lexer->current();
        $this->match(Lexer::T_IDEN);

        $given = array();
        $when = array();
        $then = array();
        $last = null; /* Kind of the previous step, which "And" repeats */

        while ($this->lexer->key() !== Lexer::T_SPEC && $this->lexer->valid()) {
            $token = $this->lexer->key();

            if (Lexer::T_AND === $token) {
                if (null === $last) {
                    $this->throwSyntaxError('GIVEN, WHEN or THEN');
                }
                $token = $last;
            }

            switch ($token) {
                case Lexer::T_GIVEN:
                    $given[] = $this->parseGiven();
                    break;
                case Lexer::T_WHEN:
                    $when[] = $this->parseWhen();
                    break;
                case Lexer::T_THEN:
                    $then[] = $this->parseThen();
                    break;
                default:
                    $this->throwSyntaxError('GIVEN, WHEN, THEN or AND');
            }

            $last = $token;
        }

        return array('type' => 'Spec', 'data' => array(
            'name' => $name,
            'given' => $given,
            'when' => $when,
            'then' => $then,
        ));
    }

    /**
     * Given ::= ( "Given" | "And" ) Object
     */
    private function parseGiven() {
        $this->matchLeading(Lexer::T_GIVEN, 'GIVEN or AND');

        $data = $this->parseObject();

        return array('type' => 'Given', 'data' => array(
            'object' => $data,
        ));
    }

    /**
     * When ::= ( "When" | "And" ) ( Action | Object ) Condition
     */
    private function parseWhen() {
        $this->matchLeading(Lexer::T_WHEN, 'WHEN or AND');

        $object = null;
        $action = null;

        /* An identifier starts an action; anything else must be an object. */
        if (Lexer::T_IDEN === $this->lexer->key()) {
            $action = $this->parseAction();
        } else {
            $object = $this->parseObject();
        }

        $cond = $this->parseCondition();

        return array('type' => 'When', 'data' => array(
            'action' => $action,
            'object' => $object,
            'condition' => $cond,
        ));
    }

    /**
     * Then ::= ( "Then" | "And" ) Action
     */
    private function parseThen() {
        $this->matchLeading(Lexer::T_THEN, 'THEN or AND');

        $action = $this->parseAction();

        return array('type' => 'Then', 'data' => array(
            'action' => $action,
        ));
    }

    /**
     * Action ::= <name> [<param>]* ["TO" ( Action | Object )]
     */
    private function parseAction() {
        $name = $this->lexer->current();
        $this->match(Lexer::T_IDEN);

        $params = array();
        while (Lexer::T_IDEN === $this->lexer->key()) {
            $params[] = $this->lexer->current();
            $this->lexer->next();
        }

        $object = null;
        $action = null;

        if (Lexer::T_TO === $this->lexer->key()) {
            $this->match(Lexer::T_TO);

            if (Lexer::T_IDEN === $this->lexer->key()) {
                $action = $this->parseAction();
            } else {
                $object = $this->parseObject();
            }
        }

        return array('type' => 'Action', 'data' => array(
            'name' => $name,
            'params' => $params,
            'object' => $object,
            'action' => $action,
        ));
    }

    /**
     * Object ::= ( "A" | "THE" | Position ) <name>
     * Position ::= ( "FIRST" | "PREVIOUS" | "NEXT" | "LAST" )
     */
    private function parseObject() {
        $token = $this->lexer->key();
        $expected = array(Lexer::T_A, Lexer::T_THE,
                          Lexer::T_FIRST, Lexer::T_PREVIOUS,
                          Lexer::T_NEXT, Lexer::T_LAST);

        if (!in_array($token, $expected, true)) {
            $this->throwSyntaxError('A, THE, FIRST, PREVIOUS, NEXT or LAST');
        }
        $this->match($token);

        $name = $this->lexer->current();
        $this->match(Lexer::T_IDEN);

        return array('type' => 'Object', 'data' => array(
            'type' => $token, // FIXME make me a literal
            'name' => $name,
        ));
    }

    /**
     * Condition ::= "IS" ["NOT"] <literal>
     */
    private function parseCondition() {
        $this->match(Lexer::T_IS);

        $nega = Lexer::T_NOT === $this->lexer->key();
        if ($nega) {
            $this->lexer->next();
        }

        $data = $this->parseLiteral();

        return array('type' => 'Condition', 'data' => array(
            'not' => $nega,
            'cond' => $data,
        ));
    }

    /**
     * Literal ::= "'" [data] "'"
     *
     * A literal may span several tokens; its words are joined with single
     * spaces and every token up to and including the one carrying the
     * closing quote is consumed.
     */
    private function parseLiteral() {
        $token = $this->lexer->current();

        if (false === $token || '\'' !== substr($token, 0, 1)) {
            $this->throwSyntaxError('LITERAL');
        }
        $this->lexer->next();

        /* Drop the opening quote; the closing one is dropped at the end. */
        $word = (string) substr($token, 1);
        $words = array($word);

        while ('\'' !== substr($word, -1)) {
            if (!$this->lexer->valid()) {
                $this->throwSyntaxError('closing \'');
            }
            $word = $this->lexer->current();
            $words[] = $word;
            $this->lexer->next();
        }

        $data = (string) substr(implode(' ', $words), 0, -1);

        return array('type' => 'Literal', 'data' => $data);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment