Created
March 27, 2016 17:12
-
-
Save johnsogg/7f9090ac34280791a499 to your computer and use it in GitHub Desktop.
ANTLR4 semantic pred (sempred) example of Python-like context-sensitivity using JavaScript target
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
grammar Expr;

/*
 Sample input should only have three statements in it, and should parse in a millisecond or three:

(1 +
2 *
(3 / 4 - 5
+ 7)
- 1)
-1 + -84 - -2 * -1 / -2 * -1 - 1 + 1 / -1 + -1 + -84 - -2 * -1 / -2 * -1 - 1 + 1 / -1
1 - 1 + -1 + +1 - -1 - +1 + 1 - 1 + -1 + +1 - -1 - +1
*/

// Note! Semantic predicates in use! (aka sempred)
//
// The following block is included in the lexer without ANTLR understanding what it does.
// Unfortunately it must be written in the runtime target's language, so it ties this grammar
// file to a particular runtime. That is a problem if you test the grammar with external tools
// that use a different runtime from the one chosen here. E.g. if you use the IntelliJ plugin
// or the command-line `grun`, but the grammar includes JavaScript semantic predicates, the
// external tools won't work.
//
// `nesting` counts how many parentheses are currently open (see LP, RP and IGNORE_NEWLINE).
@lexer::members {
ExprLexer.prototype.nesting = 0;
}

// ---------------------------------------------------------------------------
// Parser rules
// ---------------------------------------------------------------------------

// Entry point: one or more statements followed by end of input.
block
    : statement+ EOF
    ;

// In this grammar, statements are terminated by an 'end of statement' token.
statement
    : expression EOS
    ;

// Alternatives are listed from highest to lowest precedence: parenthesised expressions and
// literals, then unary sign, then multiplication/division, then addition/subtraction.
expression
    : LP expression RP
    | literal
    | op=(PLUS | MINUS) expression
    | expression op=(MUL | DIV) expression
    | expression op=(MINUS | PLUS) expression
    ;

literal
    : NUM
    ;

// ---------------------------------------------------------------------------
// Lexer rules
// ---------------------------------------------------------------------------

PLUS
    : '+'
    ;

MINUS
    : '-'
    ;

MUL
    : '*'
    ;

DIV
    : '/'
    ;

// The {this.nesting++;} thing below is an ANTLR `action`, which is blindly copied in
// to the Lexer (ExprLexer.js) without ANTLR understanding what it does. The purpose is to
// increment a nesting variable when we see an opening paren. There is a corresponding
// decrement for closing parens. This is used in the IGNORE_NEWLINE rule later on.
LP
    : '(' {this.nesting++;}
    ;

// The decrement is clamped at zero: without the clamp a stray ')' would leave the counter
// negative, and the newlines inside every later, correctly balanced '(' ... ')' group would
// be emitted as EOS tokens instead of being skipped.
RP
    : ')' {this.nesting = Math.max(0, this.nesting - 1);}
    ;

// Either an integer ("42") or a decimal with an optional integer part ("3.14", ".5").
// INT already matches one or more digits, so it is not repeated again here.
NUM
    : INT
    | INT? '.' INT
    ;

fragment
INT
    : ('0'..'9')+
    ;

// The `{foo}?` thing below is a semantic predicate. It tells the runtime to evaluate a
// statement in whatever the target lang is, here it is JS. If it is true, the rule is
// allowed to match. For our purposes then it will only be 'switched on' when we have
// seen more opening parens than closing parens.
//
// It is important that this lexer rule appear before the end of statement rule (EOS).
IGNORE_NEWLINE
    : '\r'? '\n' {this.nesting > 0}? -> skip
    ;

// If no other previously defined lexer rule matched '\r'? '\n' exactly, then this rule
// will. This is how newlines can be interpreted as skipped, or as EOS tokens, depending
// on the semantics of our little Expr language.
EOS
    : '\r'? '\n'
    ;

WS
    : [ \t]+ -> skip
    ;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
// Usage: | |
// antlr4 -Dlanguage=JavaScript Expr.g4 | |
// node main.js testexpr | |
// | |
// You'll need the antlr4 JS runtime installed: npm install antlr4
// | |
// You'll also need the antlr4 parser generator installed (different from the runtime). | |
// I have an 'antlr4' script in my path like this: | |
// | |
// #!/usr/bin/env bash | |
// java -jar /usr/local/lib/antlr-4.5.2-complete.jar $* | |
// | |
var antlr4 = require('antlr4') | |
var ExprLexer = require("./ExprLexer").ExprLexer | |
var ExprParser = require("./ExprParser").ExprParser | |
var fs = require('fs') | |
, path = require('path') | |
// Path of the input file to parse, taken from the first command-line argument.
var filePath = process.argv[2];

if (!filePath) {
  // Bail out early instead of handing an undefined path to fs.readFile.
  console.error('usage: node main.js <input-file>');
  process.exit(1);
}

// Read the whole input as UTF-8 text, then lex and parse it starting at the
// grammar's `block` rule and print the resulting parse tree.
fs.readFile(filePath, {encoding: 'utf-8'}, function(err, data) {
  if (err) {
    // Report the failure on stderr and signal it through the exit status.
    console.error(err);
    process.exitCode = 1;
    return;
  }

  console.log('received data:\n' + data); // comment this out if you want to time it more accurately

  var chars = new antlr4.InputStream(data);
  var lexer = new ExprLexer(chars);
  var tokens = new antlr4.CommonTokenStream(lexer);
  var parser = new ExprParser(tokens);
  parser.buildParseTrees = true;

  // Declared with `var`: assigning to an undeclared name would create an
  // implicit global (and throws a ReferenceError in strict mode).
  var tree = parser.block();
  console.log(tree); // comment this out if you want to time it more accurately
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(1 + | |
2 * | |
(3 / 4 - 5 | |
+ 7) | |
- 1) | |
-1 + -84 - -2 * -1 / -2 * -1 - 1 + 1 / -1 + -1 + -84 - -2 * -1 / -2 * -1 - 1 + 1 / -1 | |
1 - 1 + -1 + +1 - -1 - +1 + 1 - 1 + -1 + +1 - -1 - +1 + 1 - 1 + -1 + +1 - -1 - +1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment