johnsogg · March 27, 2016 17:12
diff --git a/Expr.g4 b/Expr.g4
 grammar Expr;

 /* 

 Sample input should only have three statements in it, and should parse in a millisecond or three:

  (1 +
        2 *
             (3 / 4 - 5
                        + 7)
                              - 1)
 -1 + -84 - -2 * -1 / -2 * -1 - 1 + 1 / -1 + -1 + -84 - -2 * -1 / -2 * -1 - 1 + 1 / -1
  1 - 1 + -1 + +1 - -1 - +1 + 1 - 1 + -1 + +1 - -1 - +1 + 1 - 1 + -1 + +1 - -1 - +1 

 */
 // Note! Semantic Predicates in use! (aka sempred)
 // 
 // The following block is included in the lexer without ANTLR understanding what it does. Unfortunately
 // it must be written in the runtime target's language, so it ties this grammar file to a particular runtime.
 // Which is not bueno if you use external tools to test the grammar that use a different runtime from the
 // one chosen here. E.g. If you use the IntelliJ plugin or the cmd line `grun`, but the grammar includes
 // JavaScript semantic predicates, the external tools won't work.

 // Parenthesis depth tracked by the lexer. The default of 0 lives on the prototype;
 // the `this.nesting++` / `this.nesting--` actions in LP and RP update it, and the
 // IGNORE_NEWLINE predicate reads it.
 @lexer::members {
 ExprLexer.prototype.nesting = 0;
 }

 // Entry rule: one or more statements, then end of input.
 block : statement+ EOF ;

 // In this grammar, statements are terminated by an 'end of statement' token.

 // A statement is a single expression followed by an EOS token.
 statement : expression EOS ;

 // Left-recursive expression rule. The order of the alternatives matters: ANTLR 4
 // resolves ambiguity in favor of the alternative listed first, so the unary sign
 // binds tighter than MUL/DIV, which in turn bind tighter than PLUS/MINUS.
 // `op=` labels the operator token so it can be read from the rule's context.
 expression
    :   LP expression RP
    |   literal
    |   op=(PLUS | MINUS) expression
    |   expression op=(MUL | DIV) expression
    |   expression op=(MINUS | PLUS) expression
    ;

 // The only literal in this language is a NUM token.
 literal : NUM ;

 // Arithmetic operator tokens, one single-character literal each.
 PLUS  : '+' ;
 MINUS : '-' ;
 MUL   : '*' ;
 DIV   : '/' ;

 // The {this.nesting++;} thing below is an ANTLR `action`, which is blindly copied in
 // to the Lexer (ExprLexer.js) without ANTLR understanding what it does. The purpose is to
 // increment a nesting variable when we see an opening paren. There is a corresponding
 // decrement for closing parens. This is used in the IGNORE_NEWLINE rule later on.

 // Opening paren: matching it bumps the lexer's nesting counter by one.
 LP : '(' {this.nesting++;} ;

 // Closing paren: lowers the nesting counter, but never below zero. Without the guard
 // a stray ')' would leave `nesting` negative, and the newlines inside the next
 // parenthesized group would be emitted as EOS instead of being skipped.
 RP
    :   ')' {if (this.nesting > 0) this.nesting--;}
    ;

 // A number is either a run of digits, or an optional run of digits followed by '.'
 // and at least one more digit: "1", "1.5" and ".5" match, "1." does not.
 NUM
    :   INT
    |   INT? '.' INT
    ;

 // INT already means "one or more digits", so NUM uses it without a further + or *;
 // repeating a repetition would make the split of a digit run ambiguous.
 fragment
 INT
    :   [0-9]+
    ;

 // The `{foo}?` thing below is a semantic predicate. It tells the runtime to evaluate a
 // statement in whatever the target lang is, here it is JS. If it is true, the rule is
 // allowed to match. For our purposes then it will only be 'switched on' when we have
 // seen more opening parens than closing parens.
 //
 // It is important that this lexer rule appear before the end of statement rule (EOS).

 // Matches the same text as EOS. The predicate is what selects between the two rules:
 // while `nesting` is greater than zero the newline is skipped, otherwise this rule
 // cannot match and the newline falls through to EOS.
 IGNORE_NEWLINE
    :   '\r'? '\n' {this.nesting > 0}? -> skip
    ;

 // If no other previously defined lexer rule matched '\r'? '\n' exactly, then this rule
 // will. This is how newlines can be interpreted as skipped, or as EOS tokens, depending
 // on the semantics of our little Expr language.

 // End-of-statement token: a newline, optionally preceded by a carriage return.
 EOS : '\r'? '\n' ;

 // Runs of spaces and tabs carry no meaning and are dropped by the lexer.
 WS : [ \t]+ -> skip ;
diff --git a/main.js b/main.js
 #!/usr/bin/env node

 // Usage: 
 //   antlr4 -Dlanguage=JavaScript Expr.g4
 //   node main.js test.expr
 //
 // You'll need the antlr4 JS runtime installed: npm install antlr4
 //
 // You'll also need the antlr4 parser generator installed (different from the runtime).
 // I have an 'antlr4' script in my path like this:
 // 
 // #!/usr/bin/env bash
 // java -jar /usr/local/lib/antlr-4.5.2-complete.jar $*
 //
 var antlr4 = require('antlr4')
 var ExprLexer = require("./ExprLexer").ExprLexer
 var ExprParser = require("./ExprParser").ExprParser


 var fs = require('fs')
  , path = require('path')

 var filePath = process.argv[2]

 // A missing path argument cannot be read; print a usage hint and stop instead.
 if (!filePath) {
   console.error('Usage: node main.js <input-file>')
   process.exit(1)
 }

 // Read the whole input as UTF-8 text, feed it through the generated lexer and parser,
 // and print the parse tree returned by the grammar's `block` entry rule.
 fs.readFile(filePath, {encoding: 'utf-8'}, function(err, data) {
   if (err) {
     // Report read failures on stderr and signal failure through the exit status.
     console.error(err)
     process.exitCode = 1
     return
   }
   console.log('received data:\n' + data); // comment this out if you want to time it more accurately
   var chars = new antlr4.InputStream(data)
   var lexer = new ExprLexer(chars)
   var tokens = new antlr4.CommonTokenStream(lexer)
   var parser = new ExprParser(tokens)
   parser.buildParseTrees = true
   // Keep the tree in a local variable rather than leaking an implicit global.
   var tree = parser.block()
   console.log(tree) // comment this out if you want to time it more accurately
 });
diff --git a/test.expr b/test.expr
  (1 +
        2 *
             (3 / 4 - 5
                        + 7)
                              - 1)
 -1 + -84 - -2 * -1 / -2 * -1 - 1 + 1 / -1 + -1 + -84 - -2 * -1 / -2 * -1 - 1 + 1 / -1
  1 - 1 + -1 + +1 - -1 - +1 + 1 - 1 + -1 + +1 - -1 - +1 + 1 - 1 + -1 + +1 - -1 - +1
	grammar Expr;

	/*

	Sample input should only have three statements in it, and should parse in a millisecond or three:

	(1 +
	2 *
	(3 / 4 - 5
	+ 7)
	- 1)
	-1 + -84 - -2 * -1 / -2 * -1 - 1 + 1 / -1 + -1 + -84 - -2 * -1 / -2 * -1 - 1 + 1 / -1
	1 - 1 + -1 + +1 - -1 - +1 + 1 - 1 + -1 + +1 - -1 - +1 + 1 - 1 + -1 + +1 - -1 - +1

	*/
	// Note! Semantic Predicates in use! (aka sempred)
	//
	// The following block is included in the lexer without ANTLR understanding what it does. Unfortunately
	// it must be written in the runtime target's language, so it ties this grammar file to a particular runtime.
	// Which is not bueno if you use external tools to test the grammar that use a different runtime from the
	// one chosen here. E.g. If you use the IntelliJ plugin or the cmd line `grun`, but the grammar includes
	// JavaScript semantic predicates, the external tools won't work.

	// Parenthesis depth tracked by the lexer. The default of 0 lives on the prototype;
	// the `this.nesting++` / `this.nesting--` actions in LP and RP update it, and the
	// IGNORE_NEWLINE predicate reads it.
	@lexer::members {
	ExprLexer.prototype.nesting = 0;
	}

	// Entry rule: one or more statements, then end of input.
	block : statement+ EOF ;

	// In this grammar, statements are terminated by an 'end of statement' token.

	// A statement is a single expression followed by an EOS token.
	statement : expression EOS ;

	// Left-recursive expression rule. The order of the alternatives matters: ANTLR 4
	// resolves ambiguity in favor of the alternative listed first, so the unary sign
	// binds tighter than MUL/DIV, which in turn bind tighter than PLUS/MINUS.
	// `op=` labels the operator token so it can be read from the rule's context.
	expression
	    : LP expression RP
	    | literal
	    | op=(PLUS | MINUS) expression
	    | expression op=(MUL | DIV) expression
	    | expression op=(MINUS | PLUS) expression
	    ;

	// The only literal in this language is a NUM token.
	literal : NUM ;

	// Arithmetic operator tokens, one single-character literal each.
	PLUS  : '+' ;
	MINUS : '-' ;
	MUL   : '*' ;
	DIV   : '/' ;

	// The {this.nesting++;} thing below is an ANTLR `action`, which is blindly copied in
	// to the Lexer (ExprLexer.js) without ANTLR understanding what it does. The purpose is to
	// increment a nesting variable when we see an opening paren. There is a corresponding
	// decrement for closing parens. This is used in the IGNORE_NEWLINE rule later on.

	// Opening paren: matching it bumps the lexer's nesting counter by one.
	LP : '(' {this.nesting++;} ;

	// Closing paren: lowers the nesting counter, but never below zero. Without the guard
	// a stray ')' would leave `nesting` negative, and the newlines inside the next
	// parenthesized group would be emitted as EOS instead of being skipped.
	RP
	    : ')' {if (this.nesting > 0) this.nesting--;}
	    ;

	// A number is either a run of digits, or an optional run of digits followed by '.'
	// and at least one more digit: "1", "1.5" and ".5" match, "1." does not.
	NUM
	    : INT
	    | INT? '.' INT
	    ;

	// INT already means "one or more digits", so NUM uses it without a further + or *;
	// repeating a repetition would make the split of a digit run ambiguous.
	fragment
	INT
	    : [0-9]+
	    ;

	// The `{foo}?` thing below is a semantic predicate. It tells the runtime to evaluate a
	// statement in whatever the target lang is, here it is JS. If it is true, the rule is
	// allowed to match. For our purposes then it will only be 'switched on' when we have
	// seen more opening parens than closing parens.
	//
	// It is important that this lexer rule appear before the end of statement rule (EOS).

	// Matches the same text as EOS. The predicate is what selects between the two rules:
	// while `nesting` is greater than zero the newline is skipped, otherwise this rule
	// cannot match and the newline falls through to EOS.
	IGNORE_NEWLINE
	: '\r'? '\n' {this.nesting > 0}? -> skip
	;

	// If no other previously defined lexer rule matched '\r'? '\n' exactly, then this rule
	// will. This is how newlines can be interpreted as skipped, or as EOS tokens, depending
	// on the semantics of our little Expr language.

	// End-of-statement token: a newline, optionally preceded by a carriage return.
	EOS : '\r'? '\n' ;

	// Runs of spaces and tabs carry no meaning and are dropped by the lexer.
	WS : [ \t]+ -> skip ;
	#!/usr/bin/env node

	// Usage:
	// antlr4 -Dlanguage=JavaScript Expr.g4
	// node main.js test.expr
	//
	// You'll need the antlr4 JS runtime installed: npm install antlr4
	//
	// You'll also need the antlr4 parser generator installed (different from the runtime).
	// I have an 'antlr4' script in my path like this:
	//
	// #!/usr/bin/env bash
	// java -jar /usr/local/lib/antlr-4.5.2-complete.jar $*
	//
	var antlr4 = require('antlr4')
	var ExprLexer = require("./ExprLexer").ExprLexer
	var ExprParser = require("./ExprParser").ExprParser


	var fs = require('fs')
	, path = require('path')

	var filePath = process.argv[2]

	// A missing path argument cannot be read; print a usage hint and stop instead.
	if (!filePath) {
	  console.error('Usage: node main.js <input-file>')
	  process.exit(1)
	}

	// Read the whole input as UTF-8 text, feed it through the generated lexer and parser,
	// and print the parse tree returned by the grammar's `block` entry rule.
	fs.readFile(filePath, {encoding: 'utf-8'}, function(err, data) {
	  if (err) {
	    // Report read failures on stderr and signal failure through the exit status.
	    console.error(err)
	    process.exitCode = 1
	    return
	  }
	  console.log('received data:\n' + data); // comment this out if you want to time it more accurately
	  var chars = new antlr4.InputStream(data)
	  var lexer = new ExprLexer(chars)
	  var tokens = new antlr4.CommonTokenStream(lexer)
	  var parser = new ExprParser(tokens)
	  parser.buildParseTrees = true
	  // Keep the tree in a local variable rather than leaking an implicit global.
	  var tree = parser.block()
	  console.log(tree) // comment this out if you want to time it more accurately
	});