Skip to content

Instantly share code, notes, and snippets.

@Centaur
Last active August 29, 2015 14:26
Show Gist options
  • Save Centaur/7cbd0e3611f7aa78ccaa to your computer and use it in GitHub Desktop.
Save Centaur/7cbd0e3611f7aa78ccaa to your computer and use it in GitHub Desktop.
grammar ECMAScript;
options {
// Allow any char but \uFFFF (16 bit -1)
charVocabulary='\u0000'..'\uFFFE';
}
/* this comes from section A.5 but is really the starting point, so
* it is present here.
*/
program:
sourceElements EOF
;
/* A.1 Lexical Grammar */
sourceCharacter:
/* any unicode character */
/* see section 6 */
SOURCE_CHAR
;
inputElementDiv:
whiteSpace
| lineTerminator
| comment
| token
| divPunctuator
;
inputElementRegExp:
whiteSpace
| lineTerminator
| comment
| token
| regularExpressionLiteral
;
whiteSpace:
TAB
| VT
| FF
| SP
| NBSP
| USP
;
lineTerminator:
/*
* Exactly one line-terminator character: LF, CR, LS or PS.
* The rule used to start with a stray '|', which added an empty first
* alternative and allowed lineTerminator to match nothing at all.
*/
LF
| CR
| LS
| PS
;
comment:
multiLineComment
| singleLineComment
;
multiLineComment:
/*
* SPEC (7.4): the body between the opening and closing delimiters is
* optional, so an empty block comment is legal. The body was previously
* mandatory here.
*/
'/*' (multiLineCommentChars)? '*/'
;
multiLineCommentChars:
/*
* SPEC (7.4):
* MultiLineNotAsteriskChar MultiLineCommentChars(opt)
* | ASTERISK PostAsteriskCommentChars(opt)
*
* The trailing recursion has to be optional: with it mandatory the rule had
* no terminating alternative and could never finish matching.
*/
multiLineNotAsterikChar (multiLineCommentChars)?
| ASTERISK (postAsterikCommentChars)?
;
postAsterikCommentChars:
/*
* SPEC (7.4):
* MultiLineNotForwardSlashOrAsteriskChar MultiLineCommentChars(opt)
* | ASTERISK PostAsteriskCommentChars(opt)
*
* Describes what may follow an asterisk inside a block comment. The trailing
* recursion is optional so that the rule can terminate.
*/
multiLineNotForwardSlashOrAsterikChar (multiLineCommentChars)?
| ASTERISK (postAsterikCommentChars)?
;
multiLineNotAsterikChar:
sourceCharacter /* but not ASTERISK */
;
multiLineNotForwardSlashOrAsterikChar:
sourceCharacter /* but not FORWARD-SLASH or ASTERISK */
;
singleLineComment:
/*
* SPEC (7.4): the text after the two slashes is optional, so a bare pair of
* slashes at the end of a line is already a complete comment.
*/
'//' (singleLineCommentChars)?
;
singleLineCommentChars:
/*
* SPEC (7.4):
* SingleLineCommentChar SingleLineCommentChars(opt)
* ->
* (singleLineCommentChar)+
*
* The previous form recursed unconditionally and therefore had no way to
* stop; the closure accepts one or more comment characters.
*/
(singleLineCommentChar)+
;
singleLineCommentChar:
sourceCharacter /* but not lineTerminator */
;
token:
reservedWord
| identifier
/* syntactic predicate used to disambiguate between DOT on ALT 3 and ALT 5 */
| (DOT decimalDigits)=> numericLiteral
/* syntactic predicate used to disambiguate between APOSTROPHE on ALT 4 and ALT 5 */
| (APOSTROPHE (singleStringCharacters)? APOSTROPHE)=> stringLiteral
| punctuator
;
reservedWord:
keyword
| futureReservedWord
| nullLiteral
| booleanLiteral
;
keyword:
'break' | 'case' | 'catch' | 'continue' | 'default' | 'delete' | 'do'
| 'else' | 'finally' | 'for' | 'function' | 'if' | 'in' | 'instanceof'
| 'new' | 'return' | 'switch' | 'this' | 'throw' | 'try' | 'typeof'
| 'var' | 'void' | 'while' | 'with'
;
futureReservedWord:
'abstract' | 'boolean' | 'byte' | 'char' | 'class' | 'const' | 'debugger' | 'double'
| 'enum' | 'export' | 'extends' | 'final' | 'float' | 'goto' | 'implements' | 'import'
| 'int' | 'interface' | 'long' | 'native' | 'package' | 'private' | 'protected' | 'public'
| 'short' | 'static' | 'super' | 'synchronized' | 'throws' | 'transient' | 'volatile'
;
identifier:
identifierName /* but not reservedWord */
;
identifierName:
/*
* left factorization:
*
* SPEC:
* identifierStart
* | identifierName identifierPart
* ->
* ((identifierStart)+ (identifierPart)?)+
*/
identifierStart (identifierPart)*
;
identifierStart:
unicodeLetter
| DOLLAR
| UNDERSCORE
| unicodeEscapeSequence
;
identifierPart:
/*
* SPEC (7.6):
* IdentifierStart
* | UnicodeCombiningMark
* | UnicodeDigit
* | UnicodeConnectorPunctuation
* | backslash UnicodeEscapeSequence
*
* identifierStart is an unconditional alternative here. It used to be guarded
* by a syntactic predicate that only succeeded on a unicode escape, so plain
* letters, DOLLAR and UNDERSCORE were rejected after the first character of an
* identifier. The separate unicodeEscapeSequence alternative is dropped
* because identifierStart already covers it, which also removes the
* non-determinism the predicate was added to work around.
*/
identifierStart
| unicodeCombiningMark
| unicodeDigit
| unicodeConnectorPunctuation
;
unicodeLetter:
/* any character in the unicode categories:
"uppercase letter (Lu)",
"lowercase letter (Ll)",
"titlecase letter (Lt)",
"modifier letter (Lm)",
"other letter (Lo)",
"letter number (Nl)" */
UNICODE_LETTER
;
unicodeCombiningMark:
/* any character in the unicode categories:
"non-spacing mark (Mn)"
"combining spacing mark (Mc)"
*/
UNICODE_NONSPACING_MARK
| UNICODE_COMBINING_MARK
;
unicodeDigit:
/* any character in the unicode category "decimal number (Nd)" */
UNICODE_DIGIT
;
unicodeConnectorPunctuation:
/* any character in the unicode category "connector punctuation (Pc)" */
UNICODE_CONNECTOR_PUNCTUATION
;
unicodeEscapeSequence:
'\\u' hexDigit hexDigit hexDigit hexDigit
;
hexDigit:
/* explicitly enumerated in grammar */
HEXDIGIT
;
punctuator:
/*
* SPEC (7.7): every punctuator except the two division forms, which live in
* divPunctuator. COMMA replaces the APOSTROPHE that used to sit after SEMI:
* the comma is a punctuator, whereas the apostrophe only ever opens or closes
* a string literal.
*/
LBRACE | RBRACE | LPAREN | RPAREN | LBRACK | RBRACK
| DOT | SEMI | COMMA | LT | GT | LTEQ
| GTEQ | EQ2 | NOTEQ | EQ3 | NOTEQ2
| PLUS | MINUS | STAR | PERCENT | PLUS2 | MINUS2
| LSHIFT | RSHIFT | GT3 | AMPER | PIPE | CAROT
| EXCLAMATION | TILDE | AMPER2 | PIPE2 | QUESTION | COLON
| EQ | PLUSEQ | MINUSEQ | TIMESEQ | PERCENTEQ | LSHIFTEQ
| RSHIFTEQ | GT3EQ | AMPEREQ | PIPEEQ | CAROTEQ
;
divPunctuator:
DIVIDE
| DIVIDEEQ
;
literal:
nullLiteral
| booleanLiteral
| numericLiteral
| stringLiteral
;
nullLiteral:
'null'
;
booleanLiteral:
'true'
| 'false'
;
numericLiteral:
decimalLiteral
| hexIntegerLiteral
;
decimalLiteral:
/*
* SPEC:
* decimalIntegerLiteral DOT (decimalDigits)? (exponentPart)?
* | DOT decimalDigits (exponentPart)?
* | decimalIntegerLiteral (exponentPart)?
* ->
* decimalIntegerLiteral (decimalIntegerLiteralTail)?
* | DOT decimalDigits (exponentPart)?
*
* The tail is optional so that a bare integer such as 42 (third SPEC
* alternative without an exponent) is accepted; decimalIntegerLiteralTail
* itself always requires either a DOT or an exponentPart.
*/
decimalIntegerLiteral (decimalIntegerLiteralTail)?
| DOT decimalDigits (exponentPart)?
;
decimalIntegerLiteralTail:
/* this is necessary because of the way the grammar gets parsed */
DOT (decimalDigits)? (exponentPart)?
| exponentPart
;
decimalIntegerLiteral:
ZERO
| NON_ZERO_DIGIT (decimalDigits)?
;
decimalDigits:
/*
* SPEC:
* decimalDigit
* | decimalDigits decimalDigit
* =>
* (decimalDigits)+
*/
(decimalDigit)+
;
decimalDigit:
DIGIT /* grammar has each one explicitely listed */
;
exponentIndicator:
EXPONENT_INDICATOR /* grammar has both e and E listed */
;
signedInteger:
decimalDigits
| PLUS decimalDigits
| MINUS decimalDigits
;
hexIntegerLiteral:
/*
* SPEC:
* '0x' hexDigit
* | '0X' hexDigit
* | hexIntegerLiteral hexDigit
* ->
* ('0x'|'0X') (hexDigit)+
*/
('0x'|'0X') (hexDigit)+
;
stringLiteral:
QUOTE (doubleStringCharacters)? QUOTE
| APOSTROPHE (singleStringCharacters)? APOSTROPHE
;
doubleStringCharacters:
/*
* SPEC:
* doubleStringCharacter (doubleStringCharacters)?
* ->
* (doubleStringCharacter)+
*/
(doubleStringCharacter)+
;
singleStringCharacters:
/*
* SPEC:
* singleStringCharacter (singleStringCharacters)?
* ->
* (singleStringCharacter)+
*/
(singleStringCharacter)+
;
doubleStringCharacter:
/*
* One character of a double-quoted string: either an ordinary source
* character or a backslash escape. The '|' between the two alternatives was
* missing, which turned them into a single sequence requiring both.
*/
sourceCharacter /* but not double quote or backslash or line terminator */
| BSLASH escapeSequence
;
singleStringCharacter:
sourceCharacter /* but not single quote or backslash or line terminator */
| BSLASH escapeSequence
;
escapeSequence:
/*
* The string-character rules consume the BSLASH before invoking this rule,
* so the unicode form is spelled out here as LOWER_U plus four hex digits.
* The shared unicodeEscapeSequence rule begins with its own backslash
* ('\\u') and would demand a second backslash in this position.
*/
characterEscapeSequence
| ZERO /* [lookahead not a member of decimalDigit] */
| hexEscapeSequence
| LOWER_U hexDigit hexDigit hexDigit hexDigit
;
characterEscapeSequence:
singleEscapeCharacter
| nonEscapeCharacter
;
singleEscapeCharacter:
APOSTROPHE | QUOTE | BSLASH | LOWER_B | LOWER_F | LOWER_N | LOWER_R | LOWER_T | LOWER_V
;
nonEscapeCharacter:
sourceCharacter /* but not escapeCharacter or lineTerminator */
;
escapeCharacter:
singleEscapeCharacter
| decimalDigit
| LOWER_X
| LOWER_U
;
hexEscapeSequence:
LOWER_X hexDigit hexDigit
;
/* defined above
unicodeEscapeSequence:
LOWER_U hexDigit hexDigit hexDigit hexDigit
;*/
regularExpressionLiteral:
SLASH regularExpressionBody SLASH regularExpressionFlags
;
regularExpressionBody:
regularExpressionFirstChar regularExpressionChars
;
regularExpressionChars:
/*
* SPEC:
* [empty]
* | regularExpressionChars regularExpressionChar
* ->
* (regularExpressionChar)*
*
* The closure supplies the [empty] alternative that the previous
* right-recursive form lacked; without it the rule could never terminate.
*/
(regularExpressionChar)*
;
regularExpressionFirstChar:
nonTerminator /* but not * or \ or / */
| backslashSequence
;
regularExpressionChar:
/* Any character after the first one of a regular expression body.
* Unlike regularExpressionFirstChar, '*' is permitted here. */
nonTerminator /* but not \ or / */
| backslashSequence
;
backslashSequence:
BSLASH nonTerminator
;
nonTerminator:
sourceCharacter /* but not lineTerminator */
;
regularExpressionFlags:
/*
* SPEC:
* [empty] | regularExpressionFlags identifierPart
* ->
* (identifierPart|)*
*/
(identifierPart)*
;
/* A.2 Number Conversions */
stringNumericLiteral:
/*
* SPEC:
* (strWhiteSpace)?
* | (strWhiteSpace)? strNumericLiteral (strWhiteSpace)?
*/
(strWhiteSpace)? (strNumericLiteral (strWhiteSpace)?)?
;
strWhiteSpace:
/*
* SPEC:
* strWhiteSpaceChar (strWhiteSpace)?
* ->
* (strWhiteSpaceChar)+
*/
(strWhiteSpaceChar)+
;
strWhiteSpaceChar:
TAB | SP | NBSP | FF | VT | CR | LF | LS | PS | USP
;
strNumericLiteral:
strDecimalLiteral
| hexIntegerLiteral
;
strDecimalLiteral:
strUnsignedDecimalLiteral
| PLUS strUnsignedDecimalLiteral
| MINUS strUnsignedDecimalLiteral
;
strUnsignedDecimalLiteral:
/*
* SPEC:
* 'Infinity'
* | decimalDigits DOT (decimalDigits)? (exponentPart)?
* | DOT decimalDigits (exponentPart)?
* | decimalDigits (exponentPart)?
*/
'Infinity'
/* syntactic predicate used to remove nondet between ALTs 2 and 4 */
| (decimalDigits DOT)=> decimalDigits DOT (decimalDigits)? (exponentPart)?
| DOT decimalDigits (exponentPart)?
| decimalDigits (exponentPart)?
;
/* defined above
decimalDigits:
decimalDigit
| decimalDigits decimalDigit
;*/
/* decimalDigit:
DIGIT // grammar has them explicitly enumerated
;*/
exponentPart:
exponentIndicator signedInteger
;
/* redefined above
exponentIndicator:
EXPONENT_INDICATOR
;*/
/* redefined above
signedInteger:
decimalDigits
| PLUS decimalDigits
| MINUS decimalDigits
;*/
/* redefined above
hexIntegerLiteral:
'0x' hexDigit
| '0X' hexDigit
| hexIntegerLiteral hexDigit
;*/
/* redefined above
hexDigit:
HEXDIGIT // grammar has them explicitely enumerated
; */
/* A.3 Expressions */
primaryExpression:
'this'
| identifier
| literal
| arrayLiteral
| objectLiteral
| LPAREN expression RPAREN
;
arrayLiteral:
/*
* SPEC:
* LBRACK (elision)? RBRACK
* | LBRACK elementList RBRACK
* | LBRACK elementList COMMA (elision)? RBRACK
*/
LBRACK (
(elision)?
| elementList (COMMA (elision)?)?
) RBRACK
;
elementList:
/*
* SPEC:
* (elision)? assignmentExpression
* | elementList COMMA (elision)? assignmentExpression
*/
(elision)? assignmentExpression (elementListTail)*
;
elementListTail:
COMMA (elision)? assignmentExpression
;
elision:
/*
* SPEC:
* COMMA
* | elision COMMA
* ->
* (COMMA)+
*/
(COMMA)+
;
objectLiteral:
/*
* SPEC:
* LBRACE RBRACE
* | LBRACE propertyNameAndValueList RBRACE
*/
LBRACE (propertyNameAndValueList)? RBRACE
;
propertyNameAndValueList:
/*
* SPEC:
* propertyName COLON assignmentExpression
* | propertyNameAndValueList COMMA propertyName COLON assignmentExpression
*/
propertyName COLON assignmentExpression (propertyNameAndValueListTail)*
;
propertyNameAndValueListTail:
COMMA propertyName COLON assignmentExpression
;
propertyName:
identifier
| stringLiteral
| numericLiteral
;
memberExpression:
/*
* SPEC:
* primaryExpression
* | functionExpression
* | memberExpression LBRACK expression RBRACK
* | memberExpression DOT identifier
* | 'new' memberExpression arguments
* ->
*/
( primaryExpression
| functionExpression
| 'new' memberExpression arguments
) (memberExpressionTail)*
;
memberExpressionTail:
/* SPEC: not a part of formal grammar */
LBRACK expression RBRACK
| DOT identifier
;
newExpression:
/*
* SPEC:
* memberExpression
* | 'new' newExpression
*/
/* syntactic predicate added to resolve between nondet in ALTs 1 and 2 */
('new' newExpression)=> 'new' newExpression
| memberExpression
;
callExpression:
/*
* SPEC:
* memberExpression arguments
* | callExpression arguments
* | callExpression LBRACK expression RBRACK
* | callExpression DOT identifier
*/
memberExpression arguments (callExpressionTail)*
;
callExpressionTail:
arguments
| LBRACK expression RBRACK
| DOT identifier
;
arguments:
/*
* SPEC:
* LPAREN RPAREN
* | LPAREN argumentList RPAREN
*/
LPAREN (argumentList)? RPAREN
;
argumentList:
/*
* SPEC:
* assignmentExpression
* | argumentList COMMA assignmentExpression
*/
assignmentExpression (argumentListTail)*
;
argumentListTail:
COMMA assignmentExpression
;
leftHandSideExpression:
/*
* SPEC:
* newExpression
* | callExpression
*
* Both alternatives can begin with a memberExpression, so a syntactic
* predicate picks callExpression only when an argument list actually follows;
* everything else is a newExpression. The previous predicate looked for a
* leading 'new', which sent plain member expressions such as an identifier
* into callExpression, where they failed for lack of arguments.
*/
(memberExpression LPAREN)=> callExpression
| newExpression
;
postfixExpression:
/*
* SPEC:
* leftHandSideExpression
* | leftHandSideExpression / no line terminator here / PLUS2
* | leftHandSideExpression / no line terminator here / MINUS2
*/
leftHandSideExpression (PLUS2|MINUS2)?
;
unaryExpression:
postfixExpression
| 'delete' unaryExpression
| 'void' unaryExpression
| 'typeof' unaryExpression
| PLUS2 unaryExpression
| MINUS2 unaryExpression
| PLUS unaryExpression
| MINUS unaryExpression
| TILDE unaryExpression
| EXCLAMATION unaryExpression
;
multiplicativeExpression:
/*
* SPEC:
* unaryExpression
* | multiplicativeExpression ASTERISK unaryExpression
* | multiplicativeExpression DIVIDE unaryExpression
* | multiplicativeExpression PERCENT unaryExpression
*/
unaryExpression (multiplicativeExpressionTail)*
;
multiplicativeExpressionTail:
ASTERISK unaryExpression
| DIVIDE unaryExpression
| PERCENT unaryExpression
;
additiveExpression:
/*
* SPEC:
* multiplicativeExpression
* | additiveExpression PLUS multiplicativeExpression
* | additiveExpression MINUS multiplicativeExpression
*/
multiplicativeExpression (additiveExpressionTail)*
;
additiveExpressionTail:
PLUS multiplicativeExpression
| MINUS multiplicativeExpression
;
shiftExpression:
/*
* SPEC:
* additiveExpression
* | shiftExpression LSHIFT additiveExpression
* | shiftExpression RSHIFT additiveExpression
* | shiftExpression GT3 additiveExpression
*/
additiveExpression (shiftExpressionTail)*
;
shiftExpressionTail:
LSHIFT additiveExpression
| RSHIFT additiveExpression
| GT3 additiveExpression
;
relationalExpression:
/*
* SPEC:
* shiftExpression
* | relationalExpression LT shiftExpression
* | relationalExpression GT shiftExpression
* | relationalExpression LTEQ shiftExpression
* | relationalExpression GTEQ shiftExpression
* | relationalExpression 'instanceof' shiftExpression
* | relationalExpression 'in' shiftExpression
*/
shiftExpression (relationalExpressionTail)*
;
relationalExpressionTail:
LT shiftExpression
| GT shiftExpression
| LTEQ shiftExpression
| GTEQ shiftExpression
| 'instanceof' shiftExpression
| 'in' shiftExpression
;
relationalExpressionNoln:
/*
* SPEC:
* shiftExpression
* | relationalExpressionNoln (LT|GT|LTEQ|GTEQ|'instanceof') shiftExpression
* ->
* shiftExpression ((LT|GT|LTEQ|GTEQ|'instanceof') shiftExpression)*
*
* Same as relationalExpression minus the 'in' operator. The left operand
* used to be relationalExpression, which let 'in' back into the
* no-'in' variant and left the two alternatives with a common prefix.
*/
shiftExpression ((LT|GT|LTEQ|GTEQ|'instanceof') shiftExpression)*
;
equalityExpression:
/*
* SPEC:
* relationalExpression
* | equalityExpression (EQ2|NOTEQ|EQ3|NOTEQ2) relationalExpression
*/
relationalExpression (equalityExpressionTail)*
;
equalityExpressionTail:
(EQ2|NOTEQ|EQ3|NOTEQ2) relationalExpression
;
equalityExpressionNoln:
/*
* SPEC:
* relationalExpressionNoln
* | equalityExpressionNoln (EQ2|NOTEQ|EQ3|NOTEQ2) relationalExpressionNoln
*/
relationalExpressionNoln (equalityExpressionNolnTail)*
;
equalityExpressionNolnTail:
(EQ2|NOTEQ|EQ3|NOTEQ2) relationalExpressionNoln
;
bitwiseAndExpression:
/*
* SPEC:
* equalityExpression
* | bitwiseAndExpression AMPER equalityExpression
*/
equalityExpression (bitwiseAndExpressionTail)*
;
bitwiseAndExpressionTail:
AMPER equalityExpression
;
bitwiseAndExpressionNoln:
/*
* SPEC:
* equalityExpressionNoln
* | bitwiseAndExpressionNoln AMPER equalityExpressionNoln
*/
equalityExpressionNoln (bitwiseAndExpressionNolnTail)*
;
bitwiseAndExpressionNolnTail:
AMPER equalityExpressionNoln
;
bitwiseXorExpression:
/*
* SPEC:
* bitwiseAndExpression
* | bitwiseXorExpression CAROT bitwiseAndExpression
*/
bitwiseAndExpression (bitwiseXorExpressionTail)*
;
bitwiseXorExpressionTail:
CAROT bitwiseAndExpression
;
bitwiseXorExpressionNoln:
/*
* SPEC:
* bitwiseAndExpressionNoln
* | bitwiseXorExpressionNoln CAROT bitwiseAndExpressionNoln
*/
bitwiseAndExpressionNoln (bitwiseXorExpressionNolnTail)*
;
bitwiseXorExpressionNolnTail:
CAROT bitwiseAndExpressionNoln
;
bitwiseOrExpression:
/*
* SPEC:
* bitwiseXorExpression
* | bitwiseOrExpression PIPE bitwiseXorExpression
*/
bitwiseXorExpression (bitwiseOrExpressionTail)*
;
bitwiseOrExpressionTail:
PIPE bitwiseXorExpression
;
bitwiseOrExpressionNoln:
/*
* SPEC:
* bitwiseXorExpressionNoln
* | bitwiseOrExpressionNoln PIPE bitwiseXorExpressionNoln
*/
bitwiseXorExpressionNoln (bitwiseOrExpressionNolnTail)*
;
bitwiseOrExpressionNolnTail:
/*
* One "| operand" step of bitwiseOrExpressionNoln. The operand is a
* bitwiseXorExpressionNoln; the rule previously referred to itself, which
* made it infinitely recursive.
*/
PIPE bitwiseXorExpressionNoln
;
logicalAndExpression:
/*
* SPEC:
* bitwiseOrExpression
* | logicalAndExpression AMPER2 bitwiseOrExpression
* ->
* bitwiseOrExpression (AMPER2 bitwiseOrExpression)*
*
* The left operand used to be logicalAndExpressionNoln, which wrongly
* forbade 'in' on the left of '&&' and left both alternatives starting with
* the same tokens.
*/
bitwiseOrExpression (AMPER2 bitwiseOrExpression)*
;
logicalAndExpressionNoln:
/*
* SPEC:
* bitwiseOrExpressionNoln
* | logicalAndExpressionNoln AMPER2 bitwiseOrExpressionNoln
*/
bitwiseOrExpressionNoln (logicalAndExpressionNolnTail)*
;
logicalAndExpressionNolnTail:
AMPER2 bitwiseOrExpressionNoln
;
logicalOrExpression:
/*
* SPEC:
* logicalAndExpression
* | logicalOrExpression '||' logicalAndExpression
*/
logicalAndExpression (logicalOrExpressionTail)*
;
logicalOrExpressionTail:
'||' logicalAndExpression
;
logicalOrExpressionNoln:
/*
* SPEC:
* logicalAndExpressionNoln
* | logicalOrExpressionNoln '||' logicalAndExpressionNoln
*/
logicalAndExpressionNoln (logicalOrExpressionNolnTail)*
;
logicalOrExpressionNolnTail:
'||' logicalAndExpressionNoln
;
conditionalExpression:
/*
* SPEC:
* logicalOrExpression
* | logicalOrExpression QUESTION assignmentExpression COLON assignmentExpression
* ->
* left-factored so the shared logicalOrExpression prefix is parsed once and
* the ternary part becomes an optional suffix.
*/
logicalOrExpression (QUESTION assignmentExpression COLON assignmentExpression)?
;
conditionalExpressionNoln:
/*
* SPEC (11.12):
* logicalOrExpressionNoln
* | logicalOrExpressionNoln QUESTION assignmentExpression COLON assignmentExpressionNoln
* ->
* left-factored; the ternary part is an optional suffix.
*
* Only the last operand carries the no-'in' restriction. The middle operand
* is delimited by QUESTION and COLON, so the spec allows a full
* assignmentExpression there.
*/
logicalOrExpressionNoln (QUESTION assignmentExpression COLON assignmentExpressionNoln)?
;
assignmentExpression:
/*
* SPEC:
* conditionalExpression
* | leftHandSideExpression assignmentOperator assignmentExpression
*/
(leftHandSideExpression assignmentOperator) =>
leftHandSideExpression assignmentOperator assignmentExpression
| conditionalExpression
;
assignmentExpressionNoln:
/*
* SPEC:
* conditionalExpressionNoln
* | leftHandSideExpression assignmentOperator assignmentExpressionNoln
*
* Mirrors assignmentExpression: both alternatives can start with a
* leftHandSideExpression, so a syntactic predicate selects the assignment
* form only when an assignment operator really follows.
*/
(leftHandSideExpression assignmentOperator) =>
leftHandSideExpression assignmentOperator assignmentExpressionNoln
| conditionalExpressionNoln
;
assignmentOperator:
/* note that in the grammar these are listed out explicitely */
EQ | TIMESEQ | DIVIDEEQ | PERCENTEQ | PLUSEQ | MINUSEQ | LSHIFTEQ | RSHIFTEQ
| GT3EQ | AMPEREQ | CAROTEQ | PIPEEQ
;
expression:
/*
* SPEC:
* assignmentExpression
* | expression COMMA assignmentExpression
*/
assignmentExpression (expressionTail)*
;
expressionTail:
COMMA assignmentExpression
;
expressionNoln:
/*
* SPEC:
* assignmentExpressionNoln
* | expressionNoln COMMA assignmentExpressionNoln
*/
assignmentExpressionNoln (expressionNolnTail)*
;
expressionNolnTail:
COMMA assignmentExpressionNoln
;
/* A.4 Statements */
statement:
block
| variableStatement
| emptyStatement
| expressionStatement
| ifStatement
| iterationStatement
| continueStatement
| breakStatement
| returnStatement
| withStatement
| labelledStatement
| switchStatement
| throwStatement
| tryStatement
;
block:
LBRACE (statementList)? RBRACE
;
statementList:
/*
* SPEC:
* statement
* | statementList statement
*/
(statement)+
;
variableStatement:
'var' variableDeclarationList SEMI
;
variableDeclarationList:
/*
* SPEC:
* variableDeclaration
* | variableDeclarationList COMMA variableDeclaration
*/
variableDeclaration (variableDeclarationTail)*
;
variableDeclarationTail:
COMMA variableDeclaration
;
variableDeclarationListNoln:
/*
* SPEC:
* variableDeclarationNoln
* | variableDeclarationListNoln COMMA variableDeclarationNoln
*/
variableDeclarationNoln (variableDeclarationListNolnTail)*
;
variableDeclarationListNolnTail:
COMMA variableDeclarationNoln
;
variableDeclaration:
identifier (initialiser)?
;
variableDeclarationNoln:
identifier (initialiserNoln)?
;
initialiser:
EQ assignmentExpression
;
initialiserNoln:
EQ assignmentExpressionNoln
;
emptyStatement:
/*
* SPEC (12.3): an empty statement is a lone semicolon. The rule body used to
* be empty, which made emptyStatement (and therefore statement) match
* nothing at all.
*/
SEMI
;
expressionStatement:
/* [lookahead not a member of {{, function}} */ expression SEMI
;
ifStatement:
/*
* SPEC:
* 'if' LPAREN expression RPAREN statement 'else' statement
* | 'if' LPAREN expression RPAREN statement
* ->
* left-factored; the 'else' branch is an optional suffix, so an 'else'
* attaches to the nearest preceding 'if'.
*/
'if' LPAREN expression RPAREN statement ('else' statement)?
;
iterationStatement:
'do' statement 'while' LPAREN expression RPAREN SEMI
| 'while' LPAREN expression RPAREN statement
| 'for' LPAREN (
(expressionNoln)? SEMI (expression)? SEMI (expression)? RPAREN statement
| 'var' variableDeclarationListNoln SEMI (expression)? SEMI (expression)? RPAREN statement
| leftHandSideExpression 'in' expression RPAREN statement
| 'var' variableDeclarationNoln 'in' expression RPAREN statement
)
;
continueStatement:
'continue' /* [ no line terminator here ] */ (identifier)? SEMI
;
breakStatement:
'break' /* [ no line terminator here ] */ (identifier)? SEMI
;
returnStatement:
'return' /* [no line terminator here] */ (expression)? SEMI
;
withStatement:
'with' LPAREN expression RPAREN statement
;
switchStatement:
'switch' LPAREN expression RPAREN caseBlock
;
caseBlock:
/*
* SPEC:
* LBRACE (caseClauses)? RBRACE
* | LBRACE (caseClauses)? defaultClause (caseClauses)? RBRACE
* ->
* left-factored: at most one defaultClause, with optional case clauses on
* either side of it.
*/
LBRACE (caseClauses)? (defaultClause (caseClauses)?)? RBRACE
;
caseClauses:
/*
* SPEC:
* caseClause
* | caseClauses caseClause
*/
(caseClause)+
;
caseClause:
'case' expression COLON (statementList)?
;
defaultClause:
'default' COLON (statementList)?
;
labelledStatement:
identifier COLON statement
;
throwStatement:
'throw' /* [no line terminator here] */ expression SEMI
;
tryStatement:
/*
* SPEC:
* 'try' block catch_
* | 'try' block finally_
* | 'try' block catch_ finally_
* ->
* left-factored: after the protected block comes either a catch_ with an
* optional finally_, or a finally_ on its own.
*/
'try' block (catch_ (finally_)? | finally_)
;
catch_:
'catch' LPAREN identifier RPAREN block
;
finally_:
'finally' block
;
/* A.5 Functions and Programs */
functionDeclaration:
/*
* SPEC (13):
* 'function' identifier LPAREN (formalParameterList)? RPAREN LBRACE functionBody RBRACE
*
* The RPAREN that closes the parameter list was missing.
*/
'function' identifier LPAREN (formalParameterList)? RPAREN LBRACE functionBody RBRACE
;
functionExpression:
/*
* SPEC (13):
* 'function' (identifier)? LPAREN (formalParameterList)? RPAREN LBRACE functionBody RBRACE
*
* Same shape as a function declaration except that the name is optional.
* The RPAREN that closes the parameter list was missing.
*/
'function' (identifier)? LPAREN (formalParameterList)? RPAREN LBRACE functionBody RBRACE
;
formalParameterList:
/*
* SPEC:
* identifier
* | formalParameterList COMMA identifier
*/
identifier (formalParameterListTail)*
;
formalParameterListTail:
COMMA identifier
;
functionBody:
sourceElements
;
/*
* program is actually the starting element for the grammar so I have commented
* out this one and then copied it to the very beginning as that is what the
* start really is.
*/
/*program:
sourceElements
;*/
sourceElements:
/*
* SPEC:
* sourceElement
* | sourceElements sourceElement
* ->
* (sourceElement)+
*/
(sourceElement)+
;
sourceElement:
statement
| functionDeclaration
;
/* A.6 URI character classes */
uri:
(uriCharacters)?
;
uriCharacters:
/*
* SPEC:
* uriCharacter (uriCharacters)?
* ->
* (uriCharacter)+
*/
(uriCharacter)+
;
uriCharacter:
uriReserved
| uriUnescaped
| uriEscaped
;
uriReserved:
SEMI | SLASH | QUESTION | COLON | AT | AMPER | EQ | PLUS | DOLLAR | COMMA
;
uriUnescaped:
uriAlpha
| decimalDigit
| uriMark
;
uriEscaped:
PERCENT hexDigit hexDigit
;
uriAlpha:
ALPHA_CHARACTER /* consists of a-zA-Z */
;
uriMark:
MINUS | UNDERSCORE | DOT | EXCLAMATION | TILDE | ASTERISK | APOSTROPHE | LPAREN | RPAREN
;
/* A.7 Regular Expressions */
/* SPEC (15.10.1): Pattern is the start symbol of the regular expression grammar. */
pattern:
disjunction
;
/* Misspelled original name of the rule above; kept as an alias so that any
* existing reference to it keeps working. */
patter:
pattern
;
disjunction:
/*
* SPEC:
* alternative
* | alternative PIPE disjunction
* ->
* alternative (PIPE alternative)*
*
* Left-factored so the parser does not have to choose between two
* alternatives that begin identically.
*/
alternative (PIPE alternative)*
;
alternative:
/*
* SPEC:
* -empty-
*| alternative term
*/
(term)*
;
term:
/*
* SPEC:
* assertion
* | atom
* | atom quantifier
* ->
* the two atom alternatives are left-factored into an atom with an optional
* quantifier.
*/
assertion
| atom (quantifier)?
;
assertion:
CAROT
| DOLLAR
| '\\b' /* double check this - looks like a space in the manual? */
| '\\B' /* double check this - looks like a space in the manual? */
;
quantifier:
/*
* SPEC:
* quantifierPrefix
* | quantifierPrefix QUESTION
* ->
* left-factored: a trailing QUESTION (the non-greedy marker) is optional.
*/
quantifierPrefix (QUESTION)?
;
quantifierPrefix:
/*
* SPEC:
* ASTERISK
* | PLUS
* | QUESTION
* | LBRACE decimalDigits RBRACE
* | LBRACE decimalDigits COMMA RBRACE
* | LBRACE decimalDigits COMMA decimalDigits RBRACE
* ->
* the three brace forms share the prefix "LBRACE decimalDigits" and are
* left-factored into one alternative with an optional ", max" part.
*/
ASTERISK
| PLUS
| QUESTION
| LBRACE decimalDigits (COMMA (decimalDigits)?)? RBRACE
;
atom:
/*
* SPEC (15.10.1):
* patternCharacter
* | DOT
* | BSLASH atomEscape
* | characterClass
* | LPAREN disjunction RPAREN
* | LPAREN QUESTION COLON disjunction RPAREN
* | LPAREN QUESTION EQ disjunction RPAREN
* | LPAREN QUESTION EXCLAMATION disjunction RPAREN
*
* The special group forms are introduced by QUESTION followed by one of
* COLON, EQ or EXCLAMATION; the QUESTION was previously missing.
*/
patternCharacter
| DOT
| BSLASH atomEscape
| characterClass
| LPAREN (QUESTION (COLON|EQ|EXCLAMATION))? disjunction RPAREN
;
patternCharacter:
sourceCharacter /* but not any of:
^ $ \ . * + ? ( ) [ ] { } |
*/
;
atomEscape:
decimalEscape
| characterEscape
| characterClassEscape
;
characterEscape:
/*
* This rule is reached from atom and classAtomNoDash only after a BSLASH has
* been consumed, so the unicode form is written as LOWER_U plus four hex
* digits. The shared unicodeEscapeSequence rule starts with its own
* backslash ('\\u') and would require a second one here.
*/
controlEscape
| LOWER_C controlLetter
| hexEscapeSequence
| LOWER_U hexDigit hexDigit hexDigit hexDigit
| identityEscape
;
controlEscape:
CONTROL_ESCAPE_CHAR /* one of: fnrtv */
;
controlLetter:
/* one of: a-z A-Z */
LOWER_ALPHA_CHAR
| UPPER_ALPHA_CHAR
;
identityEscape:
sourceCharacter /* but not identifierPart */
;
characterClassEscape:
'd' | 'D' | 's' | 'S' | 'w' | 'W'
;
decimalEscape:
decimalIntegerLiteral /* lookahead not a member of decimalDigit */
;
characterClass:
/*
* SPEC (15.10.1):
* LBRACK [lookahead not a member of {^}] classRanges RBRACK
* | LBRACK CAROT classRanges RBRACK
* ->
* LBRACK (CAROT)? classRanges RBRACK
*
* The rule body used to consist of a comment only, so characterClass matched
* the empty string. A leading CAROT is taken as the negation marker, which
* is what the spec's lookahead restriction requires.
*/
LBRACK (CAROT)? classRanges RBRACK
;
classRanges:
/* empty */
| nonemptyClassRanges
;
nonemptyClassRanges:
classAtom
| classAtom nonemptyClassRangesNoDash
| classAtom MINUS classAtom classRanges
;
nonemptyClassRangesNoDash:
classAtom
| classAtomNoDash nonemptyClassRangesNoDash
| classAtomNoDash MINUS classAtom classRanges
;
classAtom:
MINUS
| classAtomNoDash
;
classAtomNoDash:
sourceCharacter /* but not one of: \ ] - */
| BSLASH classEscape
;
classEscape:
decimalEscape
| LOWER_B
| characterEscape
| characterClassEscape
;
/* a few basic characters and other things that I am going to see */
SEMI: ';';
//MPER: '&'; // disabled: misspelled duplicate of AMPER, which matches the same literal and is the name the parser rules use
//DASH: '-';
EQ: '=';
COMMA: ',';
SLASH: '/';
BSLASH: '\\';
LBRACK: '[';
RBRACK: ']';
LBRACE: '{';
RBRACE: '}';
LPAREN: '(';
RPAREN: ')';
APOSTROPHE: '\'';
QUOTE: '"';
QUESTION: '?';
COLON: ':';
ASTERISK: '*';
AT : '@';
AMPER : '&';
PLUS : '+';
MINUS:'-';
DOLLAR : '$';
UNDERSCORE:'_';
DOT : '.';
LT : '<';
GT : '>';
LTEQ: '<=';
GTEQ : '>=';
EQ2 : '==';
NOTEQ: '!=';
EQ3 : '===';
NOTEQ2: '!==';
STAR:'*';
PERCENT : '%';
PLUS2:'++';
MINUS2:'--';
LSHIFT:'<<';
RSHIFT:'>>';
GT3:'>>>';
PIPE : '|';
CAROT : '^';
EXCLAMATION: '!';
TILDE:'~';
AMPER2:'&&';
PIPE2 : '||';
PLUSEQ:'+=';
MINUSEQ:'-=';
TIMESEQ:'*=';
PERCENTEQ:'%=';
LSHIFTEQ:'<<=';
RSHIFTEQ: '>>=';
GT3EQ : '>>>=';
AMPEREQ:'&=';
PIPEEQ : '|=';
CAROTEQ : '^=';
DIVIDE : '/';
DIVIDEEQ: '/=';
ZERO : '0';
NON_ZERO_DIGIT
: ('1'..'9');
DIGIT : ('0'..'9');
EXPONENT_INDICATOR
: 'e'|'E';
CONTROL_ESCAPE_CHAR
: 'f'|'n'|'r'|'t'|'v'; /* one of: fnrtv */
LOWER_B: 'b';
LOWER_C : 'c';
LOWER_F: 'f';
LOWER_N: 'n';
LOWER_R: 'r';
LOWER_T: 't';
LOWER_V: 'v';
LOWER_X: 'x';
LOWER_U: 'u';
ALPHA_CHARACTER
: ('a'..'z')|('A'..'Z');
LOWER_ALPHA_CHAR
: ('a'..'z');
UPPER_ALPHA_CHAR
: ('A'..'Z'); /* was 'a'..'z', an exact copy of LOWER_ALPHA_CHAR */
WS: // ignore white space as it doesn't matter
// Matches a single space, a CR LF pair, a lone LF, or a tab. The two
// line-ending alternatives call newline(); every match has its token type
// set to Token.SKIP. A lone CR (without a following LF) is not matched here.
( ' ' | '\r' '\n' {newline();}| '\n' {newline();} | '\t' ) { $setType(Token.SKIP); }
;
SOURCE_CHAR: ('\u0000'..'\uFFFE');
TAB: '\u0009';
VT: '\u000b';
FF: '\u000c';
SP: '\u0020';
NBSP: '\u00a0';
USP: '\u1680' // OGHAM SPACE MARK
| '\u2000' // EN QUAD
| '\u2001' // EM QUAD
| '\u2002' // EN SPACE
| '\u2003' // EM SPACE
| '\u2004' // THREE-PER-EM SPACE
| '\u2005' // FOUR-PER-EM SPACE
| '\u2006' // SIX-PER-EM SPACE
| '\u2007' // FIGURE SPACE
| '\u2008' // PUNCTUATION SPACE
| '\u2009' // THIN SPACE
| '\u200A' // HAIR SPACE
| '\u200B' // ZERO WIDTH SPACE
| '\u202F' // NARROW NO-BREAK SPACE
| '\u3000' // IDEOGRAPHIC SPACE
;
LF: '\u000a'; // line feed
CR: '\u000d'; // carriage return
LS: '\u2028'; // line separator
PS: '\u2029'; // paragraph separator
UNICODE_LETTER:
('\u0041'..'\u005A') | ('\u0061'..'\u007A') | '\u00AA' | '\u00B5'
| '\u00BA' | ('\u00C0'..'\u00D6') | ('\u00D8'..'\u00F6') | ('\u00F8'..'\u021F')
| ('\u0222'..'\u0233') | ('\u0250'..'\u02AD') | ('\u02B0'..'\u02B8') | ('\u02BB'..'\u02C1')
| ('\u02D0'..'\u02D1') | ('\u02E0'..'\u02E4') | '\u02EE' | '\u037A'
| '\u0386' | ('\u0388'..'\u038A') | '\u038C' | ('\u038E'..'\u03A1')
| ('\u03A3'..'\u03CE') | ('\u03D0'..'\u03D7') | ('\u03DA'..'\u03F3') | ('\u0400'..'\u0481')
| ('\u048C'..'\u04C4') | ('\u04C7'..'\u04C8') | ('\u04CB'..'\u04CC') | ('\u04D0'..'\u04F5')
| ('\u04F8'..'\u04F9') | ('\u0531'..'\u0556') | '\u0559' |('\u0561'..'\u0587')
| ('\u05D0'..'\u05EA') | ('\u05F0'..'\u05F2') | ('\u0621'..'\u063A') |('\u0640'..'\u064A')
| ('\u0671'..'\u06D3') | '\u06D5' | ('\u06E5'..'\u06E6') |('\u06FA'..'\u06FC')
| '\u0710' | ('\u0712'..'\u072C') | ('\u0780'..'\u07A5') |('\u0905'..'\u0939')
| '\u093D' | '\u0950' | ('\u0958'..'\u0961') |('\u0985'..'\u098C')
| ('\u098F'..'\u0990') | ('\u0993'..'\u09A8') | ('\u09AA'..'\u09B0') | '\u09B2'
| ('\u09B6'..'\u09B9') | ('\u09DC'..'\u09DD') | ('\u09DF'..'\u09E1') |('\u09F0'..'\u09F1')
| ('\u0A05'..'\u0A0A') | ('\u0A0F'..'\u0A10') | ('\u0A13'..'\u0A28') |('\u0A2A'..'\u0A30')
| ('\u0A32'..'\u0A33') | ('\u0A35'..'\u0A36') | ('\u0A38'..'\u0A39') |('\u0A59'..'\u0A5C')
| '\u0A5E' | ('\u0A72'..'\u0A74') | ('\u0A85'..'\u0A8B') | '\u0A8D'
| ('\u0A8F'..'\u0A91') | ('\u0A93'..'\u0AA8') | ('\u0AAA'..'\u0AB0') |('\u0AB2'..'\u0AB3')
| ('\u0AB5'..'\u0AB9') | '\u0ABD' | '\u0AD0' | '\u0AE0'
| ('\u0B05'..'\u0B0C') | ('\u0B0F'..'\u0B10') | ('\u0B13'..'\u0B28') |('\u0B2A'..'\u0B30')
| ('\u0B32'..'\u0B33') | ('\u0B36'..'\u0B39') | '\u0B3D' |('\u0B5C'..'\u0B5D')
| ('\u0B5F'..'\u0B61') | ('\u0B85'..'\u0B8A') | ('\u0B8E'..'\u0B90') |('\u0B92'..'\u0B95')
| ('\u0B99'..'\u0B9A') | '\u0B9C' | ('\u0B9E'..'\u0B9F') |('\u0BA3'..'\u0BA4')
| ('\u0BA8'..'\u0BAA') | ('\u0BAE'..'\u0BB5') | ('\u0BB7'..'\u0BB9') |('\u0C05'..'\u0C0C')
| ('\u0C0E'..'\u0C10') | ('\u0C12'..'\u0C28') | ('\u0C2A'..'\u0C33') |('\u0C35'..'\u0C39')
| ('\u0C60'..'\u0C61') | ('\u0C85'..'\u0C8C') | ('\u0C8E'..'\u0C90') |('\u0C92'..'\u0CA8')
| ('\u0CAA'..'\u0CB3') | ('\u0CB5'..'\u0CB9') | '\u0CDE' |('\u0CE0'..'\u0CE1')
| ('\u0D05'..'\u0D0C') | ('\u0D0E'..'\u0D10') | ('\u0D12'..'\u0D28') |('\u0D2A'..'\u0D39')
| ('\u0D60'..'\u0D61') | ('\u0D85'..'\u0D96') | ('\u0D9A'..'\u0DB1') |('\u0DB3'..'\u0DBB')
| '\u0DBD' | ('\u0DC0'..'\u0DC6') | ('\u0E01'..'\u0E30') |('\u0E32'..'\u0E33')
| ('\u0E40'..'\u0E46') | ('\u0E81'..'\u0E82') | '\u0E84' |('\u0E87'..'\u0E88')
| '\u0E8A' | '\u0E8D' | ('\u0E94'..'\u0E97') |('\u0E99'..'\u0E9F')
| ('\u0EA1'..'\u0EA3') | '\u0EA5' | '\u0EA7' |('\u0EAA'..'\u0EAB')
| ('\u0EAD'..'\u0EB0') | ('\u0EB2'..'\u0EB3') | ('\u0EBD'..'\u0EC4') | '\u0EC6'
| ('\u0EDC'..'\u0EDD') | '\u0F00' | ('\u0F40'..'\u0F6A') |('\u0F88'..'\u0F8B')
| ('\u1000'..'\u1021') | ('\u1023'..'\u1027') | ('\u1029'..'\u102A') |('\u1050'..'\u1055')
| ('\u10A0'..'\u10C5') | ('\u10D0'..'\u10F6') | ('\u1100'..'\u1159') |('\u115F'..'\u11A2')
| ('\u11A8'..'\u11F9') | ('\u1200'..'\u1206') | ('\u1208'..'\u1246') | '\u1248'
| ('\u124A'..'\u124D') | ('\u1250'..'\u1256') | '\u1258' |('\u125A'..'\u125D')
| ('\u1260'..'\u1286') | '\u1288' | ('\u128A'..'\u128D') |('\u1290'..'\u12AE')
| '\u12B0' | ('\u12B2'..'\u12B5') | ('\u12B8'..'\u12BE') | '\u12C0'
| ('\u12C2'..'\u12C5') | ('\u12C8'..'\u12CE') | ('\u12D0'..'\u12D6') |('\u12D8'..'\u12EE')
| ('\u12F0'..'\u130E') | '\u1310' | ('\u1312'..'\u1315') |('\u1318'..'\u131E')
| ('\u1320'..'\u1346') | ('\u1348'..'\u135A') | ('\u13A0'..'\u13B0') |('\u13B1'..'\u13F4')
| ('\u1401'..'\u1676') | ('\u1681'..'\u169A') | ('\u16A0'..'\u16EA') |('\u1780'..'\u17B3')
| ('\u1820'..'\u1877') | ('\u1880'..'\u18A8') | ('\u1E00'..'\u1E9B') |('\u1EA0'..'\u1EE0')
| ('\u1EE1'..'\u1EF9') | ('\u1F00'..'\u1F15') | ('\u1F18'..'\u1F1D') |('\u1F20'..'\u1F39')
| ('\u1F3A'..'\u1F45') | ('\u1F48'..'\u1F4D') | ('\u1F50'..'\u1F57') | '\u1F59'
| '\u1F5B' | '\u1F5D' | ('\u1F5F'..'\u1F7D') | ('\u1F80'..'\u1FB4')
| ('\u1FB6'..'\u1FBC') | '\u1FBE' | ('\u1FC2'..'\u1FC4') | ('\u1FC6'..'\u1FCC')
| ('\u1FD0'..'\u1FD3') | ('\u1FD6'..'\u1FDB') | ('\u1FE0'..'\u1FEC') | ('\u1FF2'..'\u1FF4')
| ('\u1FF6'..'\u1FFC') | '\u207F' | '\u2102' | '\u2107'
| ('\u210A'..'\u2113') | '\u2115' | ('\u2119'..'\u211D') | '\u2124'
| '\u2126' | '\u2128' | ('\u212A'..'\u212D') | ('\u212F'..'\u2131')
| ('\u2133'..'\u2139') | ('\u2160'..'\u2183') | ('\u3005'..'\u3007') | ('\u3021'..'\u3029')
| ('\u3031'..'\u3035') | ('\u3038'..'\u303A') | ('\u3041'..'\u3094') | ('\u309D'..'\u309E')
| ('\u30A1'..'\u30FA') | ('\u30FC'..'\u30FE') | ('\u3105'..'\u312C') | ('\u3131'..'\u318E')
| ('\u31A0'..'\u31B7') | '\u3400' | '\u4DB5' | '\u4E00'
| '\u9FA5' | ('\uA000'..'\uA48C') | '\uAC00' | '\uD7A3'
| ('\uF900'..'\uFA2D') | ('\uFB00'..'\uFB06') | ('\uFB13'..'\uFB17') | '\uFB1D'
| ('\uFB1F'..'\uFB28') | ('\uFB2A'..'\uFB36') | ('\uFB38'..'\uFB3C') | '\uFB3E'
| ('\uFB40'..'\uFB41') | ('\uFB43'..'\uFB44') | ('\uFB46'..'\uFBB1') | ('\uFBD3'..'\uFD3D')
| ('\uFD50'..'\uFD8F') | ('\uFD92'..'\uFDC7') | ('\uFDF0'..'\uFDFB') | ('\uFE70'..'\uFE72')
| '\uFE74' | ('\uFE76'..'\uFEFC') | ('\uFF21'..'\uFF3A') | ('\uFF41'..'\uFF5A')
| ('\uFF66'..'\uFFBE') | ('\uFFC2'..'\uFFC7') | ('\uFFCA'..'\uFFCF') | ('\uFFD2'..'\uFFD7')
| ('\uFFDA'..'\uFFDC')
;
HEXDIGIT: ('0'..'9')|('a'..'f')|('A'..'F');
UNICODE_DIGIT:
('\u0030'..'\u0039') | ('\u0660'..'\u0669') | ('\u06F0'..'\u06F9') | ('\u0966'..'\u096F')
| ('\u09E6'..'\u09EF') | ('\u0A66'..'\u0A6F') | ('\u0AE6'..'\u0AEF') | ('\u0B66'..'\u0B6F')
| ('\u0BE7'..'\u0BEF') | ('\u0C66'..'\u0C6F') | ('\u0CE6'..'\u0CEF') | ('\u0D66'..'\u0D6F')
| ('\u0E50'..'\u0E59') | ('\u0ED0'..'\u0ED9') | ('\u0F20'..'\u0F29') | ('\u1040'..'\u1049')
| ('\u1369'..'\u1371') | ('\u17E0'..'\u17E9') | ('\u1810'..'\u1819') | ('\uFF10'..'\uFF19')
;
UNICODE_NONSPACING_MARK
: ('\u0300'..'\u036F'); // And more... see: http://www.fileformat.info/info/unicode/category/Mn/list.htm
UNICODE_COMBINING_MARK: // Appears to be bogus... see: http://www.fileformat.info/info/unicode/category/Mc/list.htm
('\u0300'..'\u034E') | ('\u0360'..'\u0362') | ('\u0483'..'\u0486') | ('\u0591'..'\u05A1')
| ('\u05A3'..'\u05B9') | ('\u05BB'..'\u05BD') | '\u05BF' | ('\u05C1'..'\u05C2')
| '\u05C4' | ('\u064B'..'\u0655') | '\u0670' | ('\u06D6'..'\u06DC')
| ('\u06DF'..'\u06E4') | ('\u06E7'..'\u06E8') | ('\u06EA'..'\u06ED') | '\u0711'
| ('\u0730'..'\u074A') | ('\u07A6'..'\u07B0') | ('\u0901'..'\u0903') | '\u093C'
| ('\u093E'..'\u094D') | ('\u0951'..'\u0954') | ('\u0962'..'\u0963') | ('\u0981'..'\u0983')
| ('\u09BC'..'\u09C4') | ('\u09C7'..'\u09C8') | ('\u09CB'..'\u09CD') | '\u09D7'
| ('\u09E2'..'\u09E3') | '\u0A02' | '\u0A3C' | ('\u0A3E'..'\u0A42')
| ('\u0A47'..'\u0A48') | ('\u0A4B'..'\u0A4D') | ('\u0A70'..'\u0A71') | ('\u0A81'..'\u0A83')
| '\u0ABC' | ('\u0ABE'..'\u0AC5') | ('\u0AC7'..'\u0AC9') | ('\u0ACB'..'\u0ACD')
| ('\u0B01'..'\u0B03') | '\u0B3C' | ('\u0B3E'..'\u0B43') | ('\u0B47'..'\u0B48')
| ('\u0B4B'..'\u0B4D') | ('\u0B56'..'\u0B57') | ('\u0B82'..'\u0B83') | ('\u0BBE'..'\u0BC2')
| ('\u0BC6'..'\u0BC8') | ('\u0BCA'..'\u0BCD') | '\u0BD7' | ('\u0C01'..'\u0C03')
| ('\u0C3E'..'\u0C44') | ('\u0C46'..'\u0C48') | ('\u0C4A'..'\u0C4D') | ('\u0C55'..'\u0C56')
| ('\u0C82'..'\u0C83') | ('\u0CBE'..'\u0CC4') | ('\u0CC6'..'\u0CC8') | ('\u0CCA'..'\u0CCD')
| ('\u0CD5'..'\u0CD6') | ('\u0D02'..'\u0D03') | ('\u0D3E'..'\u0D43') | ('\u0D46'..'\u0D48')
| ('\u0D4A'..'\u0D4D') | '\u0D57' | ('\u0D82'..'\u0D83') | '\u0DCA'
| ('\u0DCF'..'\u0DD4') | '\u0DD6' | ('\u0DD8'..'\u0DDF') | ('\u0DF2'..'\u0DF3')
| '\u0E31' | ('\u0E34'..'\u0E3A') | ('\u0E47'..'\u0E4E') | '\u0EB1'
| ('\u0EB4'..'\u0EB9') | ('\u0EBB'..'\u0EBC') | ('\u0EC8'..'\u0ECD') | ('\u0F18'..'\u0F19')
| '\u0F35' | '\u0F37' | '\u0F39' | ('\u0F3E'..'\u0F3F')
| ('\u0F71'..'\u0F84') | ('\u0F86'..'\u0F87') | ('\u0F90'..'\u0F97') | ('\u0F99'..'\u0FBC')
| '\u0FC6' | ('\u102C'..'\u1032') | ('\u1036'..'\u1039') | ('\u1056'..'\u1059')
| ('\u17B4'..'\u17D3') | '\u18A9' | ('\u20D0'..'\u20DC') | '\u20E1'
| ('\u302A'..'\u302F') | ('\u3099'..'\u309A') | '\uFB1E' | ('\uFE20'..'\uFE23')
;
UNICODE_CONNECTOR_PUNCTUATION:
'\u005F' | ('\u203F'..'\u2040') | '\u30FB' | ('\uFE33'..'\uFE34') | ('\uFE4D'..'\uFE4F')
| '\uFF3F' | '\uFF65'
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment