Last active
August 29, 2015 14:26
-
-
Save Centaur/7cbd0e3611f7aa78ccaa to your computer and use it in GitHub Desktop.
javascript grammar from https://github.com/kre/Kinetic-Rules-Engine/blob/master/parser/ecmascriptA3.g
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
grammar ECMAScript; | |
options { | |
// Allow any char but \uFFFF (16 bit -1) | |
charVocabulary='\u0000'..'\uFFFE'; | |
} | |
/* this comes from section A.5 but is really the starting point, so | |
* it is present here. | |
*/ | |
program: | |
sourceElements EOF | |
; | |
/* A.1 Lexical Grammar */ | |
sourceCharacter: | |
/* any unicode character */ | |
/* see section 6 */ | |
SOURCE_CHAR | |
; | |
inputElementDiv: | |
whiteSpace | |
| lineTerminator | |
| comment | |
| token | |
| divPunctuator | |
; | |
inputElementRegExp: | |
whiteSpace | |
| lineTerminator | |
| comment | |
| token | |
| regularExpressionLiteral | |
; | |
whiteSpace: | |
TAB | |
| VT | |
| FF | |
| SP | |
| NBSP | |
| USP | |
; | |
lineTerminator: | |
| LF | |
| CR | |
| LS | |
| PS | |
; | |
comment: | |
multiLineComment | |
| singleLineComment | |
; | |
multiLineComment: | |
'/*' multiLineCommentChars '*/' | |
; | |
multiLineCommentChars: | |
multiLineNotAsterikChar multiLineCommentChars | |
| ASTERISK postAsterikCommentChars | |
; | |
postAsterikCommentChars: | |
multiLineNotForwardSlashOrAsterikChar multiLineCommentChars | |
| ASTERISK postAsterikCommentChars | |
; | |
multiLineNotAsterikChar: | |
sourceCharacter /* but not ASTERISK */ | |
; | |
multiLineNotForwardSlashOrAsterikChar: | |
sourceCharacter /* but not FORWARD-SLASH or ASTERISK */ | |
; | |
singleLineComment: | |
'//' singleLineCommentChars | |
; | |
singleLineCommentChars: | |
singleLineCommentChar singleLineCommentChars | |
; | |
singleLineCommentChar: | |
sourceCharacter /* but not lineTerminator */ | |
; | |
token: | |
reservedWord | |
| identifier | |
/* syntactic predicate used to disambiguate between DOT on ALT 3 and ALT 5 */ | |
| (DOT decimalDigits)=> numericLiteral | |
/* syntactic predicate used to disambiguate between APOSTROPHE on ALT 4 and ALT 5 */ | |
| (APOSTROPHE (singleStringCharacters)? APOSTROPHE)=> stringLiteral | |
| punctuator | |
; | |
reservedWord: | |
keyword | |
| futureReservedWord | |
| nullLiteral | |
| booleanLiteral | |
; | |
keyword: | |
'break' | 'case' | 'catch' | 'continue' | 'default' | 'delete' | 'do' | |
| 'else' | 'finally' | 'for' | 'function' | 'if' | 'in' | 'instanceof' | |
| 'new' | 'return' | 'switch' | 'this' | 'throw' | 'try' | 'typeof' | |
| 'var' | 'void' | 'while' | 'with' | |
; | |
futureReservedWord: | |
'abstract' | 'boolean' | 'byte' | 'char' | 'class' | 'const' | 'debugger' | 'double' | |
| 'enum' | 'export' | 'extends' | 'final' | 'float' | 'goto' | 'implements' | 'import' | |
| 'int' | 'interface' | 'long' | 'native' | 'package' | 'private' | 'protected' | 'public' | |
| 'short' | 'static' | 'super' | 'synchronized' | 'throws' | 'transient' | 'volatile' | |
; | |
identifier: | |
identifierName /* but not reservedWord */ | |
; | |
identifierName: | |
/* | |
* left factorization: | |
* | |
* SPEC: | |
* identifierStart | |
* | identifierName identifierPart | |
* -> | |
* ((identifierStart)+ (identifierPart)?)+ | |
*/ | |
identifierStart (identifierPart)* | |
; | |
identifierStart: | |
unicodeLetter | |
| DOLLAR | |
| UNDERSCORE | |
| unicodeEscapeSequence | |
; | |
identifierPart: | |
/* syntactic predicate to remove non-determinism upon alts 1 and 5 | |
* - always choose the identifierStart when possible | |
*/ | |
(unicodeEscapeSequence identifierPart)=> identifierStart | |
| unicodeCombiningMark | |
| unicodeDigit | |
| unicodeConnectorPunctuation | |
| unicodeEscapeSequence | |
; | |
unicodeLetter: | |
/* any character in the unicode categories: | |
"uppercase letter (Lu)", | |
"lowercase letter (Li)", | |
"titlecase letter (Lt)", | |
"modifier letter (Lm)", | |
"other letter (lo)", | |
"letter number (NI)" */ | |
UNICODE_LETTER | |
; | |
unicodeCombiningMark: | |
/* any character in the unicode categories: | |
"non-spacing mark (Mn)" | |
"combining spacing mark (Mc)" | |
*/ | |
UNICODE_NONSPACING_MARK | |
| UNICODE_COMBINING_MARK | |
; | |
unicodeDigit: | |
/* any character in the unicode category "decimal number (Nd)" */ | |
UNICODE_DIGIT | |
; | |
unicodeConnectorPunctuation: | |
/* any character in the unicode category "connector punctuation (Pc)" */ | |
UNICODE_CONNECTOR_PUNCTUATION | |
; | |
unicodeEscapeSequence: | |
'\\u' hexDigit hexDigit hexDigit hexDigit | |
; | |
hexDigit: | |
/* explicitly enumerated in grammar */ | |
HEXDIGIT | |
; | |
punctuator: | |
LBRACE | RBRACE | LPAREN | RPAREN | LBRACK | RBRACK | |
| DOT | SEMI | APOSTROPHE | LT | GT | LTEQ | |
| GTEQ | EQ2 | NOTEQ | EQ3 | NOTEQ2 | |
| PLUS | MINUS | STAR | PERCENT | PLUS2 | MINUS2 | |
| LSHIFT | RSHIFT | GT3 | AMPER | PIPE | CAROT | |
| EXCLAMATION | TILDE | AMPER2 | PIPE2 | QUESTION | COLON | |
| EQ | PLUSEQ | MINUSEQ | TIMESEQ | PERCENTEQ | LSHIFTEQ | |
| RSHIFTEQ | GT3EQ | AMPEREQ | PIPEEQ | CAROTEQ | |
; | |
divPunctuator: | |
DIVIDE | |
| DIVIDEEQ | |
; | |
literal: | |
nullLiteral | |
| booleanLiteral | |
| numericLiteral | |
| stringLiteral | |
; | |
nullLiteral: | |
'null' | |
; | |
booleanLiteral: | |
'true' | |
| 'false' | |
; | |
numericLiteral: | |
decimalLiteral | |
| hexIntegerLiteral | |
; | |
decimalLiteral: | |
/* | |
* SPEC: | |
* decimalIntegerLiteral DOT (decimalDigits)? (exponentPart)? | |
* | DOT decimalDigits (exponentPart)? | |
* | decimalIntegerLiteral (exponentPart)? | |
* -> | |
*/ | |
decimalIntegerLiteral decimalIntegerLiteralTail | |
| DOT decimalDigits (exponentPart)? | |
; | |
decimalIntegerLiteralTail: | |
/* this is necessary because of the way the grammar gets parsed */ | |
DOT (decimalDigits)? (exponentPart)? | |
| exponentPart | |
; | |
decimalIntegerLiteral: | |
ZERO | |
| NON_ZERO_DIGIT (decimalDigits)? | |
; | |
decimalDigits: | |
/* | |
* SPEC: | |
* decimalDigit | |
* | decimalDigits decimalDigit | |
* => | |
* (decimalDigits)+ | |
*/ | |
(decimalDigit)+ | |
; | |
decimalDigit: | |
DIGIT /* grammar has each one explicitely listed */ | |
; | |
exponentIndicator: | |
EXPONENT_INDICATOR /* grammar has both e and E listed */ | |
; | |
signedInteger: | |
decimalDigits | |
| PLUS decimalDigits | |
| MINUS decimalDigits | |
; | |
hexIntegerLiteral: | |
/* | |
* SPEC: | |
* '0x' hexDigit | |
* | '0X' hexDigit | |
* | hexIntegerLiteral hexDigit | |
* -> | |
* ('0x'|'0X') (hexDigit)+ | |
*/ | |
('0x'|'0X') (hexDigit)+ | |
; | |
stringLiteral: | |
QUOTE (doubleStringCharacters)? QUOTE | |
| APOSTROPHE (singleStringCharacters)? APOSTROPHE | |
; | |
doubleStringCharacters: | |
doubleStringCharacter (doubleStringCharacters)? | |
; | |
singleStringCharacters: | |
singleStringCharacter (singleStringCharacters)? | |
; | |
doubleStringCharacter: | |
sourceCharacter /* but not double quote or backslash or line terminator */ | |
BSLASH escapeSequence | |
; | |
singleStringCharacter: | |
sourceCharacter /* but not single quote or backslash or line terminator */ | |
| BSLASH escapeSequence | |
; | |
escapeSequence: | |
characterEscapeSequence | |
| ZERO /* [lookahead not a member of decimalDigit] */ | |
| hexEscapeSequence | |
| unicodeEscapeSequence | |
; | |
characterEscapeSequence: | |
singleEscapeCharacter | |
| nonEscapeCharacter | |
; | |
singleEscapeCharacter: | |
APOSTROPHE | QUOTE | BSLASH | LOWER_B | LOWER_F | LOWER_N | LOWER_R | LOWER_T | LOWER_V | |
; | |
nonEscapeCharacter: | |
sourceCharacter /* but not escapeCharacter or lineTerminator */ | |
; | |
escapeCharacter: | |
singleEscapeCharacter | |
| decimalDigit | |
| LOWER_X | |
| LOWER_U | |
; | |
hexEscapeSequence: | |
LOWER_X hexDigit hexDigit | |
; | |
/* defined above | |
unicodeEscapeSequence: | |
LOWER_U hexDigit hexDigit hexDigit hexDigit | |
;*/ | |
regularExpressionLiteral: | |
SLASH regularExpressionBody SLASH regularExpressionFlags | |
; | |
regularExpressionBody: | |
regularExpressionFirstChar regularExpressionChars | |
; | |
regularExpressionChars: | |
/* [empty] */ | |
/* | |
* SPEC: regularExpressionChars regularExpressionChar | |
* ->: regularExpressionChar regularExpressionChars | |
*/ | |
regularExpressionChar regularExpressionChars | |
; | |
regularExpressionFirstChar: | |
nonTerminator /* but not * or \ or / */ | |
| backslashSequence | |
; | |
regularExpressionChar: | |
nonTerminator /* but not * or \ or / */ | |
| backslashSequence | |
; | |
backslashSequence: | |
BSLASH nonTerminator | |
; | |
nonTerminator: | |
sourceCharacter /* but not lineTerminator */ | |
; | |
regularExpressionFlags: | |
/* | |
* SPEC: | |
* [empty] | regularExpressionFlags identifierPart | |
* -> | |
* (identifierPart|)* | |
*/ | |
(identifierPart)* | |
; | |
/* A.2 Number Conversions */ | |
stringNumericLiteral: | |
/* | |
* SPEC: | |
* (strWhiteSpace)? | |
* | (strWhiteSpace)? strNumericLiteral (strWhiteSpace)? | |
*/ | |
(strWhiteSpace)? (strNumericLiteral (strWhiteSpace)?)? | |
; | |
strWhiteSpace: | |
strWhiteSpaceChar (strWhiteSpace)? | |
; | |
strWhiteSpaceChar: | |
TAB | SP | NBSP | FF | VT | CR | LF | LS | PS | USP | |
; | |
strNumericLiteral: | |
strDecimalLiteral | |
| hexIntegerLiteral | |
; | |
strDecimalLiteral: | |
strUnsignedDecimalLiteral | |
| PLUS strUnsignedDecimalLiteral | |
| MINUS strUnsignedDecimalLiteral | |
; | |
strUnsignedDecimalLiteral: | |
/* | |
* SPEC: | |
* 'Infinity' | |
* | decimalDigits DOT (decimalDigits)? (exponentPart)? | |
* | DOT decimalDigits (exponentPart)? | |
* | decimalDigits (exponentPart)? | |
*/ | |
'Infinity' | |
/* syntactic predicate used to remove nondet between ALTs 2 and 4 */ | |
| (decimalDigits DOT)=> decimalDigits DOT (decimalDigits)? (exponentPart)? | |
| DOT decimalDigits (exponentPart)? | |
| decimalDigits (exponentPart)? | |
; | |
/* defined above | |
decimalDigits: | |
decimalDigit | |
| decimalDigits decimalDigit | |
;*/ | |
/* decimalDigit: | |
DIGIT // grammar has them explicitly enumerated | |
;*/ | |
exponentPart: | |
exponentIndicator signedInteger | |
; | |
/* redefined above | |
exponentIndicator: | |
EXPONENT_INDICATOR | |
;*/ | |
/* redefined above | |
signedInteger: | |
decimalDigits | |
| PLUS decimalDigits | |
| MINUS decimalDigits | |
;*/ | |
/* redefined above | |
hexIntegerLiteral: | |
'0x' hexDigit | |
| '0X' hexDigit | |
| hexIntegerLiteral hexDigit | |
;*/ | |
/* redefined above | |
hexDigit: | |
HEXDIGIT // grammar has them explicitely enumerated | |
; */ | |
/* A.3 Expressions */ | |
primaryExpression: | |
'this' | |
| identifier | |
| literal | |
| arrayLiteral | |
| objectLiteral | |
| LPAREN expression RPAREN | |
; | |
arrayLiteral: | |
/* | |
* SPEC: | |
* LBRACK (elision)? RBRACK | |
* | LBRACK elementList RBRACK | |
* | LBRACK elementList COMMA (elision)? RBRACK | |
*/ | |
LBRACK ( | |
(elision)? | |
| elementList (COMMA (elision)?)? | |
) RBRACK | |
; | |
elementList: | |
/* | |
* SPEC: | |
* (elision)? assignmentExpression | |
* | elementList COMMA (elision)? assignmentExpression | |
*/ | |
(elision)? assignmentExpression (elementListTail)* | |
; | |
elementListTail: | |
COMMA (elision)? assignmentExpression | |
; | |
elision: | |
/* | |
* SPEC: | |
* COMMA | |
* | elision COMMA | |
* -> | |
* (COMMA)+ | |
*/ | |
(COMMA)+ | |
; | |
objectLiteral: | |
/* | |
* SPEC: | |
* LBRACE RBRACE | |
* | LBRACE propertyNameAndValueList RBRACE | |
*/ | |
LBRACE (propertyNameAndValueList)? RBRACE | |
; | |
propertyNameAndValueList: | |
/* | |
* SPEC: | |
* propertyName COLON assignmentExpression | |
* | propertyNameAndValueList COMMA propertyName COLON assignmentExpression | |
*/ | |
propertyName COLON assignmentExpression (propertyNameAndValueListTail)* | |
; | |
propertyNameAndValueListTail: | |
COMMA propertyName COLON assignmentExpression | |
; | |
propertyName: | |
identifier | |
| stringLiteral | |
| numericLiteral | |
; | |
memberExpression: | |
/* | |
* SPEC: | |
* primaryExpression | |
* | functionExpression | |
* | memberExpression LBRACK expression RBRACK | |
* | memberExpression DOT identifier | |
* | 'new' memberExpression arguments | |
* -> | |
*/ | |
( primaryExpression | |
| functionExpression | |
| 'new' memberExpression arguments | |
) (memberExpressionTail)* | |
; | |
memberExpressionTail: | |
/* SPEC: not a part of formal grammar */ | |
LBRACK expression RBRACK | |
| DOT identifier | |
; | |
newExpression: | |
/* | |
* SPEC: | |
* memberExpression | |
* | 'new' newExpression | |
*/ | |
/* syntactic predicate added to resolve between nondet in ALTs 1 and 2 */ | |
('new' newExpression)=> 'new' newExpression | |
| memberExpression | |
; | |
callExpression: | |
/* | |
* SPEC: | |
* memberExpression arguments | |
* | callExpression arguments | |
* | callExpression LBRACK expression RBRACK | |
* | callExpression DOT identifier | |
*/ | |
memberExpression arguments (callExpressionTail)* | |
; | |
callExpressionTail: | |
arguments | |
| LBRACK expression RBRACK | |
| DOT identifier | |
; | |
arguments: | |
/* | |
* SPEC: | |
* LPAREN RPAREN | |
* | LPAREN argumentList RPAREN | |
*/ | |
LPAREN (argumentList)? RPAREN | |
; | |
argumentList: | |
/* | |
* SPEC: | |
* assignmentExpression | |
* | argumentList COMMA assignmentExpression | |
*/ | |
assignmentExpression (argumentListTail)* | |
; | |
argumentListTail: | |
COMMA assignmentExpression | |
; | |
leftHandSideExpression: | |
/* | |
* SPEC: | |
* newExpression | |
* | callExpression | |
*/ | |
('new' newExpression)=> newExpression | |
| callExpression | |
; | |
postfixExpression: | |
/* | |
* SPEC: | |
* leftHandSideExpression | |
* | leftHandSideExpression / no line terminator here / PLUS2 | |
* | leftHandSideExpression / no line terminator here / MINUS2 | |
*/ | |
leftHandSideExpression (PLUS2|MINUS2)? | |
; | |
unaryExpression: | |
postfixExpression | |
| 'delete' unaryExpression | |
| 'void' unaryExpression | |
| 'typeof' unaryExpression | |
| PLUS2 unaryExpression | |
| MINUS2 unaryExpression | |
| PLUS unaryExpression | |
| MINUS unaryExpression | |
| TILDE unaryExpression | |
| EXCLAMATION unaryExpression | |
; | |
multiplicativeExpression: | |
/* | |
* SPEC: | |
* unaryExpression | |
* | multiplicativeExpression ASTERISK unaryExpression | |
* | multiplicativeExpression DIVIDE unaryExpression | |
* | multiplicativeExpression PERCENT unaryExpression | |
*/ | |
unaryExpression (multiplicativeExpressionTail)* | |
; | |
multiplicativeExpressionTail: | |
ASTERISK unaryExpression | |
| DIVIDE unaryExpression | |
| PERCENT unaryExpression | |
; | |
additiveExpression: | |
/* | |
* SPEC: | |
* multiplicativeExpression | |
* | additiveExpression PLUS multiplicativeExpression | |
* | additiveExpression MINUS multiplicativeExpression | |
*/ | |
multiplicativeExpression (additiveExpressionTail)* | |
; | |
additiveExpressionTail: | |
PLUS multiplicativeExpression | |
| MINUS multiplicativeExpression | |
; | |
shiftExpression: | |
/* | |
* SPEC: | |
* additiveExpression | |
* | shiftExpression LSHIFT additiveExpression | |
* | shiftExpression RSHIFT additiveExpression | |
* | shiftExpression GT3 additiveExpression | |
*/ | |
additiveExpression (shiftExpressionTail)* | |
; | |
shiftExpressionTail: | |
LSHIFT additiveExpression | |
| RSHIFT additiveExpression | |
| GT3 additiveExpression | |
; | |
relationalExpression: | |
/* | |
* SPEC: | |
* shiftExpression | |
* | relationalExpression LT shiftExpression | |
* | relationalExpression GT shiftExpression | |
* | relationalExpression LTEQ shiftExpression | |
* | relationalExpression GTEQ shiftExpression | |
* | relationalExpression 'instanceof' shiftExpression | |
* | relationalExpression 'in' shiftExpression | |
*/ | |
shiftExpression (relationalExpressionTail)* | |
; | |
relationalExpressionTail: | |
LT shiftExpression | |
| GT shiftExpression | |
| LTEQ shiftExpression | |
| GTEQ shiftExpression | |
| 'instanceof' shiftExpression | |
| 'in' shiftExpression | |
; | |
relationalExpressionNoln: | |
shiftExpression | |
| relationalExpression (LT|GT|LTEQ|GTEQ|'instanceof') shiftExpression | |
; | |
equalityExpression: | |
/* | |
* SPEC: | |
* relationalExpression | |
* | equalityExpression (EQ2|NOTEQQ|EQ3|NOTEQQ2) relationalExpression | |
*/ | |
relationalExpression (equalityExpressionTail)* | |
; | |
equalityExpressionTail: | |
(EQ2|NOTEQ|EQ3|NOTEQ2) relationalExpression | |
; | |
equalityExpressionNoln: | |
/* | |
* SPEC: | |
* relationalExpressionNoln | |
* | equalityExpressionNoln (EQ2|NOTEQQ|EQ3|NOTEQQ2) relationalExpressionNoln | |
*/ | |
relationalExpressionNoln (equalityExpressionNolnTail)* | |
; | |
equalityExpressionNolnTail: | |
(EQ2|NOTEQ|EQ3|NOTEQ2) relationalExpressionNoln | |
; | |
bitwiseAndExpression: | |
/* | |
* SPEC: | |
* equalityExpression | |
* | bitwiseAndExpression AMPER equalityExpression | |
*/ | |
equalityExpression (bitwiseAndExpressionTail)* | |
; | |
bitwiseAndExpressionTail: | |
AMPER equalityExpression | |
; | |
bitwiseAndExpressionNoln: | |
/* | |
* SPEC: | |
* equalityExpressionNoln | |
* | bitwiseAndExpressionNoln AMPER equalityExpressionNoln | |
*/ | |
equalityExpressionNoln (bitwiseAndExpressionNolnTail)* | |
; | |
bitwiseAndExpressionNolnTail: | |
AMPER equalityExpressionNoln | |
; | |
bitwiseXorExpression: | |
/* | |
* SPEC: | |
* bitwiseAndExpression | |
* | bitwiseXorExpression CAROT bitwiseAndExpression | |
*/ | |
bitwiseAndExpression (bitwiseXorExpressionTail)* | |
; | |
bitwiseXorExpressionTail: | |
CAROT bitwiseAndExpression | |
; | |
bitwiseXorExpressionNoln: | |
/* | |
* SPEC: | |
* bitwiseAndExpressionNoln | |
* | bitwiseXorExpressionNoln CAROT bitwiseAndExpressionNoln | |
*/ | |
bitwiseAndExpressionNoln (bitwiseXorExpressionNolnTail)* | |
; | |
bitwiseXorExpressionNolnTail: | |
CAROT bitwiseAndExpressionNoln | |
; | |
bitwiseOrExpression: | |
/* | |
* SPEC: | |
* bitwiseXorExpression | |
* | bitwiseOrExpression PIPE bitwiseXorExpression | |
*/ | |
bitwiseXorExpression (bitwiseOrExpressionTail)* | |
; | |
bitwiseOrExpressionTail: | |
PIPE bitwiseXorExpression | |
; | |
bitwiseOrExpressionNoln: | |
/* | |
* SPEC: | |
* bitwiseXorExpressionNoln | |
* | bitwiseOrExpressionNoln PIPE bitwiseXorExpressionNoln | |
*/ | |
bitwiseXorExpressionNoln (bitwiseOrExpressionNolnTail)* | |
; | |
bitwiseOrExpressionNolnTail: | |
PIPE bitwiseOrExpressionNolnTail | |
; | |
logicalAndExpression: | |
bitwiseOrExpression | |
| logicalAndExpressionNoln AMPER2 bitwiseOrExpression | |
; | |
logicalAndExpressionNoln: | |
/* | |
* SPEC: | |
* bitwiseOrExpressionNoln | |
* | logicalAndExpressionNoln AMPER2 bitwiseOrExpressionNoln | |
*/ | |
bitwiseOrExpressionNoln (logicalAndExpressionNolnTail)* | |
; | |
logicalAndExpressionNolnTail: | |
AMPER2 bitwiseOrExpressionNoln | |
; | |
logicalOrExpression: | |
/* | |
* SPEC: | |
* logicalAndExpression | |
* | logicalOrExpression '||' logicalAndExpression | |
*/ | |
logicalAndExpression (logicalOrExpressionTail)* | |
; | |
logicalOrExpressionTail: | |
'||' logicalAndExpression | |
; | |
logicalOrExpressionNoln: | |
/* | |
* SPEC: | |
* logicalAndExpressionNoln | |
* | logicalOrExpressionNoln '||' logicalAndExpressionNoln | |
*/ | |
logicalAndExpressionNoln (logicalOrExpressionNolnTail)* | |
; | |
logicalOrExpressionNolnTail: | |
'||' logicalAndExpressionNoln | |
; | |
conditionalExpression: | |
logicalOrExpression | |
| logicalOrExpression QUESTION assignmentExpression COLON assignmentExpression | |
; | |
conditionalExpressionNoln: | |
logicalOrExpressionNoln | |
| logicalOrExpressionNoln QUESTION assignmentExpressionNoln COLON assignmentExpressionNoln | |
; | |
assignmentExpression: | |
/* | |
* SPEC: | |
* conditionalExpression | |
* | leftHandSideExpression assignmentOperator assignmentExpression | |
*/ | |
(leftHandSideExpression assignmentOperator) => | |
leftHandSideExpression assignmentOperator assignmentExpression | |
| conditionalExpression | |
; | |
assignmentExpressionNoln: | |
conditionalExpressionNoln | |
| leftHandSideExpression assignmentOperator assignmentExpressionNoln | |
; | |
assignmentOperator: | |
/* note that in the grammar these are listed out explicitely */ | |
EQ | TIMESEQ | DIVIDEEQ | PERCENTEQ | PLUSEQ | MINUSEQ | LSHIFTEQ | RSHIFTEQ | |
| GT3EQ | AMPEREQ | CAROTEQ | PIPEEQ | |
; | |
expression: | |
/* | |
* SPEC: | |
* assignmentExpression | |
* | expression COMMA assignmentExpression | |
*/ | |
assignmentExpression (expressionTail)* | |
; | |
expressionTail: | |
COMMA assignmentExpression | |
; | |
expressionNoln: | |
/* | |
* SPEC: | |
* assignmentExpressionNoln | |
* | expressionNoln COMMA assignmentExpressionNoln | |
*/ | |
assignmentExpressionNoln (expressionNolnTail)* | |
; | |
expressionNolnTail: | |
COMMA assignmentExpressionNoln | |
; | |
/* A.4 Statements */ | |
statement: | |
block | |
| variableStatement | |
| emptyStatement | |
| expressionStatement | |
| ifStatement | |
| iterationStatement | |
| continueStatement | |
| breakStatement | |
| returnStatement | |
| withStatement | |
| labelledStatement | |
| switchStatement | |
| throwStatement | |
| tryStatement | |
; | |
block: | |
LBRACE (statementList)? RBRACE | |
; | |
statementList: | |
/* | |
* SPEC: | |
* statement | |
* | statementList statement | |
*/ | |
(statement)+ | |
; | |
variableStatement: | |
'var' variableDeclarationList SEMI | |
; | |
variableDeclarationList: | |
/* | |
* SPEC: | |
* variableDeclaration | |
* | variableDeclarationList COMMA variableDeclaration | |
*/ | |
variableDeclaration (variableDeclarationTail)* | |
; | |
variableDeclarationTail: | |
COMMA variableDeclaration | |
; | |
variableDeclarationListNoln: | |
/* | |
* SPEC: | |
* variableDeclarationNoln | |
* | variableDeclarationListNoln COMMA variableDeclarationNoln | |
*/ | |
variableDeclarationNoln (variableDeclarationListNolnTail)* | |
; | |
variableDeclarationListNolnTail: | |
COMMA variableDeclarationNoln | |
; | |
variableDeclaration: | |
identifier (initialiser)? | |
; | |
variableDeclarationNoln: | |
identifier (initialiserNoln)? | |
; | |
initialiser: | |
EQ assignmentExpression | |
; | |
initialiserNoln: | |
EQ assignmentExpressionNoln | |
; | |
emptyStatement: | |
; | |
expressionStatement: | |
/* [lookahead not a member of {{, function}} */ expression SEMI | |
; | |
ifStatement: | |
'if' LPAREN expression RPAREN statement 'else' statement | |
| 'if' LPAREN expression RPAREN statement | |
; | |
iterationStatement: | |
'do' statement 'while' LPAREN expression RPAREN SEMI | |
| 'while' LPAREN expression RPAREN statement | |
| 'for' LPAREN ( | |
(expressionNoln)? SEMI (expression)? SEMI (expression)? RPAREN statement | |
| 'var' variableDeclarationListNoln SEMI (expression)? SEMI (expression)? RPAREN statement | |
| leftHandSideExpression 'in' expression RPAREN statement | |
| 'var' variableDeclarationNoln 'in' expression RPAREN statement | |
) | |
; | |
continueStatement: | |
'continue' /* [ no line terminator here ] */ (identifier)? SEMI | |
; | |
breakStatement: | |
'break' /* [ no line terminator here ] */ (identifier)? SEMI | |
; | |
returnStatement: | |
'return' /* [no line terminator here] */ (expression)? SEMI | |
; | |
withStatement: | |
'with' LPAREN expression RPAREN statement | |
; | |
switchStatement: | |
'switch' LPAREN expression RPAREN caseBlock | |
; | |
caseBlock: | |
LBRACE (caseClauses)? RBRACE | |
| LBRACE (caseClauses)? defaultClause (caseClauses)? RBRACE | |
; | |
caseClauses: | |
/* | |
* SPEC: | |
* caseClause | |
* | caseClauses caseClause | |
*/ | |
(caseClause)+ | |
; | |
caseClause: | |
'case' expression COLON (statementList)? | |
; | |
defaultClause: | |
'default' COLON (statementList)? | |
; | |
labelledStatement: | |
identifier COLON statement | |
; | |
throwStatement: | |
'throw' /* [no line terminator here] */ expression SEMI | |
; | |
tryStatement: | |
'try' block catch_ | |
| 'try' block finally_ | |
| 'try' block catch_ finally_ | |
; | |
catch_: | |
'catch' LPAREN identifier RPAREN block | |
; | |
finally_: | |
'finally' block | |
; | |
/* A.5 Functions and Programs */ | |
functionDeclaration: | |
'function' identifier LPAREN (formalParameterList)? LBRACE functionBody RBRACE | |
; | |
functionExpression: | |
'function' (identifier)? LPAREN (formalParameterList)? LBRACE functionBody RBRACE | |
; | |
formalParameterList: | |
/* | |
* SPEC: | |
* identifier | |
* | formalParameterList COMMA identifier | |
*/ | |
identifier (formalParameterListTail)* | |
; | |
formalParameterListTail: | |
COMMA identifier | |
; | |
functionBody: | |
sourceElements | |
; | |
/* | |
* program is actually the starting element for the grammar so I have commented | |
* out this one and then copied it to the very beginning as that is what the | |
* start really is. | |
*/ | |
/*program: | |
sourceElements | |
;*/ | |
sourceElements: | |
/* | |
* SPEC: | |
* sourceStatement | |
* | sourceElements sourceElement | |
*/ | |
(sourceElement)+ | |
; | |
sourceElement: | |
statement | |
| functionDeclaration | |
; | |
/* A.6 URI character classes */ | |
uri: | |
(uriCharacters)? | |
; | |
uriCharacters: | |
uriCharacter (uriCharacters)? | |
; | |
uriCharacter: | |
uriReserved | |
| uriUnescaped | |
| uriEscaped | |
; | |
uriReserved: | |
SEMI | SLASH | QUESTION | COLON | AT | AMPER | EQ | PLUS | DOLLAR | COMMA | |
; | |
uriUnescaped: | |
uriAlpha | |
| decimalDigit | |
| uriMark | |
; | |
uriEscaped: | |
PERCENT hexDigit hexDigit | |
; | |
uriAlpha: | |
ALPHA_CHARACTER /* consists of a-zA-Z */ | |
; | |
uriMark: | |
MINUS | UNDERSCORE | DOT | EXCLAMATION | TILDE | ASTERISK | APOSTROPHE | LPAREN | RPAREN | |
; | |
/* A.7 Regular Exrpessions */ | |
patter: | |
disjunction | |
; | |
disjunction: | |
alternative | |
| alternative PIPE disjunction | |
; | |
alternative: | |
/* | |
* SPEC: | |
* -empty- | |
*| alternative term | |
*/ | |
(term)* | |
; | |
term: | |
assertion | |
| atom | |
| atom quantifier | |
; | |
assertion: | |
CAROT | |
| DOLLAR | |
| '\\b' /* double check this - looks like a space in the manual? */ | |
| '\\B' /* double check this - looks like a space in the manual? */ | |
; | |
quantifier: | |
quantifierPrefix | |
| quantifierPrefix QUESTION | |
; | |
quantifierPrefix: | |
ASTERISK | |
| PLUS | |
| QUESTION | |
| LBRACE decimalDigits RBRACE | |
| LBRACE decimalDigits COMMA RBRACE | |
| LBRACE decimalDigits COMMA decimalDigits RBRACE | |
; | |
atom: | |
patternCharacter | |
| DOT | |
| BSLASH atomEscape | |
| characterClass | |
| LPAREN ((COLON|EQ|EXCLAMATION)? disjunction) RPAREN | |
; | |
patternCharacter: | |
sourceCharacter /* but not any of: | |
^ $ \ . * + ? ( ) [ ] { } | | |
*/ | |
; | |
atomEscape: | |
decimalEscape | |
| characterEscape | |
| characterClassEscape | |
; | |
characterEscape: | |
controlEscape | |
| LOWER_C controlLetter | |
| hexEscapeSequence | |
| unicodeEscapeSequence | |
| identityEscape | |
; | |
controlEscape: | |
CONTROL_ESCAPE_CHAR /* one of: fnrtv */ | |
; | |
controlLetter: | |
/* one of: a-z A-Z */ | |
LOWER_ALPHA_CHAR | |
| UPPER_ALPHA_CHAR | |
; | |
identityEscape: | |
sourceCharacter /* but not identifierPart */ | |
; | |
characterClassEscape: | |
'd' | 'D' | 's' | 'S' | 'w' | 'W' | |
; | |
decimalEscape: | |
decimalIntegerLiteral /* lookahead not a member of decimalDigit */ | |
; | |
characterClass: | |
/* [ [lookahead not a member of {^}] ClassRanges ] | |
| [ ^ classRanges ] | |
*/ | |
; | |
classRanges: | |
/* empty */ | |
| nonemptyClassRanges | |
; | |
nonemptyClassRanges: | |
classAtom | |
| classAtom nonemptyClassRangesNoDash | |
| classAtom MINUS classAtom classRanges | |
; | |
nonemptyClassRangesNoDash: | |
classAtom | |
| classAtomNoDash nonemptyClassRangesNoDash | |
| classAtomNoDash MINUS classAtom classRanges | |
; | |
classAtom: | |
MINUS | |
| classAtomNoDash | |
; | |
classAtomNoDash: | |
sourceCharacter /* but not one of: \ ] - */ | |
| BSLASH classEscape | |
; | |
classEscape: | |
decimalEscape | |
| LOWER_B | |
| characterEscape | |
| characterClassEscape | |
; | |
/* a few basic characters and other things that I am going to see */ | |
SEMI: ';'; | |
MPER: '&'; | |
//DASH: '-'; | |
EQ: '='; | |
COMMA: ','; | |
SLASH: '/'; | |
BSLASH: '\\'; | |
LBRACK: '['; | |
RBRACK: ']'; | |
LBRACE: '{'; | |
RBRACE: '}'; | |
LPAREN: '('; | |
RPAREN: ')'; | |
APOSTROPHE: '\''; | |
QUOTE: '"'; | |
QUESTION: '?'; | |
COLON: ':'; | |
ASTERISK: '*'; | |
AT : '@'; | |
AMPER : '&'; | |
PLUS : '+'; | |
MINUS:'-'; | |
DOLLAR : '$'; | |
UNDERSCORE:'_'; | |
DOT : '.'; | |
LT : '<'; | |
GT : '>'; | |
LTEQ: '<='; | |
GTEQ : '>='; | |
EQ2 : '=='; | |
NOTEQ: '!='; | |
EQ3 : '==='; | |
NOTEQ2: '!=='; | |
STAR:'*'; | |
PERCENT : '%'; | |
PLUS2:'++'; | |
MINUS2:'--'; | |
LSHIFT:'<<'; | |
RSHIFT:'>>'; | |
GT3:'>>>'; | |
PIPE : '|'; | |
CAROT : '^'; | |
EXCLAMATION: '!'; | |
TILDE:'~'; | |
AMPER2:'&&'; | |
PIPE2 : '||'; | |
PLUSEQ:'+='; | |
MINUSEQ:'-='; | |
TIMESEQ:'*='; | |
PERCENTEQ:'%='; | |
LSHIFTEQ:'<<='; | |
RSHIFTEQ: '>>='; | |
GT3EQ : '>>>='; | |
AMPEREQ:'&='; | |
PIPEEQ : '|='; | |
CAROTEQ : '^='; | |
DIVIDE : '/'; | |
DIVIDEEQ: '/='; | |
ZERO : '0'; | |
NON_ZERO_DIGIT | |
: ('1'..'9'); | |
DIGIT : ('0'..'9'); | |
EXPONENT_INDICATOR | |
: 'e'|'E'; | |
CONTROL_ESCAPE_CHAR | |
: 'f'|'n'|'r'|'t'|'v'; /* one of: fnrtv */ | |
LOWER_B: 'b'; | |
LOWER_C : 'c'; | |
LOWER_F: 'f'; | |
LOWER_N: 'n'; | |
LOWER_R: 'r'; | |
LOWER_T: 't'; | |
LOWER_V: 'v'; | |
LOWER_X: 'x'; | |
LOWER_U: 'u'; | |
ALPHA_CHARACTER | |
: ('a'..'z')|('A'..'Z'); | |
LOWER_ALPHA_CHAR | |
: ('a'..'z'); | |
UPPER_ALPHA_CHAR | |
: ('a'..'z'); | |
WS: // ignore white space as it doesn't matter | |
( ' ' | '\r' '\n' {newline();}| '\n' {newline();} | '\t' ) { $setType(Token.SKIP); } | |
; | |
SOURCE_CHAR: ('\u0000'..'\uFFFE'); | |
TAB: '\u0009'; | |
VT: '\u000b'; | |
FF: '\u000c'; | |
SP: '\u0020'; | |
NBSP: '\u00a0'; | |
USP: '\u1680' // OGHAM SPACE MARK | |
| '\u2000' // EN QUAD | |
| '\u2001' // EM QUAD | |
| '\u2002' // EN SPACE | |
| '\u2003' // EM SPACE | |
| '\u2004' // THREE-PER-EM SPACE | |
| '\u2005' // FOUR-PER-EM SPACE | |
| '\u2006' // SIX-PER-EM SPACE | |
| '\u2007' // FIGURE SPACE | |
| '\u2008' // PUNCTUATION SPACE | |
| '\u2009' // THIN SPACE | |
| '\u200A' // HAIR SPACE | |
| '\u200B' // ZERO WIDTH SPACE | |
| '\u202F' // NARROW NO-BREAK SPACE | |
| '\u3000' // IDEOGRAPHIC SPACE | |
; | |
LF: '\u000a'; // line feed | |
CR: '\u000d'; // carriage return | |
LS: '\u2028'; // line separator | |
PS: '\u2029'; // paragraph separator | |
UNICODE_LETTER: | |
('\u0041'..'\u005A') | ('\u0061'..'\u007A') | '\u00AA' | '\u00B5' | |
| '\u00BA' | ('\u00C0'..'\u00D6') | ('\u00D8'..'\u00F6') | ('\u00F8'..'\u021F') | |
| ('\u0222'..'\u0233') | ('\u0250'..'\u02AD') | ('\u02B0'..'\u02B8') | ('\u02BB'..'\u02C1') | |
| ('\u02D0'..'\u02D1') | ('\u02E0'..'\u02E4') | '\u02EE' | '\u037A' | |
| '\u0386' | ('\u0388'..'\u038A') | '\u038C' | ('\u038E'..'\u03A1') | |
| ('\u03A3'..'\u03CE') | ('\u03D0'..'\u03D7') | ('\u03DA'..'\u03F3') | ('\u0400'..'\u0481') | |
| ('\u048C'..'\u04C4') | ('\u04C7'..'\u04C8') | ('\u04CB'..'\u04CC') | ('\u04D0'..'\u04F5') | |
| ('\u04F8'..'\u04F9') | ('\u0531'..'\u0556') | '\u0559' |('\u0561'..'\u0587') | |
| ('\u05D0'..'\u05EA') | ('\u05F0'..'\u05F2') | ('\u0621'..'\u063A') |('\u0640'..'\u064A') | |
| ('\u0671'..'\u06D3') | '\u06D5' | ('\u06E5'..'\u06E6') |('\u06FA'..'\u06FC') | |
| '\u0710' | ('\u0712'..'\u072C') | ('\u0780'..'\u07A5') |('\u0905'..'\u0939') | |
| '\u093D' | '\u0950' | ('\u0958'..'\u0961') |('\u0985'..'\u098C') | |
| ('\u098F'..'\u0990') | ('\u0993'..'\u09A8') | ('\u09AA'..'\u09B0') | '\u09B2' | |
| ('\u09B6'..'\u09B9') | ('\u09DC'..'\u09DD') | ('\u09DF'..'\u09E1') |('\u09F0'..'\u09F1') | |
| ('\u0A05'..'\u0A0A') | ('\u0A0F'..'\u0A10') | ('\u0A13'..'\u0A28') |('\u0A2A'..'\u0A30') | |
| ('\u0A32'..'\u0A33') | ('\u0A35'..'\u0A36') | ('\u0A38'..'\u0A39') |('\u0A59'..'\u0A5C') | |
| '\u0A5E' | ('\u0A72'..'\u0A74') | ('\u0A85'..'\u0A8B') | '\u0A8D' | |
| ('\u0A8F'..'\u0A91') | ('\u0A93'..'\u0AA8') | ('\u0AAA'..'\u0AB0') |('\u0AB2'..'\u0AB3') | |
| ('\u0AB5'..'\u0AB9') | '\u0ABD' | '\u0AD0' | '\u0AE0' | |
| ('\u0B05'..'\u0B0C') | ('\u0B0F'..'\u0B10') | ('\u0B13'..'\u0B28') |('\u0B2A'..'\u0B30') | |
| ('\u0B32'..'\u0B33') | ('\u0B36'..'\u0B39') | '\u0B3D' |('\u0B5C'..'\u0B5D') | |
| ('\u0B5F'..'\u0B61') | ('\u0B85'..'\u0B8A') | ('\u0B8E'..'\u0B90') |('\u0B92'..'\u0B95') | |
| ('\u0B99'..'\u0B9A') | '\u0B9C' | ('\u0B9E'..'\u0B9F') |('\u0BA3'..'\u0BA4') | |
| ('\u0BA8'..'\u0BAA') | ('\u0BAE'..'\u0BB5') | ('\u0BB7'..'\u0BB9') |('\u0C05'..'\u0C0C') | |
| ('\u0C0E'..'\u0C10') | ('\u0C12'..'\u0C28') | ('\u0C2A'..'\u0C33') |('\u0C35'..'\u0C39') | |
| ('\u0C60'..'\u0C61') | ('\u0C85'..'\u0C8C') | ('\u0C8E'..'\u0C90') |('\u0C92'..'\u0CA8') | |
| ('\u0CAA'..'\u0CB3') | ('\u0CB5'..'\u0CB9') | '\u0CDE' |('\u0CE0'..'\u0CE1') | |
| ('\u0D05'..'\u0D0C') | ('\u0D0E'..'\u0D10') | ('\u0D12'..'\u0D28') |('\u0D2A'..'\u0D39') | |
| ('\u0D60'..'\u0D61') | ('\u0D85'..'\u0D96') | ('\u0D9A'..'\u0DB1') |('\u0DB3'..'\u0DBB') | |
| '\u0DBD' | ('\u0DC0'..'\u0DC6') | ('\u0E01'..'\u0E30') |('\u0E32'..'\u0E33') | |
| ('\u0E40'..'\u0E46') | ('\u0E81'..'\u0E82') | '\u0E84' |('\u0E87'..'\u0E88') | |
| '\u0E8A' | '\u0E8D' | ('\u0E94'..'\u0E97') |('\u0E99'..'\u0E9F') | |
| ('\u0EA1'..'\u0EA3') | '\u0EA5' | '\u0EA7' |('\u0EAA'..'\u0EAB') | |
| ('\u0EAD'..'\u0EB0') | ('\u0EB2'..'\u0EB3') | ('\u0EBD'..'\u0EC4') | '\u0EC6' | |
| ('\u0EDC'..'\u0EDD') | '\u0F00' | ('\u0F40'..'\u0F6A') |('\u0F88'..'\u0F8B') | |
| ('\u1000'..'\u1021') | ('\u1023'..'\u1027') | ('\u1029'..'\u102A') |('\u1050'..'\u1055') | |
| ('\u10A0'..'\u10C5') | ('\u10D0'..'\u10F6') | ('\u1100'..'\u1159') |('\u115F'..'\u11A2') | |
| ('\u11A8'..'\u11F9') | ('\u1200'..'\u1206') | ('\u1208'..'\u1246') | '\u1248' | |
| ('\u124A'..'\u124D') | ('\u1250'..'\u1256') | '\u1258' |('\u125A'..'\u125D') | |
| ('\u1260'..'\u1286') | '\u1288' | ('\u128A'..'\u128D') |('\u1290'..'\u12AE') | |
| '\u12B0' | ('\u12B2'..'\u12B5') | ('\u12B8'..'\u12BE') | '\u12C0' | |
| ('\u12C2'..'\u12C5') | ('\u12C8'..'\u12CE') | ('\u12D0'..'\u12D6') |('\u12D8'..'\u12EE') | |
| ('\u12F0'..'\u130E') | '\u1310' | ('\u1312'..'\u1315') |('\u1318'..'\u131E') | |
| ('\u1320'..'\u1346') | ('\u1348'..'\u135A') | ('\u13A0'..'\u13B0') |('\u13B1'..'\u13F4') | |
| ('\u1401'..'\u1676') | ('\u1681'..'\u169A') | ('\u16A0'..'\u16EA') |('\u1780'..'\u17B3') | |
| ('\u1820'..'\u1877') | ('\u1880'..'\u18A8') | ('\u1E00'..'\u1E9B') |('\u1EA0'..'\u1EE0') | |
| ('\u1EE1'..'\u1EF9') | ('\u1F00'..'\u1F15') | ('\u1F18'..'\u1F1D') |('\u1F20'..'\u1F39') | |
| ('\u1F3A'..'\u1F45') | ('\u1F48'..'\u1F4D') | ('\u1F50'..'\u1F57') | '\u1F59' | |
| '\u1F5B' | '\u1F5D' | ('\u1F5F'..'\u1F7D') | ('\u1F80'..'\u1FB4') | |
| ('\u1FB6'..'\u1FBC') | '\u1FBE' | ('\u1FC2'..'\u1FC4') | ('\u1FC6'..'\u1FCC') | |
| ('\u1FD0'..'\u1FD3') | ('\u1FD6'..'\u1FDB') | ('\u1FE0'..'\u1FEC') | ('\u1FF2'..'\u1FF4') | |
| ('\u1FF6'..'\u1FFC') | '\u207F' | '\u2102' | '\u2107' | |
| ('\u210A'..'\u2113') | '\u2115' | ('\u2119'..'\u211D') | '\u2124' | |
| '\u2126' | '\u2128' | ('\u212A'..'\u212D') | ('\u212F'..'\u2131') | |
| ('\u2133'..'\u2139') | ('\u2160'..'\u2183') | ('\u3005'..'\u3007') | ('\u3021'..'\u3029') | |
| ('\u3031'..'\u3035') | ('\u3038'..'\u303A') | ('\u3041'..'\u3094') | ('\u309D'..'\u309E') | |
| ('\u30A1'..'\u30FA') | ('\u30FC'..'\u30FE') | ('\u3105'..'\u312C') | ('\u3131'..'\u318E') | |
| ('\u31A0'..'\u31B7') | '\u3400' | '\u4DB5' | '\u4E00' | |
| '\u9FA5' | ('\uA000'..'\uA48C') | '\uAC00' | '\uD7A3' | |
| ('\uF900'..'\uFA2D') | ('\uFB00'..'\uFB06') | ('\uFB13'..'\uFB17') | '\uFB1D' | |
| ('\uFB1F'..'\uFB28') | ('\uFB2A'..'\uFB36') | ('\uFB38'..'\uFB3C') | '\uFB3E' | |
| ('\uFB40'..'\uFB41') | ('\uFB43'..'\uFB44') | ('\uFB46'..'\uFBB1') | ('\uFBD3'..'\uFD3D') | |
| ('\uFD50'..'\uFD8F') | ('\uFD92'..'\uFDC7') | ('\uFDF0'..'\uFDFB') | ('\uFE70'..'\uFE72') | |
| '\uFE74' | ('\uFE76'..'\uFEFC') | ('\uFF21'..'\uFF3A') | ('\uFF41'..'\uFF5A') | |
| ('\uFF66'..'\uFFBE') | ('\uFFC2'..'\uFFC7') | ('\uFFCA'..'\uFFCF') | ('\uFFD2'..'\uFFD7') | |
| ('\uFFDA'..'\uFFDC') | |
; | |
HEXDIGIT: ('0'..'9')|('a'..'f')|('A'..'F'); | |
UNICODE_DIGIT: | |
('\u0030'..'\u0039') | ('\u0660'..'\u0669') | ('\u06F0'..'\u06F9') | ('\u0966'..'\u096F') | |
| ('\u09E6'..'\u09EF') | ('\u0A66'..'\u0A6F') | ('\u0AE6'..'\u0AEF') | ('\u0B66'..'\u0B6F') | |
| ('\u0BE7'..'\u0BEF') | ('\u0C66'..'\u0C6F') | ('\u0CE6'..'\u0CEF') | ('\u0D66'..'\u0D6F') | |
| ('\u0E50'..'\u0E59') | ('\u0ED0'..'\u0ED9') | ('\u0F20'..'\u0F29') | ('\u1040'..'\u1049') | |
| ('\u1369'..'\u1371') | ('\u17E0'..'\u17E9') | ('\u1810'..'\u1819') | ('\uFF10'..'\uFF19') | |
; | |
UNICODE_NONSPACING_MARK | |
: ('\u0300'..'\u036F'); // And more... see: http://www.fileformat.info/info/unicode/category/Mn/list.htm | |
UNICODE_COMBINING_MARK: // Appears to be bogus... see: http://www.fileformat.info/info/unicode/category/Mc/list.htm | |
('\u0300'..'\u034E') | ('\u0360'..'\u0362') | ('\u0483'..'\u0486') | ('\u0591'..'\u05A1') | |
| ('\u05A3'..'\u05B9') | ('\u05BB'..'\u05BD') | '\u05BF' | ('\u05C1'..'\u05C2') | |
| '\u05C4' | ('\u064B'..'\u0655') | '\u0670' | ('\u06D6'..'\u06DC') | |
| ('\u06DF'..'\u06E4') | ('\u06E7'..'\u06E8') | ('\u06EA'..'\u06ED') | '\u0711' | |
| ('\u0730'..'\u074A') | ('\u07A6'..'\u07B0') | ('\u0901'..'\u0903') | '\u093C' | |
| ('\u093E'..'\u094D') | ('\u0951'..'\u0954') | ('\u0962'..'\u0963') | ('\u0981'..'\u0983') | |
| ('\u09BC'..'\u09C4') | ('\u09C7'..'\u09C8') | ('\u09CB'..'\u09CD') | '\u09D7' | |
| ('\u09E2'..'\u09E3') | '\u0A02' | '\u0A3C' | ('\u0A3E'..'\u0A42') | |
| ('\u0A47'..'\u0A48') | ('\u0A4B'..'\u0A4D') | ('\u0A70'..'\u0A71') | ('\u0A81'..'\u0A83') | |
| '\u0ABC' | ('\u0ABE'..'\u0AC5') | ('\u0AC7'..'\u0AC9') | ('\u0ACB'..'\u0ACD') | |
| ('\u0B01'..'\u0B03') | '\u0B3C' | ('\u0B3E'..'\u0B43') | ('\u0B47'..'\u0B48') | |
| ('\u0B4B'..'\u0B4D') | ('\u0B56'..'\u0B57') | ('\u0B82'..'\u0B83') | ('\u0BBE'..'\u0BC2') | |
| ('\u0BC6'..'\u0BC8') | ('\u0BCA'..'\u0BCD') | '\u0BD7' | ('\u0C01'..'\u0C03') | |
| ('\u0C3E'..'\u0C44') | ('\u0C46'..'\u0C48') | ('\u0C4A'..'\u0C4D') | ('\u0C55'..'\u0C56') | |
| ('\u0C82'..'\u0C83') | ('\u0CBE'..'\u0CC4') | ('\u0CC6'..'\u0CC8') | ('\u0CCA'..'\u0CCD') | |
| ('\u0CD5'..'\u0CD6') | ('\u0D02'..'\u0D03') | ('\u0D3E'..'\u0D43') | ('\u0D46'..'\u0D48') | |
| ('\u0D4A'..'\u0D4D') | '\u0D57' | ('\u0D82'..'\u0D83') | '\u0DCA' | |
| ('\u0DCF'..'\u0DD4') | '\u0DD6' | ('\u0DD8'..'\u0DDF') | ('\u0DF2'..'\u0DF3') | |
| '\u0E31' | ('\u0E34'..'\u0E3A') | ('\u0E47'..'\u0E4E') | '\u0EB1' | |
| ('\u0EB4'..'\u0EB9') | ('\u0EBB'..'\u0EBC') | ('\u0EC8'..'\u0ECD') | ('\u0F18'..'\u0F19') | |
| '\u0F35' | '\u0F37' | '\u0F39' | ('\u0F3E'..'\u0F3F') | |
| ('\u0F71'..'\u0F84') | ('\u0F86'..'\u0F87') | ('\u0F90'..'\u0F97') | ('\u0F99'..'\u0FBC') | |
| '\u0FC6' | ('\u102C'..'\u1032') | ('\u1036'..'\u1039') | ('\u1056'..'\u1059') | |
| ('\u17B4'..'\u17D3') | '\u18A9' | ('\u20D0'..'\u20DC') | '\u20E1' | |
| ('\u302A'..'\u302F') | ('\u3099'..'\u309A') | '\uFB1E' | ('\uFE20'..'\uFE23') | |
; | |
UNICODE_CONNECTOR_PUNCTUATION: | |
'\u005F' | ('\u203F'..'\u2040') | '\u30FB' | ('\uFE33'..'\uFE34') | ('\uFE4D'..'\uFE4F') | |
| '\uFF3F' | '\uFF65' | |
; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment