Created
November 11, 2021 23:42
-
-
Save abcarroll/8e43b219186b5efaf5c1a09b38e41218 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ==== | |
# Sail/Lx Parsing Expression Grammar (PEG) | |
# | |
# This PEG format is intended for use with the metagrammar of the Pika parser reference | |
# implementation and closely follows the standard PEG notation. | |
# | |
# (C) Copyright 2020-2021. MIT License. | |
# | |
# ==== | |
# Core re-usable rules | |
# | |
# Whitespace: SP is exactly one whitespace character, REQSP one or more. | |
SP <- [ \t\r\n] ; | |
REQSP <- SP+ ; | |
# Any single character in the range U+0000..U+FFFF. | |
ANY <- [\u0000-\uFFFF] ; | |
# Identifier building blocks: a WORD is one letter followed by letters/digits. | |
ALPHA <- [A-Za-z] ; | |
ALPHANUM <- [A-Za-z0-9] ; | |
WORD <- ALPHA ALPHANUM* ; | |
# A single hexadecimal digit, upper or lower case. | |
HEXNUM <- [0-9A-Fa-f] ; | |
# Optional whitespace (zero or more characters); used between most tokens. | |
X <- SP* ; | |
# ==== | |
# Re-usable non-terminals | |
# A name reference: a WORD, optionally prefixed with "$". | |
NameRef <- "$"? WORD ; | |
# A statement list wrapped in curly braces; the list is labelled "body". | |
BlockBody <- L_CURLY X body:StatementList X R_CURLY ; | |
# ==== | |
# START/ROOT CLAUSE | |
# Root rule: optional leading whitespace followed by the statement list. | |
Program <- X StatementList ; | |
# ==== | |
# PRIMARY STATEMENTS | |
# Zero or more statements, each followed by optional whitespace. | |
StatementList <- (Statement X)* ; | |
# A single statement. Alternatives are tried in order; ExprStmt is the last | |
# fallback. | |
Statement | |
<- fnDecl:FnDecl | |
/ assignStmt:VarDeclStmt | |
/ ifStmt:IfStmt | |
/ whileStmt:WhileStmt | |
/ doWhileStmt:DoWhileStmt | |
/ returnStmt:ReturnStmt | |
/ exprStmt:ExprStmt; | |
# "return" with an optional value, e.g. `return;` or `return $x + $y;`. | |
# It is listed before ExprStmt in Statement because WORD also matches the text | |
# "return", so a bare `return;` would otherwise be taken as an expression | |
# statement that references a name called "return". | |
ReturnStmt | |
<- KW_RETURN X (expr:PrimaryExpr X)? SYM_SEMI | |
; | |
# Type annotation of a variable declaration: ":" followed by an optional "mut" | |
# and a type expression. | |
# "mut" must end at a word boundary; without the check a type name that merely | |
# starts with "mut" (e.g. "mutex") would be split into "mut" + "ex". | |
VarDeclType | |
<- SYM_COLON X (mut:("mut" !ALPHANUM))? X type:TypeExpr | |
; | |
# Parses: ("let ")? <name> (: Type)? = <Expr> ; | |
# The leading "let" is optional, so a plain assignment such as `$z = 5;` is | |
# matched by this rule as well. The assigned expression is labelled "value". | |
VarDeclStmt | |
<- (let:KW_LET REQSP)? nameRef:NameRef X (VarDeclType X)? SYM_ASSIGN X value:PrimaryExpr X SYM_SEMI | |
; | |
# An expression used as a statement: the expression followed by ";". | |
ExprStmt | |
<- PrimaryExpr X SYM_SEMI | |
; | |
# ==== Literals ==== | |
# | |
# Any literal value. The alternatives are tried left to right, so a float is | |
# attempted before an integer. | |
LiteralExpr <- FloatLiteral / IntegerLiteral / BoolLiteral / NullLiteral / SetLiteral ; | |
# Integer literals. The prefixed forms ("0x...", "0b...") must be tried before | |
# IntDecLiteral: PEG choice is ordered and IntDecLiteral matches a lone "0", | |
# which would otherwise win and leave the "x..." / "b..." remainder unparsed. | |
IntegerLiteral | |
<- IntHexLiteral | |
/ IntBinLiteral | |
/ IntDecLiteral | |
; | |
# Decimal integer: a single "0", or a non-zero digit followed by any digits | |
# (no leading zeros). | |
IntDecLiteral <- "0" / [1-9] [0-9]* ; | |
# Hexadecimal integer: "0x" followed by one or more hex digits. HEXNUM matches | |
# a single digit, so it has to be repeated to accept values such as 0xFF. | |
IntHexLiteral | |
<- "0x" HEXNUM+ | |
; | |
# TODO: See that java supports a power with binary literals such as "0b00123p23" ? | |
# Binary integer: "0b" followed by one or more binary digits (0 or 1). | |
IntBinLiteral | |
<- "0b" [01]+ | |
; | |
# A floating point literal. ExponentFloatLiteral is tried first; it starts with | |
# a RegularFloatLiteral or IntDecLiteral, so trying the plain form first would | |
# stop before the exponent part. | |
FloatLiteral | |
<- floatExp_literal:ExponentFloatLiteral | |
/ float_literal:RegularFloatLiteral | |
; | |
# NOTE: Requires number after the decimal place. Unlike most langs, something like "1." is not allowed. | |
# The integer part is either a single "0" or a non-zero digit followed by more | |
# digits. Zero-led values such as 0.5 are therefore accepted (not only "0.0"), | |
# while redundant leading zeros such as 00.5 are still rejected. | |
RegularFloatLiteral | |
<- ("0" / [1-9] [0-9]*) "." [0-9]+ | |
; | |
# Float with exponent: a regular float or decimal integer mantissa, a lower-case | |
# "e", an optional sign and one or more exponent digits (e.g. 123e4, 123.0e-4). | |
ExponentFloatLiteral | |
<- (RegularFloatLiteral / IntDecLiteral) "e" [+-]? [0-9]+ | |
; | |
# Boolean literal: the keyword "true" or "false". | |
BoolLiteral <- KW_TRUE / KW_FALSE ; | |
# The "null" keyword. | |
NullLiteral <- KW_NULL ; | |
# Bracketed, comma-separated list of expressions; may be empty ("[]"). | |
SetLiteral <- L_BRACKET X (PrimaryExpr (X ',' X PrimaryExpr)* X)? R_BRACKET ; | |
# ==== | |
# Type Expressions / Type Algebra | |
# TODO: | |
# - NullifyTy is last in precedence due to needing to apply to all other types | |
# which won't properly parse otherwise. Possibly, narrow these down significantly | |
# especially if having this lowest precedence causes other issues. In other words, | |
# remove the recursion on TypeExpr in favor of more precise types ... like currently, | |
# Map<int int><int, int> would work due to recursion. | |
# - QUESTION: Should mutability be put here, or in the IfExpr? | |
# Each TypeExpr alternative carries a bracketed precedence level. Per the TODO | |
# above, nullifyTy (level 10) is the lowest precedence, so higher numbers | |
# presumably bind tighter -- confirm against the parser documentation. | |
# Every alternative except the bare type name also consumes trailing whitespace. | |
# Parenthesised type, e.g. (int | float) | |
TypeExpr[90] <- group:(L_PAREN X TypeExpr X R_PAREN) X; | |
# Array type: a type followed by one or more "[]" pairs (no whitespace is | |
# accepted between "[" and "]") | |
TypeExpr[80] <- arrayTy:(TypeExpr arrayDim:(L_BRACKET R_BRACKET X)+) X; | |
# Plain type name, e.g. int | |
TypeExpr[70] <- type:WORD; | |
# Negated type, e.g. ~any | |
TypeExpr[60] <- notTy:('~' X TypeExpr) X; | |
# Union type, e.g. int | null | |
TypeExpr[50] <- unionTy:(TypeExpr X '|' X TypeExpr) X; | |
# Intersection type: two types joined by "&" | |
TypeExpr[40] <- intersectionTy:(TypeExpr X '&' X TypeExpr) X; | |
# Parameterised type, e.g. map<string, int> | |
TypeExpr[30] <- paramTy:(TypeExpr X '<' X TypeExprList X '>') X; | |
# Function type, e.g. fn(int,int) void -- the return type follows the ")" directly | |
TypeExpr[20] <- fn:("fn" X '(' X param:TypeExprList X ')' X return:TypeExpr) X; | |
# Nullable type, e.g. int? | |
TypeExpr[10] <- nullifyTy:(TypeExpr X SYM_QUESTION) X; | |
# Possibly empty, comma-separated list of type expressions. | |
TypeExprList | |
<- ( TypeExpr ( X ',' X TypeExpr X )* X )? | |
; | |
# ==== | |
# PrimaryExpr encapsulates all expressions such as references to variable identifiers, | |
# literals (int, float, strings, bool, null), property access, function calls, and array | |
# dimension access. | |
# The bracketed number is the precedence level of each alternative. | |
# Parenthesised expression (uses literal '(' / ')' rather than L_PAREN / R_PAREN). | |
PrimaryExpr[6] <- '(' X PrimaryExpr X ')'; | |
# Variable / name reference, e.g. $a | |
PrimaryExpr[5] <- ref:NameRef; | |
# Literal value | |
PrimaryExpr[4] <- literal:LiteralExpr; | |
# Property access, e.g. a -> b | |
PrimaryExpr[3] <- propAccess:PropertyAccessExpr; | |
# Function call, e.g. foo($a, $b[0]) | |
PrimaryExpr[2] <- fnCallExpr:FnCallExpr; | |
# Array dimension access, e.g. $a[5] | |
PrimaryExpr[1] <- dimExpr:DimExpr; | |
# Operator expressions (see Expr) | |
PrimaryExpr[0] <- expr:Expr; | |
# Two expressions joined by "->"; also consumes trailing whitespace. | |
PropertyAccessExpr <- prop:(PrimaryExpr X "->" X PrimaryExpr) X; | |
# An expression followed by a bracketed index expression (labelled "dim"). | |
DimExpr <- array:(PrimaryExpr X '[' X dim:PrimaryExpr X ']'); | |
# A name followed by a parenthesised argument list. | |
FnCallExpr <- name:NameRef X '(' X args:ArgumentList X ')' ; | |
# Possibly empty, comma-separated list of call arguments. | |
ArgumentList <- (Parameter X ( X ',' X Parameter X )* X)?; | |
# A single call argument: any expression. | |
Parameter <- PrimaryExpr; | |
# ==== | |
# Operator Expressions: | |
# | |
# Each alternative carries a bracketed precedence level; an optional ",L" / ",R" | |
# presumably selects left / right associativity -- confirm against the parser | |
# documentation. | |
# Object creation / cloning: new <expr>, clone <expr> | |
Expr[95] <- op:(KW_NEW / KW_CLONE) X PrimaryExpr; | |
# Exponentiation, e.g. 2**3**4 | |
Expr[90,R] <- arith:(PrimaryExpr X op:(SYM_POW) X PrimaryExpr); | |
# Unary prefix operators: - + ~ ! | |
Expr[85] <- unary:(op:(SYM_SUB / SYM_ADD / SYM_BW_NOT / SYM_BANG) X PrimaryExpr); | |
# Multiplicative: * / % | |
Expr[80,L] <- arith:(PrimaryExpr X op:(SYM_MUL / SYM_DIV / SYM_REM) X PrimaryExpr); | |
# Additive: + - | |
Expr[75,L] <- arith:(PrimaryExpr X op:(SYM_ADD / SYM_SUB) X PrimaryExpr); | |
# Bit shifts: << >> | |
Expr[70,L] <- shift:(PrimaryExpr X op:(SYM_SH_LEFT / SYM_SH_RIGHT) X PrimaryExpr); | |
# Relational comparison: < <= > >= | |
Expr[65] <- cmp:(PrimaryExpr X op:(SYM_LT / SYM_LTE / SYM_GT / SYM_GTE) X PrimaryExpr); | |
# Equality / identity / membership. The two-word forms ("is not", "not in") are | |
# listed before their one-word prefixes so that ordered choice can reach them. | |
Expr[60] <- cmp:(PrimaryExpr X op:(SYM_EQ / SYM_NEQ / SYM_IDENTICAL / SYM_NOTIDENT / SYM_SPACESHIP / KW_IS REQSP KW_NOT / KW_IS / KW_NOT REQSP KW_IN / KW_IN) X PrimaryExpr); | |
# Bitwise and / xor / or | |
Expr[55] <- bwAnd:(PrimaryExpr X op:(SYM_BW_AND) X PrimaryExpr); | |
Expr[50] <- bwXor:(PrimaryExpr X op:(SYM_BW_XOR) X PrimaryExpr); | |
Expr[45] <- bwOr:(PrimaryExpr X op:(SYM_BW_OR) X PrimaryExpr); | |
# Logical and / or | |
Expr[40] <- logicAnd:(PrimaryExpr X op:(SYM_LOGIC_AND) X PrimaryExpr); | |
Expr[35] <- logicOr:(PrimaryExpr X op:(SYM_LOGIC_OR) X PrimaryExpr); | |
# Null coalescing: a ?? b | |
Expr[30] <- nullCoal:(PrimaryExpr X op:(SYM_NULL_COAL) X PrimaryExpr); | |
# Conditional: cond ? thenExpr : elseExpr. The else-operand after ":" is | |
# required; without it the rule would stop at the colon. | |
Expr[25] <- condPrimaryExpr:(PrimaryExpr X SYM_QUESTION X PrimaryExpr X SYM_COLON X PrimaryExpr); | |
# yield from <expr> | |
Expr[20] <- yieldFrom:(KW_YIELD X KW_FROM X PrimaryExpr); | |
# delete <expr> | |
Expr[15] <- delete:(KW_DELETE X PrimaryExpr); | |
# yield <expr> | |
Expr[10] <- yield:(KW_YIELD X PrimaryExpr); | |
# ==== | |
# Block Statements (If/Elseif/Else, While, Do...While, For, Try/Catch, ...) | |
# The condition of a block statement, in one of two forms: | |
#   - required whitespace followed by the expression, e.g. `if $e == 1 {` | |
#   - optional whitespace and a parenthesised expression, e.g. `if($a == 1) {` | |
# In both forms the expression is labelled "cond". | |
BlockConditional | |
<- (REQSP cond:PrimaryExpr / X L_PAREN X cond:PrimaryExpr X R_PAREN) | |
; | |
# TODO: Disallow mixed paren/space syntax? | |
# if, followed by any number of elseif branches and an optional else branch. | |
# Every branch body is a BlockBody. | |
IfStmt | |
<- if:(KW_IF BlockConditional X BlockBody X) | |
elseif:(KW_ELSEIF BlockConditional X BlockBody X)* | |
else:(KW_ELSE X BlockBody)? | |
; | |
# while <cond> { ... } | |
WhileStmt | |
<- KW_WHILE BlockConditional X BlockBody | |
; | |
# do { ... } while <cond> ; | |
DoWhileStmt | |
<- KW_DO X BlockBody X KW_WHILE BlockConditional X SYM_SEMI | |
; | |
#BasicForArgPart <- VarDeclStmt _ Expr _ SYM_SEMI _ Expr | |
# ; | |
# | |
#BasicForStmt <- KW_FOR _ ( L_PAREN _ BasicForArgPart _ R_PAREN _ ) _ StmtBody | |
# ; | |
# | |
#ReturnStmt <- "return" _ expr:Expr _ SYM_SEMI | |
# ==== | |
# Top Level Function Declarations | |
# Visibility modifier. "public" must be tried before its prefix "pub": PEG | |
# choice is ordered, so "pub" would otherwise match the start of "public" and | |
# leave "lic" unparsed, making "public" impossible to match. | |
Visibility <- ( ( "public" / "pub" ) / "protected" / "private" ) | |
; | |
# Either "instance" or "static". | |
# NOTE(review): unlike the KW_* terminals, these literals have no !ALPHANUM | |
# word-boundary check -- confirm whether that is intended. | |
InstanceType <- ( "instance" / "static" ) | |
; | |
# Optional visibility followed by an optional instance type; both may be absent. | |
FnDeclModifiers <- visibility:Visibility? X instanceType:InstanceType? | |
; | |
# One function parameter: a name with an optional ": Type" annotation. | |
# Whitespace is allowed after the colon (as in VarDeclType and FnDeclReturnTy), | |
# so that `$x: int` parses; TypeExpr itself accepts no leading whitespace. | |
FnTypedVar <- parameter:(name:NameRef X ( SYM_COLON X type:TypeExpr )?) X | |
; | |
# Possibly empty, comma-separated list of function parameters. | |
ParameterList <- parameter:(FnTypedVar X (SYM_COMMA X FnTypedVar )*)? | |
; | |
# Return type annotation: ":" followed by a type expression. | |
FnDeclReturnTy <- (SYM_COLON X returnType:TypeExpr) | |
; | |
# Function declaration: optional modifiers, "fn", the function name, a | |
# parenthesised parameter list, an optional return type and the body block, | |
# e.g. `fn add($x: int, $y: int): num { ... }`. | |
FnDecl <- X FnDeclModifiers X KW_FN X name:WORD X | |
L_PAREN X parameters:ParameterList X R_PAREN X | |
FnDeclReturnTy? X | |
BlockBody | |
; | |
# ==== | |
# Terminal Keywords | |
# | |
# Every keyword is followed by the negative lookahead !ALPHANUM, so it only | |
# matches when the next character is not a letter or digit (e.g. "if" does not | |
# match the start of "iffy", and "else" does not match the start of "elseif"). | |
KW_IF <- "if" !ALPHANUM; | |
KW_ELSEIF <- "elseif" !ALPHANUM; | |
KW_ELSE <- "else" !ALPHANUM; | |
KW_NOT <- "not" !ALPHANUM; | |
KW_NEW <- "new" !ALPHANUM; | |
KW_CLONE <- "clone" !ALPHANUM; | |
KW_YIELD <- "yield" !ALPHANUM; | |
KW_FROM <- "from" !ALPHANUM; | |
KW_IS <- "is" !ALPHANUM; | |
KW_IN <- "in" !ALPHANUM; | |
KW_DELETE <- "delete" !ALPHANUM; | |
KW_PRINT <- "print" !ALPHANUM; | |
KW_PRINTLN <- "println" !ALPHANUM; | |
KW_ECHO <- "echo" !ALPHANUM; | |
KW_TRUE <- "true" !ALPHANUM; | |
KW_FALSE <- "false" !ALPHANUM; | |
KW_NULL <- "null" !ALPHANUM; | |
KW_FN <- "fn" !ALPHANUM; | |
KW_LET <- "let" !ALPHANUM; | |
KW_WHILE <- "while" !ALPHANUM; | |
KW_DO <- "do" !ALPHANUM; | |
KW_FOR <- "for" !ALPHANUM; | |
KW_FOREACH <- "foreach" !ALPHANUM; | |
KW_AS <- "as" !ALPHANUM; | |
KW_RETURN <- "return" !ALPHANUM; | |
# ==== | |
# Terminal Symbols | |
# | |
# Grouping delimiters. These carry no "not followed by the same character" | |
# lookahead: such a check rejects directly nested delimiters, e.g. the "}}" | |
# that closes two nested blocks or the "((" in `if((a) == 1)`. | |
L_PAREN <- "(" ; | |
R_PAREN <- ")" ; | |
L_CURLY <- "{" ; | |
R_CURLY <- "}" ; | |
# Most symbols below are followed by a negative lookahead (![...]) listing the | |
# characters that must NOT come next, so that a short operator does not match | |
# the beginning of a longer one (e.g. "=" inside "==", "*" inside "**" or "*="). | |
L_BRACKET <- "[" ; | |
R_BRACKET <- "]" ; | |
SYM_ASSIGN <- "=" ![=] ; | |
SYM_SEMI <- ";" ; | |
SYM_BANG <- "!" ![!] ; | |
SYM_POW <- "**" ![=] ; | |
SYM_DIV <- "/" ![/=] ; | |
SYM_REM <- "%" ![%] ; | |
SYM_MUL <- "*" ![*=] ; | |
# "-" must not be followed by "-", "=" or ">" (the latter keeps it apart from "->"). | |
SYM_SUB <- "-" ![-=>] ; | |
SYM_ADD <- "+" ![=+] ; | |
# LESS, LESSEQ, GREATER, GREATEREQ | |
SYM_LT <- "<" ![<=] ; | |
# "<=" must not be followed by ">" so that it does not match inside "<=>". | |
SYM_LTE <- "<=" ![>] ; | |
SYM_GT <- ">" ![>=] ; | |
SYM_GTE <- ">=" ; | |
SYM_SPACESHIP <- "<=>" ![>] ; | |
# "==" / "!=" must not be followed by "=" so that "===" / "!==" are matched by | |
# SYM_IDENTICAL / SYM_NOTIDENT instead. | |
SYM_EQ <- "==" ![=] ; | |
SYM_NEQ <- "!=" ![=] ; | |
SYM_IDENTICAL <- "===" ![=] ; | |
SYM_NOTIDENT <- "!==" ![=] ; | |
# TODO: CHANGE NAME? | |
# SYM_SHIFTLEFT / SYM_SHIFTRIGHT | |
SYM_SH_LEFT <- "<<" ![<] ; | |
SYM_SH_RIGHT <- ">>" ![>] ; | |
# NOTE(review): not referenced by any Expr alternative visible in this file. | |
SYM_SH_RIGHT_UN <- ">>>" ; | |
SYM_BW_NOT <- "~" ![=] ; | |
SYM_BW_AND <- "&" ![&] ; | |
SYM_BW_OR <- "|" ![|] ; | |
SYM_BW_XOR <- "^" ; | |
SYM_LOGIC_AND <- "&&" ; | |
SYM_LOGIC_OR <- "||" ; | |
SYM_NULL_COAL <- "??" ; | |
# "?" must not be followed by "?" (that is SYM_NULL_COAL). | |
SYM_QUESTION <- "?" ![?] ; | |
SYM_COLON <- ":" ![:] ; | |
# UNUSED | |
# Symbols in this group are listed under the UNUSED heading. | |
SYM_INC <- "++" ; | |
SYM_DEC <- "--" ; | |
SYM_PIPE <- "|" ![|] ; | |
SYM_FSLASH <- "/" ; | |
SYM_BSLASH <- "\\" ; | |
SYM_QUOTE <- "'" ; | |
SYM_DBLQUOTE <- "\"" ; | |
# A single "." that is not followed by another "." (negative lookahead). The | |
# previous form `[!.]` was a character class that consumed a "!" or "." after | |
# the period instead of rejecting a following ".". | |
SYM_PERIOD <- "." ![.] ; | |
SYM_ELLIPSES <- "..." ; | |
SYM_COMMA <- "," ; | |
SYM_AT <- "@" ; | |
SYM_HASH <- "#" ; | |
SYM_DOLLAR <- "$" ; | |
SYM_PERCENT <- "%" ; | |
SYM_CARET <- "^" ; | |
SYM_AMP <- "&" ![&] ; | |
SYM_UNDERSC <- "_" ; | |
SYM_MINUS <- "-" ![-] ; | |
SYM_ADD_EQ <- "+=" ; | |
SYM_SUB_EQ <- "-=" ; | |
SYM_MUL_EQ <- "*=" ; | |
SYM_DIV_EQ <- "/=" ; | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
discriminant=b*b-4*a*c; | |
x = 1 + 5 * 3; | |
y=2**3**4; | |
z=-5; | |
yield x + d; | |
delete z; | |
3 in x; | |
3 !== 3; | |
$z = 5; | |
let $a = 6; | |
if($a == 1) { | |
$a = 2; | |
} | |
$j = 0; | |
if($b == 1) { | |
$b = 2; | |
} elseif($a == 3) { | |
$b = 4; | |
} | |
$k = 0; | |
if($c == 1) { | |
$c = 2; | |
} else { | |
$c = 4; | |
} | |
$l = 0; | |
if($d == 1) { | |
$d = 2; | |
} elseif($d == 3) { | |
$d = 4; | |
} else { | |
$d = 5; | |
} | |
$m = 0; | |
if $e == 1 { | |
$e = 2; | |
} | |
$n = 0; | |
if $f == 1 { | |
$f = 2; | |
} elseif $f == 3 { | |
$f = 4; | |
} | |
$o = 0; | |
if $g == 1 { | |
$g = 2; | |
} else { | |
$g = 4; | |
} | |
$p = 0; | |
if $h == 1 { | |
$h = 2; | |
} elseif $h == 3 { | |
$h = 4; | |
} else { | |
$h = 5; | |
} | |
$q = 0; | |
if($i == 1) { | |
$i = 2; | |
} elseif $i == 3 { | |
$i = 4; | |
} | |
$r = 0; | |
if $j == 1 { | |
$j = 2; | |
} elseif($j == 3) { | |
$j = 4; | |
} | |
$s = 0; | |
if($k == 1) { | |
$k = 2; | |
} elseif $k == 3 { | |
$k = 4; | |
} else { | |
$k = 5; | |
} | |
$t = 0; | |
if $m == 1 { | |
$m = 2; | |
} elseif($m == 3) { | |
$m = 4; | |
} else { | |
$m = 5; | |
} | |
let $x: int = 5; | |
let $y: int | null = 5; | |
let $z: array<string> = 0; | |
let $z: fn(int,int) void = 0; | |
let $z: fn(argType1,argType2) returnType = 0; | |
let $z: int|float = 0; | |
let $z: ~any = 0; | |
$q = $abc[3] + $xyz * foo($a, $b[0]); | |
$w = 1 + 2 - 3 / 4 * 5; | |
$e = 10 << 1; | |
$r = 9 % 3; | |
$t = $abc[$q[1 + rand()]]; | |
$y = 1 + 2 * 3; | |
$u = 1 * 2 - 3; | |
$i = $a[1 + 2 * 3]; | |
$o = $a[1 * 2 * 3 + $a * 5]; | |
$p = $a[$b[$c]]; | |
$a = $a[(5 + 5) * 6 * $a]; | |
$s = $a[5]; | |
$d = $a[ 1 + $b[6] ]; | |
$f = hello(1, 2); | |
$g = $a [ rand(1, 2) ]; | |
$h = $a[rand($a[0], $b[1])]; | |
$j = $a[rand(1, 2) + 1]; | |
$k = 1 + 2 * 3; | |
$l = 1 * 2 - 3; | |
$z = foo(0); | |
$x = $a[1 + 2][2][3 + $q[$w[0][0]] + ha()][foo()]; | |
$c = $a[foo() + bar()]; | |
let $v = 0; | |
let $b = 10; | |
let $n: int = 2**3**4; | |
let $x: int? = null; | |
$x: (int | float)? = null; | |
$x: i64 = 128; | |
let $y: map<string, string> = null; | |
let $z: map<string, int> = []; | |
let $x: f64 = 123.45; | |
let $y: f64 = 123e4; | |
let $y: f64 = 123e+4; | |
let $y: f64 = 123e-4; | |
let $y: f64 = 123.0e4; | |
let $y: f64 = 123.0e+4; | |
let $y: f64 = 123.0e-4; | |
let $y: f64 = 123.1e4; | |
let $y: f64 = 123.2e+4; | |
let $y: f64 = 123.3e-4; | |
let $n: int? = null; | |
let $y: bool = true; | |
let $z: bool = false; | |
let $z: map<string, int> = [1, 2, foo(), $a[6]]; | |
let $z: map<string, int>? = []; | |
let $z: map<string, string><string, string> = []; | |
fn foo() { | |
let $x = 0; | |
foo(); | |
} | |
fn foo(): void | |
{ | |
bar(); | |
} | |
fn add($x: int, $y: int): num | |
{ | |
return $x + $y; | |
} | |
while($x > 5) { | |
$y = foo($x); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment