Skip to content

Instantly share code, notes, and snippets.

@abcarroll
Created November 11, 2021 23:42
Show Gist options
  • Save abcarroll/8e43b219186b5efaf5c1a09b38e41218 to your computer and use it in GitHub Desktop.
Save abcarroll/8e43b219186b5efaf5c1a09b38e41218 to your computer and use it in GitHub Desktop.
# ====
# Sail/Lx Primary Expression Grammar (PEG)
#
# This PEG format is intended for use for the Pikaparser reference implementation metagrammar
# and closely follows a standard PEG format.
#
# (C) Copyright 2020-2021. MIT License.
#
# ====
# Core re-usable rules
#
# Whitespace: a single whitespace character (SP), an optional run of
# whitespace (X) and a mandatory run of whitespace (REQSP).
SP       <- [ \r\n\t] ;
X        <- SP* ;
REQSP    <- SP+ ;
# Character classes.
ANY      <- [\u0000-\uFFFF] ;
ALPHA    <- [a-zA-Z] ;
ALPHANUM <- [a-zA-Z0-9] ;
HEXNUM   <- [0-9A-Fa-f] ;
# An identifier: one letter followed by any number of letters or digits.
WORD     <- ALPHA ALPHANUM* ;
# ====
# Re-usable non-terminals
# A name reference: an identifier with an optional leading "$".
NameRef <- ("$")? WORD ;
# A statement list wrapped in curly braces; the statements are labelled `body`.
BlockBody <- L_CURLY X body:StatementList X R_CURLY ;
# ====
# START/ROOT CLAUSE
# Entry point: optional leading whitespace followed by the program's statements.
Program <- X StatementList ;
# ====
# PRIMARY STATEMENTS
# Zero or more statements, each followed by optional whitespace.
StatementList <- (Statement X)* ;
# A statement is the first of the alternatives below that matches (PEG ordered
# choice). ReturnStmt is listed before ExprStmt so that the `return` keyword is
# tried as a statement before the generic expression statement.
Statement
<- fnDecl:FnDecl
/ assignStmt:VarDeclStmt
/ ifStmt:IfStmt
/ whileStmt:WhileStmt
/ doWhileStmt:DoWhileStmt
/ returnStmt:ReturnStmt
/ exprStmt:ExprStmt;
# Parses: "return" (Expr) ";"
# The returned expression is labelled `expr`.
ReturnStmt
<- KW_RETURN X expr:PrimaryExpr X SYM_SEMI
;
# Type annotation of a variable declaration: ":" ("mut")? Type
# The !ALPHANUM guard after "mut" keeps the modifier from matching the first
# three letters of a longer type name (e.g. "mutex" must be parsed as a type,
# not as "mut" followed by the type "ex").
VarDeclType
<- SYM_COLON X (mut:"mut" !ALPHANUM)? X type:TypeExpr
;
# Variable declaration / assignment.
# Parses: ("let ")? <name> (: Type)? = (Expr) ;
VarDeclStmt <- (let:KW_LET REQSP)? nameRef:NameRef X (VarDeclType X)? SYM_ASSIGN X value:PrimaryExpr X SYM_SEMI ;
# An expression used as a statement, terminated by ";".
ExprStmt <- PrimaryExpr X SYM_SEMI ;
# ==== Literals ====
#
# Any literal value. The alternatives are tried in the order written;
# FloatLiteral comes before IntegerLiteral.
LiteralExpr <- FloatLiteral / IntegerLiteral / BoolLiteral / NullLiteral / SetLiteral ;
# Integer literals. The prefixed forms are tried first: with PEG ordered choice
# IntDecLiteral would otherwise match the leading "0" of "0x1F" or "0b101" and
# the hexadecimal / binary alternatives would never be reached.
IntegerLiteral
<- IntHexLiteral
/ IntBinLiteral
/ IntDecLiteral
;
# Decimal: no leading zeros, except for the single digit "0".
IntDecLiteral
<- [1-9] [0-9]* / "0"
;
# Hexadecimal: "0x" followed by one or more hex digits.
IntHexLiteral
<- "0x" HEXNUM+
;
# Binary: "0b" followed by one or more binary digits.
# TODO: See that java supports a power with binary literals such as "0b00123p23" ?
IntBinLiteral
<- "0b" [01]+
;
# Floating point literals. The exponent form is tried first so that the
# mantissa of e.g. "123.0e4" is not accepted as a complete literal on its own.
FloatLiteral
<- floatExp_literal:ExponentFloatLiteral
/ float_literal:RegularFloatLiteral
;
# NOTE: Requires number after the decimal place. Unlike most langs, something like "1." is not allowed.
# The integer part is either a single "0" or a number without leading zeros,
# so "0.5" and "0.05" are accepted as well as "0.0" and "123.45".
RegularFloatLiteral
<- ("0" / [1-9] [0-9]*) "." [0-9]+
;
# A float or decimal integer mantissa followed by "e", an optional sign and
# one or more exponent digits.
ExponentFloatLiteral
<- (RegularFloatLiteral / IntDecLiteral) "e" [+-]? [0-9]+
;
# The boolean keywords.
BoolLiteral <- KW_TRUE / KW_FALSE ;
# The null keyword.
NullLiteral <- KW_NULL ;
# A bracketed, comma-separated list of expressions; the list may be empty.
SetLiteral <- L_BRACKET X (PrimaryExpr (X ',' X PrimaryExpr)* X)? R_BRACKET ;
# ====
# Type Expressions / Type Algebra
# TODO:
# - NullifyTy is last in precedence due to needing to apply to all other types
# which won't properly parse otherwise. Possibly, narrow these down significantly
# especially if having this lowest precedence causes other issues. In other words,
# remove the recursion on TypeExpr in favor of more precise types ... like currently,
# Map<int int><int, int> would work due to recursion.
# - QUESTION: Should mutability be put here, or in the IfExpr?
# Each TypeExpr[N] line below is one precedence level of the same rule; N is the
# level number written in brackets. NOTE(review): in the Pika parser grammar a
# higher number is understood to bind more tightly -- confirm against the parser
# version in use.
#   [90] group           parenthesised type
#   [80] arrayTy         type followed by one or more "[]" pairs
#   [70] type            a plain type name (WORD)
#   [60] notTy           "~" prefix
#   [50] unionTy         two types separated by "|"
#   [40] intersectionTy  two types separated by "&"
#   [30] paramTy         type followed by a "<...>" list of types
#   [20] fn              "fn" "(" parameter types ")" return type
#   [10] nullifyTy       type followed by "?"
TypeExpr[90] <- group:(L_PAREN X TypeExpr X R_PAREN) X;
TypeExpr[80] <- arrayTy:(TypeExpr arrayDim:(L_BRACKET R_BRACKET X)+) X;
TypeExpr[70] <- type:WORD;
TypeExpr[60] <- notTy:('~' X TypeExpr) X;
TypeExpr[50] <- unionTy:(TypeExpr X '|' X TypeExpr) X;
TypeExpr[40] <- intersectionTy:(TypeExpr X '&' X TypeExpr) X;
TypeExpr[30] <- paramTy:(TypeExpr X '<' X TypeExprList X '>') X;
TypeExpr[20] <- fn:("fn" X '(' X param:TypeExprList X ')' X return:TypeExpr) X;
TypeExpr[10] <- nullifyTy:(TypeExpr X SYM_QUESTION) X;
# An optional, comma-separated list of type expressions (may be empty).
TypeExprList
<- ( TypeExpr ( X ',' X TypeExpr X )* X )?
;
# ====
# PrimaryExpr encapsulates all expressions such as references to variable identifiers,
# literals (int, float, strings, bool, null), property access, function calls, and array
# dimension access.
# Precedence levels of PrimaryExpr, from [6] down to [0]:
#   [6] parenthesised expression   [5] name reference      [4] literal
#   [3] property access            [2] function call       [1] dimension access
#   [0] operator expression (Expr)
# The parentheses and brackets in these rules are written as character literals
# rather than L_PAREN / R_PAREN, so the lookaheads attached to those terminals
# do not apply here.
PrimaryExpr[6] <- '(' X PrimaryExpr X ')';
PrimaryExpr[5] <- ref:NameRef;
PrimaryExpr[4] <- literal:LiteralExpr;
PrimaryExpr[3] <- propAccess:PropertyAccessExpr;
PrimaryExpr[2] <- fnCallExpr:FnCallExpr;
PrimaryExpr[1] <- dimExpr:DimExpr;
PrimaryExpr[0] <- expr:Expr;
# Two expressions joined by "->".
PropertyAccessExpr <- prop:(PrimaryExpr X "->" X PrimaryExpr) X;
# An expression followed by a bracketed index expression (labelled `dim`).
DimExpr <- array:(PrimaryExpr X '[' X dim:PrimaryExpr X ']');
# A name followed by a parenthesised argument list.
FnCallExpr <- name:NameRef X '(' X args:ArgumentList X ')' ;
# An optional, comma-separated list of arguments (may be empty).
ArgumentList <- (Parameter X ( X ',' X Parameter X )* X)?;
# A single call argument is any expression.
Parameter <- PrimaryExpr;
# ====
# Operator Expressions:
#
# Each Expr[N] line is one precedence level; where a second entry (L or R) is
# given it is the associativity of that level in the Pika parser grammar syntax.
# Operands are written as PrimaryExpr, whose level [0] refers back to Expr.
Expr[95] <- op:(KW_NEW / KW_CLONE) X PrimaryExpr;
Expr[90,R] <- arith:(PrimaryExpr X op:(SYM_POW) X PrimaryExpr);
Expr[85] <- unary:(op:(SYM_SUB / SYM_ADD / SYM_BW_NOT / SYM_BANG) X PrimaryExpr);
Expr[80,L] <- arith:(PrimaryExpr X op:(SYM_MUL / SYM_DIV / SYM_REM) X PrimaryExpr);
Expr[75,L] <- arith:(PrimaryExpr X op:(SYM_ADD / SYM_SUB) X PrimaryExpr);
Expr[70,L] <- shift:(PrimaryExpr X op:(SYM_SH_LEFT / SYM_SH_RIGHT) X PrimaryExpr);
Expr[65] <- cmp:(PrimaryExpr X op:(SYM_LT / SYM_LTE / SYM_GT / SYM_GTE) X PrimaryExpr);
# Two-word operators ("is not", "not in") are listed before their one-word
# prefixes so that the longer form is tried first.
Expr[60] <- cmp:(PrimaryExpr X op:(SYM_EQ / SYM_NEQ / SYM_IDENTICAL / SYM_NOTIDENT / SYM_SPACESHIP / KW_IS REQSP KW_NOT / KW_IS / KW_NOT REQSP KW_IN / KW_IN) X PrimaryExpr);
Expr[55] <- bwAnd:(PrimaryExpr X op:(SYM_BW_AND) X PrimaryExpr);
Expr[50] <- bwXor:(PrimaryExpr X op:(SYM_BW_XOR) X PrimaryExpr);
Expr[45] <- bwOr:(PrimaryExpr X op:(SYM_BW_OR) X PrimaryExpr);
Expr[40] <- logicAnd:(PrimaryExpr X op:(SYM_LOGIC_AND) X PrimaryExpr);
Expr[35] <- logicOr:(PrimaryExpr X op:(SYM_LOGIC_OR) X PrimaryExpr);
Expr[30] <- nullCoal:(PrimaryExpr X op:(SYM_NULL_COAL) X PrimaryExpr);
# Conditional: <cond> ? <then> : <else>. The <else> operand after the colon is
# part of the rule; without it "a ? b : c" could not be matched.
Expr[25] <- condPrimaryExpr:(PrimaryExpr X SYM_QUESTION X PrimaryExpr X SYM_COLON X PrimaryExpr);
Expr[20] <- yieldFrom:(KW_YIELD X KW_FROM X PrimaryExpr);
Expr[15] <- delete:(KW_DELETE X PrimaryExpr);
Expr[10] <- yield:(KW_YIELD X PrimaryExpr);
# ====
# Block Statements (If/Elseif/Else, While, Do...While, For, Try/Catch, ...)
# The condition of a block statement, in one of two forms:
#   - mandatory whitespace followed by a bare expression, or
#   - optional whitespace followed by a parenthesised expression.
# In both forms the expression is labelled `cond`.
BlockConditional <- REQSP cond:PrimaryExpr / X L_PAREN X cond:PrimaryExpr X R_PAREN ;
# TODO: Disallow mixed paren/space syntax?
# if, then any number of elseif branches, then an optional else branch.
IfStmt <- if:(KW_IF BlockConditional X BlockBody X) elseif:(KW_ELSEIF BlockConditional X BlockBody X)* else:(KW_ELSE X BlockBody)? ;
# while <cond> { ... }
WhileStmt <- KW_WHILE BlockConditional X BlockBody ;
# do { ... } while <cond> ;
DoWhileStmt <- KW_DO X BlockBody X KW_WHILE BlockConditional X SYM_SEMI ;
#BasicForArgPart <- VarDeclStmt _ Expr _ SYM_SEMI _ Expr
# ;
#
#BasicForStmt <- KW_FOR _ ( L_PAREN _ BasicForArgPart _ R_PAREN _ ) _ StmtBody
# ;
#
#ReturnStmt <- "return" _ expr:Expr _ SYM_SEMI
# ====
# Top Level Function Declarations
# Visibility modifier of a function declaration.
# "public" must be tried before "pub": PEG choice is ordered, so with "pub"
# first the input "public" would match only its first three letters and the
# rest of the declaration would then fail on the remaining "lic".
Visibility <- ( "public" / "pub" / "protected" / "private" )
;
# Whether the function belongs to an instance or is static.
InstanceType <- ( "instance" / "static" )
;
# Optional visibility followed by an optional instance type.
FnDeclModifiers <- visibility:Visibility? X instanceType:InstanceType?
;
# One function parameter: a name with an optional ": Type" annotation.
# Optional whitespace is allowed after the colon (as in VarDeclType and
# FnDeclReturnTy), so that "$x: int" is accepted as well as "$x:int".
FnTypedVar <- parameter:(name:NameRef X ( SYM_COLON X type:TypeExpr )?) X
;
# An optional, comma-separated list of typed parameters.
ParameterList
<- parameter:(FnTypedVar X (SYM_COMMA X FnTypedVar )*)?
;
# Return type annotation: ":" followed by a type, labelled `returnType`.
FnDeclReturnTy
<- SYM_COLON X returnType:TypeExpr
;
# Function declaration:
#   modifiers "fn" <name> "(" parameters ")" (": " return type)? { body }
FnDecl
<- X FnDeclModifiers X KW_FN X name:WORD X L_PAREN X parameters:ParameterList X R_PAREN X FnDeclReturnTy? X BlockBody
;
# ====
# Terminal Keywords
#
# Every keyword is its literal text followed by !ALPHANUM, so a keyword never
# matches the beginning of a longer alphanumeric word.
# -- control flow
KW_IF      <- "if" !ALPHANUM;
KW_ELSEIF  <- "elseif" !ALPHANUM;
KW_ELSE    <- "else" !ALPHANUM;
KW_WHILE   <- "while" !ALPHANUM;
KW_DO      <- "do" !ALPHANUM;
KW_FOR     <- "for" !ALPHANUM;
KW_FOREACH <- "foreach" !ALPHANUM;
KW_AS      <- "as" !ALPHANUM;
KW_RETURN  <- "return" !ALPHANUM;
# -- declarations
KW_FN      <- "fn" !ALPHANUM;
KW_LET     <- "let" !ALPHANUM;
# -- operators written as words
KW_NOT     <- "not" !ALPHANUM;
KW_NEW     <- "new" !ALPHANUM;
KW_CLONE   <- "clone" !ALPHANUM;
KW_YIELD   <- "yield" !ALPHANUM;
KW_FROM    <- "from" !ALPHANUM;
KW_IS      <- "is" !ALPHANUM;
KW_IN      <- "in" !ALPHANUM;
KW_DELETE  <- "delete" !ALPHANUM;
# -- output
KW_PRINT   <- "print" !ALPHANUM;
KW_PRINTLN <- "println" !ALPHANUM;
KW_ECHO    <- "echo" !ALPHANUM;
# -- literal values
KW_TRUE    <- "true" !ALPHANUM;
KW_FALSE   <- "false" !ALPHANUM;
KW_NULL    <- "null" !ALPHANUM;
# ====
# Terminal Symbols
#
# Grouping delimiters. These carry no lookahead: a lookahead that rejects a
# repeated delimiter would make directly nested groups such as "((" or "}}"
# (an inner block closing immediately before the outer one) unparseable, and
# no rule in this grammar defines a doubled-delimiter token that would need to
# be told apart.
L_PAREN <- "(" ;
R_PAREN <- ")" ;
L_CURLY <- "{" ;
R_CURLY <- "}" ;
L_BRACKET <- "[" ;
R_BRACKET <- "]" ;
# Operator terminals. Where present, the negative lookahead keeps a short
# operator from matching the beginning of a longer one.
# -- assignment / statement end
SYM_ASSIGN      <- "=" ![=] ;
SYM_SEMI        <- ";" ;
# -- unary / arithmetic
SYM_BANG        <- "!" ![!] ;
SYM_POW         <- "**" ![=] ;
SYM_DIV         <- "/" ![/=] ;
SYM_REM         <- "%" ![%] ;
SYM_MUL         <- "*" ![*=] ;
SYM_SUB         <- "-" ![-=>] ;
SYM_ADD         <- "+" ![=+] ;
# -- comparison
# LESS, LESSEQ, GREATER, GREATEREQ
SYM_LT          <- "<" ![<=] ;
SYM_LTE         <- "<=" ![>] ;
SYM_GT          <- ">" ![>=] ;
SYM_GTE         <- ">=" ;
SYM_SPACESHIP   <- "<=>" ![>] ;
SYM_EQ          <- "==" ![=] ;
SYM_NEQ         <- "!=" ![=] ;
SYM_IDENTICAL   <- "===" ![=] ;
SYM_NOTIDENT    <- "!==" ![=] ;
# -- shift
# TODO: CHANGE NAME?
# SYM_SHIFTLEFT / SYM_SHIFTRIGHT
SYM_SH_LEFT     <- "<<" ![<] ;
SYM_SH_RIGHT    <- ">>" ![>] ;
SYM_SH_RIGHT_UN <- ">>>" ;
# -- bitwise
SYM_BW_NOT      <- "~" ![=] ;
SYM_BW_AND      <- "&" ![&] ;
SYM_BW_OR       <- "|" ![|] ;
SYM_BW_XOR      <- "^" ;
# -- logical / null handling / conditional
SYM_LOGIC_AND   <- "&&" ;
SYM_LOGIC_OR    <- "||" ;
SYM_NULL_COAL   <- "??" ;
SYM_QUESTION    <- "?" ![?] ;
SYM_COLON       <- ":" ![:] ;
# UNUSED
SYM_INC <- "++" ;
SYM_DEC <- "--" ;
SYM_PIPE <- "|" ![|] ;
SYM_FSLASH <- "/" ;
SYM_BSLASH <- "\\" ;
SYM_QUOTE <- "'" ;
SYM_DBLQUOTE <- "\"" ;
# A single period that is not followed by another period. The negative
# lookahead is written ![.]; the form [!.] would instead be a character class
# that requires a following "!" or ".".
SYM_PERIOD <- "." ![.] ;
SYM_ELLIPSES <- "..." ;
SYM_COMMA <- "," ;
SYM_AT <- "@" ;
SYM_HASH <- "#" ;
SYM_DOLLAR <- "$" ;
SYM_PERCENT <- "%" ;
SYM_CARET <- "^" ;
SYM_AMP <- "&" ![&] ;
SYM_UNDERSC <- "_" ;
SYM_MINUS <- "-" ![-] ;
SYM_ADD_EQ <- "+=" ;
SYM_SUB_EQ <- "-=" ;
SYM_MUL_EQ <- "*=" ;
SYM_DIV_EQ <- "/=" ;
discriminant=b*b-4*a*c;
x = 1 + 5 * 3;
y=2**3**4;
z=-5;
yield x + d;
delete z;
3 in x;
3 !== 3;
$z = 5;
let $a = 6;
if($a == 1) {
$a = 2;
}
$j = 0;
if($b == 1) {
$b = 2;
} elseif($a == 3) {
$b = 4;
}
$k = 0;
if($c == 1) {
$c = 2;
} else {
$c = 4;
}
$l = 0;
if($d == 1) {
$d = 2;
} elseif($d == 3) {
$d = 4;
} else {
$d = 5;
}
$m = 0;
if $e == 1 {
$e = 2;
}
$n = 0;
if $f == 1 {
$f = 2;
} elseif $f == 3 {
$f = 4;
}
$o = 0;
if $g == 1 {
$g = 2;
} else {
$g = 4;
}
$p = 0;
if $h == 1 {
$h = 2;
} elseif $h == 3 {
$h = 4;
} else {
$h = 5;
}
$q = 0;
if($i == 1) {
$i = 2;
} elseif $i == 3 {
$i = 4;
}
$r = 0;
if $j == 1 {
$j = 2;
} elseif($j == 3) {
$j = 4;
}
$s = 0;
if($k == 1) {
$k = 2;
} elseif $k == 3 {
$k = 4;
} else {
$k = 5;
}
$t = 0;
if $m == 1 {
$m = 2;
} elseif($m == 3) {
$m = 4;
} else {
$m = 5;
}
let $x: int = 5;
let $y: int | null = 5;
let $z: array<string> = 0;
let $z: fn(int,int) void = 0;
let $z: fn(argType1,argType2) returnType = 0;
let $z: int|float = 0;
let $z: ~any = 0;
$q = $abc[3] + $xyz * foo($a, $b[0]);
$w = 1 + 2 - 3 / 4 * 5;
$e = 10 << 1;
$r = 9 % 3;
$t = $abc[$q[1 + rand()]];
$y = 1 + 2 * 3;
$u = 1 * 2 - 3;
$i = $a[1 + 2 * 3];
$o = $a[1 * 2 * 3 + $a * 5];
$p = $a[$b[$c]];
$a = $a[(5 + 5) * 6 * $a];
$s = $a[5];
$d = $a[ 1 + $b[6] ];
$f = hello(1, 2);
$g = $a [ rand(1, 2) ];
$h = $a[rand($a[0], $b[1])];
$j = $a[rand(1, 2) + 1];
$k = 1 + 2 * 3;
$l = 1 * 2 - 3;
$z = foo(0);
$x = $a[1 + 2][2][3 + $q[$w[0][0]] + ha()][foo()];
$c = $a[foo() + bar()];
let $v = 0;
let $b = 10;
let $n: int = 2**3**4;
let $x: int? = null;
$x: (int | float)? = null;
$x: i64 = 128;
let $y: map<string, string> = null;
let $z: map<string, int> = [];
let $x: f64 = 123.45;
let $y: f64 = 123e4;
let $y: f64 = 123e+4;
let $y: f64 = 123e-4;
let $y: f64 = 123.0e4;
let $y: f64 = 123.0e+4;
let $y: f64 = 123.0e-4;
let $y: f64 = 123.1e4;
let $y: f64 = 123.2e+4;
let $y: f64 = 123.3e-4;
let $n: int? = null;
let $y: bool = true;
let $z: bool = false;
let $z: map<string, int> = [1, 2, foo(), $a[6]];
let $z: map<string, int>? = [];
let $z: map<string, string><string, string> = [];
fn foo() {
let $x = 0;
foo();
}
fn foo(): void
{
bar();
}
fn add($x: int, $y: int): num
{
return $x + $y;
}
while($x > 5) {
$y = foo($x);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment