// Annotated agAST builder example
// Gist conartist6/877fdb0ff87c0170a1bd2f8a51dede4b, last active October 8, 2023
// Node objects are immutable
// Also immutable are: properties, attributes, children, terminals, and any arrays
// Immutable trees can be cached as valid with regard to a particular grammar!

/**
 * Shallowly freezes `node` and returns it.
 *
 * A frozen object is already sealed, so a single `Object.freeze` call is enough.
 * The freeze is shallow: values nested inside `node` must be frozen separately.
 * Non-object values (e.g. `undefined`) are returned unchanged.
 *
 * @param {*} node - the value to freeze
 * @returns {*} the same value that was passed in
 */
const freeze = (node) => Object.freeze(node);
// Helpers to make the following code less verbose
// id/trivia/str/ref/gap destructure the first element of their first argument,
// so they work as template tags (t.ref`verb`) or with an array (t.str(['x'])).
// Only that first string chunk is read: `${...}` interpolations are ignored.
const t = {
  /**
   * Builds a node whose only child is a String terminal holding `str`.
   *
   * @param {object} type - identity fields for the node, e.g. the result of `t.id`
   * @param {string} str - literal text of the token
   * @param {object} [attributes] - forwarded to `t.node`
   * @returns {object} a frozen node
   */
  token: (type, str, attributes) => t.node(type, [t.str([str])], {}, attributes),

  /**
   * Builds a frozen node. The fields of `type` are spread onto the node itself.
   *
   * Omitted arguments default to empty containers so that every node has the
   * same shape. `children`, `properties`, `attributes` and any array stored
   * directly in `properties` are frozen in place (the caller's objects are
   * frozen, not copied).
   *
   * @param {object} type - identity fields for the node, e.g. the result of `t.id`
   * @param {Array} [children=[]] - ordered terminals and references
   * @param {object} [properties={}] - named child nodes, or arrays of child nodes
   * @param {object} [attributes={}] - extra metadata about the node
   * @returns {object} a frozen node
   */
  node: (type, children = [], properties = {}, attributes = {}) => {
    // An explicit null skips the default parameter, so guard before iterating.
    if (properties !== null) {
      for (const value of Object.values(properties)) {
        // Lists such as `alternatives: [...]` must be immutable as well.
        if (Array.isArray(value)) freeze(value);
      }
    }

    return freeze({
      ...type,
      children: freeze(children),
      properties: freeze(properties),
      attributes: freeze(attributes),
    });
  },

  /**
   * Splits 'Language:Production' into its two parts.
   *
   * Without a ':' the whole string becomes `language` and `production` is
   * undefined; segments after a second ':' are dropped.
   *
   * @returns {{language: string, production: (string|undefined)}}
   */
  id: ([str]) => {
    const [language, production] = str.split(':');
    return { language, production };
  },

  /** Frozen Trivia terminal whose value is the given text. */
  trivia: ([str]) => freeze({ type: 'Trivia', value: str }),

  /** Frozen String terminal whose value is the given text. */
  str: ([str]) => freeze({ type: 'String', value: str }),

  /** Frozen Reference child; `value` names a key in the node's properties. */
  ref: ([property]) => freeze({ type: 'Reference', value: property }),

  /** Frozen Gap child whose value is the given property name. */
  gap: ([property]) => freeze({ type: 'Gap', value: property }),
};
// This tree is an example of agAST (A General Abstract Syntax Tree)
// The structure is meant to be useful in describing code written in any language
// The tree represents the input:
//   eat( /\w/ )
const tree = t.node(
  // ID specifies the language and production type
  // The language name will be resolvable to some well-known URL
  // The URL should point to a validator for the language
  // CSTML syntax allows omission of language name when it is implied
  t.id`Instruction:Call`,
  // The children array creates a total ordering of all nodes and tokens
  // It ensures that any document can be printed without needing a grammar
  [
    // A reference child looks up a key in properties
    t.ref`verb`,
    // It should also be possible to reverse a terminal -> node relationship
    // e.g. a linter rule might want to do:
    //   - let node = properties.open
    //   - let term = getTerminal(node)
    //   - getTrailingTrivia(term)
    t.ref`open`,
    t.trivia` `, // The linter would find this trivia
    t.ref`argument`,
    // Trivia has no metadata, and so can be a terminal
    // Comments are considered a separate language embedded onto trivia!
    t.trivia` `,
    t.ref`close`,
  ],
  // The properties object allows for fast named lookups
  {
    verb: t.token(t.id`Instruction:Identifier`, 'eat'),
    open: t.token(t.id`Instruction:Punctuator`, '('),
    argument: t.node(
      t.id`Spamex:RegexMatcher`,
      // A bracketed reference such as `[alternatives]` names an array-valued property
      [t.ref`open`, t.ref`[alternatives]`, t.ref`close`, t.ref`flags`],
      {
        open: t.token(t.id`Spamex:Punctuator`, '/'),
        alternatives: [
          t.node(t.id`Regex:Alternative`, [t.ref`[elements]`], {
            elements: [
              t.node(
                t.id`Regex:CharacterSet`,
                [t.ref`escape`, t.ref`value`],
                {
                  escape: t.token(t.id`Regex:Punctuator`, '\\'),
                  value: t.token(t.id`Regex:Keyword`, 'w'),
                },
                // Attributes carry extra metadata about the node
                { kind: 'word' },
              ),
            ],
          }),
        ],
        close: t.token(t.id`Spamex:Punctuator`, '/'),
        // The input has no regex flags, so this node is empty. Children and
        // properties are still passed explicitly so that the node has an
        // (empty) children array to iterate when the tree is printed.
        flags: t.node(t.id`Regex:Flags`, [], {}),
      },
    ),
    close: t.token(t.id`Instruction:Punctuator`, ')'),
  },
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment