Created
September 21, 2017 16:13
-
-
Save MatthewDavidCampbell/da6734ec28bb03a4f48d8efdaf68f253 to your computer and use it in GitHub Desktop.
Rip of JHint's Pratt parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System; | |
| using System.Collections.Generic; | |
| using System.Linq; | |
| using Next.Services.Maps.Tokenize; | |
| using Next.Services.Maps.Tree; | |
| namespace Next.Services.Maps.Parse { | |
| public static class Grammer { | |
| /// <summary> | |
| /// Default Nud | |
| /// </summary> | |
| /// <returns></returns> | |
| public static Func<Pratt, Token, IExpression> Nud = (parser, token) => | |
| throw new InvalidOperationException($"Symbol {nameof(token)} has no Nud"); | |
| /// <summary> | |
| /// Default Led | |
| /// </summary> | |
| /// <returns></returns> | |
| public static Func<Pratt, IExpression, Token, IExpression> Led = (parser, left, token) => | |
| throw new InvalidOperationException($"Symbol {nameof(token)} has no Led"); | |
| /// <summary> | |
| /// Default binary function (2 operands) | |
| /// </summary> | |
| /// <returns></returns> | |
| private static Func<BinaryOperator, Pratt, IExpression, Token, IExpression> BinaryLed = (o, p, left, t) => { | |
| var first = left; | |
| var second = p.Consume(t.LeftBindingPower); | |
| return new BinaryExpression() { Operator = o, Left = first, Right = second }; | |
| }; | |
| /// <summary> | |
| /// Default right-associative binary function (2 operands) | |
| /// </summary> | |
| /// <returns></returns> | |
| private static Func<BinaryOperator, Pratt, IExpression, Token, IExpression> BinaryRightLed = (o, p, left, t) => { | |
| var first = left; | |
| var second = p.Consume(t.LeftBindingPower - 1); | |
| return new BinaryExpression() { Operator = o, Left = first, Right = second }; | |
| }; | |
| private static Func<Pratt, IExpression, Token, IExpression> Assignment = (p, left, t) => { | |
| var first = left; | |
| var second = p.Consume(t.LeftBindingPower - 1); | |
| // TODO: Check that second is an identifier | |
| return new CallExpression() { | |
| Callee = new Identifier() { Name = t.Id }, | |
| Arguments = new IExpression[] { first, second } | |
| }; | |
| }; | |
| /// <summary> | |
| /// Load Binary symbols | |
| /// </summary> | |
| /// <returns></returns> | |
| public static Pratt Binaries(this Pratt p) { | |
| // And, Or | |
| p.Infix(AtomType.AmpersandPair, (pratt, left, token) => BinaryRightLed(BinaryOperator.And, pratt, left, token), 30); | |
| p.Infix(AtomType.PipePair, (pratt, left, token) => BinaryRightLed(BinaryOperator.Or, pratt, left, token), 30); | |
| // Relational operators | |
| p.Infix(AtomType.LessThan, (pratt, left, token) => BinaryLed(BinaryOperator.LessThan, pratt, left, token), 40); | |
| p.Infix(AtomType.GreaterThan, (pratt, left, token) => BinaryLed(BinaryOperator.GreaterThan, pratt, left, token), 40); | |
| p.Infix(AtomType.LessThanThenEqual, (pratt, left, token) => BinaryLed(BinaryOperator.LessThanOrEqual, pratt, left, token), 40); | |
| p.Infix(AtomType.GreaterThanThenEqual, (pratt, left, token) => BinaryLed(BinaryOperator.GreaterThanOrEqual, pratt, left, token), 40); | |
| p.Infix(AtomType.EqualPair, (pratt, left, token) => BinaryLed(BinaryOperator.Equal, pratt, left, token), 40); | |
| p.Infix(AtomType.ExclamationThenEqual, (pratt, left, token) => BinaryLed(BinaryOperator.NotEqual, pratt, left, token), 40); | |
| // Plus, Minux | |
| p.Infix(AtomType.Plus, (pratt, left, token) => BinaryLed(BinaryOperator.Add, pratt, left, token), 50); | |
| p.Infix(AtomType.Minus, (pratt, left, token) => BinaryLed(BinaryOperator.Subtract, pratt, left, token), 50); | |
| // Multiply, Divide | |
| p.Infix(AtomType.Asterisk, (pratt, left, token) => BinaryLed(BinaryOperator.Multiply, pratt, left, token), 60); | |
| p.Infix(AtomType.Slash, (pratt, left, token) => BinaryLed(BinaryOperator.Divide, pratt, left, token), 60); | |
| return p; | |
| } | |
| /// <summary> | |
| /// Identifiers & member expressions | |
| /// </summary> | |
| /// <param name="p"></param> | |
| /// <returns></returns> | |
| public static Pratt Identifiers(this Pratt p) { | |
| p.Prefix( | |
| AtomType.Identifier, | |
| (pratt, token) => new Identifier() { Name = token.Value } | |
| ); | |
| p.Infix( | |
| AtomType.Dot, | |
| (Pratt, left, token) => { | |
| var first = left; | |
| var second = p.Current.Value; | |
| p.Advance(); | |
| return new MemberExpression() { Object = left, Property = new Identifier() { Name = second } }; | |
| }, | |
| 80 | |
| ); | |
| return p; | |
| } | |
| /// <summary> | |
| /// Commma, colon, etc. | |
| /// </summary> | |
| /// <param name="p"></param> | |
| /// <returns></returns> | |
| public static Pratt Separators(this Pratt p) { | |
| p.Symbol(AtomType.Comma); | |
| return p; | |
| } | |
| /// <summary> | |
| /// Closing symbols like semi-colon, right parenthesis etc. | |
| /// </summary> | |
| /// <param name="p"></param> | |
| /// <returns></returns> | |
| public static Pratt Closers(this Pratt p) { | |
| p.Symbol(AtomType.CloseParenthesis); | |
| p.Symbol(AtomType.Done); | |
| return p; | |
| } | |
| /// <summary> | |
| /// String, integer and float | |
| /// </summary> | |
| /// <param name="p"></param> | |
| /// <returns></returns> | |
| public static Pratt Literals(this Pratt p) { | |
| p.Prefix(AtomType.Integer, (pratt, token) => new LongLiteral() { Value = Convert.ToInt64(token.Value) }); | |
| p.Prefix(AtomType.Float, (pratt, token) => new DoubleLiteral() { Value = Convert.ToDouble(token.Value) }); | |
| p.Prefix(AtomType.String, (pratt, token) => new StringLiteral() { Value = Convert.ToString(token.Value) }); | |
| return p; | |
| } | |
| /// <summary> | |
| /// Load language constants (e.g. boolean) | |
| /// </summary> | |
| /// <param name="p"></param> | |
| /// <returns></returns> | |
| public static Pratt Constants(this Pratt p) { | |
| p.Constant("true", true); | |
| p.Constant("false", false); | |
| return p; | |
| } | |
| /// <summary> | |
| /// Infix for call expressions and name casts (i.e. "as") | |
| /// </summary> | |
| /// <param name="p"></param> | |
| /// <returns></returns> | |
| public static Pratt Functions(this Pratt p) { | |
| p.Infix( | |
| AtomType.OpenParenthesis, | |
| (pratt, left, token) => { | |
| List<IExpression> Arguments = new List<IExpression>(); | |
| // TODO: Check left type (Identifier, MemberExpression etc.) | |
| if (AtomTypeHelper.ToEnum(token.Id) != AtomType.CloseParenthesis) { | |
| while (true) { | |
| Arguments.Add(pratt.Consume(0)); | |
| if (AtomTypeHelper.ToEnum(pratt.Current.Id) != AtomType.Comma) { | |
| break; | |
| } | |
| pratt.Advance(AtomType.Comma.ToString()); | |
| } | |
| } | |
| return new CallExpression() { Callee = left, Arguments = Arguments }; | |
| }, | |
| 80 | |
| ); | |
| return p; | |
| } | |
| /// <summary> | |
| /// | |
| /// </summary> | |
| /// <param name="p"></param> | |
| /// <returns></returns> | |
| public static Pratt Assignments(this Pratt p) { | |
| p.Infix( | |
| "as", | |
| (pratt, left, token) => Assignment(pratt, left, token), | |
| 10 | |
| ); | |
| return p; | |
| } | |
| /// <summary> | |
| /// Bundles together the common infixes and prefixes | |
| /// </summary> | |
| /// <param name="p"></param> | |
| /// <returns></returns> | |
| public static Pratt UseDefaults(this Pratt p) { | |
| return p | |
| .Binaries() | |
| .Identifiers() | |
| .Constants() | |
| .Literals() | |
| .Separators() | |
| .Closers() | |
| .Functions() | |
| .Assignments(); | |
| } | |
| /// <summary> | |
| /// Assumes the text results in a single expression (i.e. binary expression) | |
| /// </summary> | |
| /// <param name="p"></param> | |
| /// <returns></returns> | |
| public static IExpression Single(this Pratt p) { | |
| p.KickStart(); | |
| IExpression expr = p.Consume(0); | |
| return expr; | |
| } | |
| /// <summary> | |
| /// Assumes the text is a set of arguments delimited by commas | |
| /// </summary> | |
| /// <param name="p"></param> | |
| /// <returns></returns> | |
| public static IEnumerable<IExpression> JustArguments(this Pratt p) { | |
| List<IExpression> Arguments = new List<IExpression>(); | |
| p.KickStart(); | |
| while (true) { | |
| if (AtomTypeHelper.ToEnum(p.Current.Id) == AtomType.Done) { | |
| break; | |
| } | |
| Arguments.Add(p.Consume(0)); | |
| if (AtomTypeHelper.ToEnum(p.Current.Id) != AtomType.Comma) { | |
| break; | |
| } | |
| p.Advance(AtomType.Comma.ToString()); | |
| } | |
| return Arguments; | |
| } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System; | |
| using Next.Services.Maps.Tree; | |
| namespace Next.Services.Maps.Parse { | |
| public interface ISymbol { | |
| String Id { get; } | |
| int LeftBindingPower { get; } | |
| Func<Pratt, Token, IExpression> Nud { get; } | |
| Func<Pratt, IExpression, Token, IExpression> Led { get; } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System; | |
| using System.Collections.Concurrent; | |
| using System.Collections.Generic; | |
| using Next.Services.Maps.Tokenize; | |
| using Next.Services.Maps.Tree; | |
| namespace Next.Services.Maps.Parse { | |
| public class Pratt { | |
| private readonly IEnumerator<Atom> _atoms; | |
| private readonly string _text; | |
| private ConcurrentDictionary<string, Symbol> _syntax = new ConcurrentDictionary<string, Symbol>(); | |
| private Token _current; | |
| public Pratt (IEnumerator<Atom> atoms, string text) { | |
| this._atoms = atoms; | |
| this._text = text; | |
| } | |
| public Token Current => _current; | |
| /// <summary> | |
| /// Assign symbol (token) to atom type only overriding the left binding power which defaults to zero | |
| /// </summary> | |
| /// <param name="key"></param> | |
| /// <param name="LeftBindingPower"></param> | |
| public Symbol Symbol(AtomType key, int leftBindingPower = 0) { | |
| string sKey = key.ToString(); | |
| return Symbol(sKey, leftBindingPower); | |
| } | |
| public Symbol Symbol(string key, int leftBindingPower = 0) { | |
| Symbol s; | |
| bool exists = _syntax.TryGetValue(key, out s); | |
| if (!exists) { | |
| // TODO: when fails | |
| _syntax.TryAdd(key, new Symbol(key, Grammer.Nud, Grammer.Led, 0)); | |
| } | |
| return s; | |
| } | |
| /// <summary> | |
| /// Infixes | |
| /// </summary> | |
| /// <param name="key"></param> | |
| /// <param name="LeftBindingPower"></param> | |
| /// <param name="Led"></param> | |
| public Symbol Infix(AtomType key, Func<Pratt, IExpression, Token, IExpression> led, int leftBindingPower = 0) { | |
| string sKey = key.ToString(); | |
| return Infix(sKey, led, leftBindingPower); | |
| } | |
| /// <summary> | |
| /// Infixes (from keywords) | |
| /// </summary> | |
| /// <param name="sKey"></param> | |
| /// <param name="led"></param> | |
| /// <param name="leftBindingPower"></param> | |
| /// <returns></returns> | |
| public Symbol Infix(string sKey, Func<Pratt, IExpression, Token, IExpression> led, int leftBindingPower = 0) { | |
| Symbol s; | |
| bool exists = _syntax.TryGetValue(sKey, out s); | |
| if (!exists) { | |
| // TODO: when fails | |
| _syntax.TryAdd(sKey, new Symbol(sKey, Grammer.Nud, led, leftBindingPower)); | |
| } | |
| return s; | |
| } | |
| /// <summary> | |
| /// Prefixes | |
| /// </summary> | |
| /// <param name="key"></param> | |
| /// <param name="nud"></param> | |
| /// <returns></returns> | |
| public Symbol Prefix(AtomType key, Func<Pratt, Token, IExpression> nud) { | |
| string sKey = key.ToString(); | |
| Symbol s; | |
| bool exists = _syntax.TryGetValue(sKey, out s); | |
| if (!exists) { | |
| // TODO: when fails | |
| _syntax.TryAdd(sKey, new Symbol(sKey, nud, Grammer.Led)); | |
| } | |
| return s; | |
| } | |
| public Symbol Constant(string key, object value) { | |
| Func<Pratt, Token, IExpression> fn = (parser, token) => { | |
| switch (value.GetType().Name.ToLower()) { | |
| case "byte": | |
| case "int16": | |
| case "int32": | |
| case "int64": | |
| return new LongLiteral() { Value = Convert.ToInt64(value) }; | |
| case "float": | |
| case "double": | |
| return new DoubleLiteral() { Value = Convert.ToDouble(value) }; | |
| case "string": | |
| return new StringLiteral() { Value = Convert.ToString(value) }; | |
| case "bool": | |
| case "boolean": | |
| return new BooleanLiteral() { Value = Convert.ToBoolean(value) }; | |
| default: | |
| // TODO: Normal message | |
| throw new InvalidCastException(""); | |
| } | |
| }; | |
| Symbol s; | |
| bool exists = _syntax.TryGetValue(key, out s); | |
| if (!exists) { | |
| // TODO: when fails | |
| _syntax.TryAdd(key, new Symbol(key, fn, Grammer.Led)); | |
| } | |
| return s; | |
| } | |
| /// <summary> | |
| /// | |
| /// </summary> | |
| /// <param name="atom"></param> | |
| /// <returns></returns> | |
| private Symbol Identify(Atom atom) { | |
| // TODO: Super basic identifier override with no scope | |
| if (atom.Type == AtomType.Identifier) { | |
| var val = atom.GetText(_text); | |
| if (_syntax.ContainsKey(val)) { | |
| return Symbol(val); | |
| } | |
| } | |
| return Symbol(atom.Type); | |
| } | |
| /// <summary> | |
| /// Set the current token from current atom then move | |
| /// ahead the current atom | |
| /// </summary> | |
| /// <param name="previous"></param> | |
| public void Advance(String Previous = null) { | |
| if (Previous != null && Previous != Current.Id) { | |
| throw new InvalidOperationException($"Expected token {Previous} bevore advancing with actual {Current.Id}"); | |
| } | |
| _atoms.MoveNext(); | |
| Symbol s = Identify(_atoms.Current); | |
| _current = new Token( | |
| s.Id, | |
| _atoms.Current.GetText(_text), | |
| s.Nud, | |
| s.Led, | |
| s.LeftBindingPower | |
| ); | |
| // spool ahead without space | |
| bool HasSpaces = _atoms.Current.Type == AtomType.Spaces; | |
| if (HasSpaces) { | |
| Advance(); | |
| } | |
| } | |
| /// <summary> | |
| /// | |
| /// </summary> | |
| /// <returns></returns> | |
| public Pratt KickStart() { | |
| Advance(); | |
| return this; | |
| } | |
| /// <summary> | |
| /// Right binding power controls how aggressively it binds to tokens to the right | |
| /// </summary> | |
| /// <param name="rightBindingPower"></param> | |
| /// <returns></returns> | |
| public IExpression Consume(int RightBindingPower, bool kickstart = false) { | |
| if (kickstart) { | |
| Advance(); | |
| } | |
| IExpression accumlated; | |
| // store in stack | |
| Token item = _current; | |
| Advance(); | |
| // process literals, variables, and prefix operators | |
| accumlated = item.Nud(this, item); | |
| // while in stack process fold over infix and suffix operators in accumlated | |
| while (RightBindingPower < _current.LeftBindingPower) { | |
| item = _current; | |
| Advance(); | |
| accumlated = item.Led(this, accumlated, item); | |
| } | |
| return accumlated; | |
| } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System; | |
| using Next.Services.Maps.Tree; | |
| namespace Next.Services.Maps.Parse { | |
| public struct Symbol: ISymbol { | |
| public String Id { get; } | |
| public int LeftBindingPower { get; } | |
| public Func<Pratt, Token, IExpression> Nud { get; } | |
| public Func<Pratt, IExpression, Token, IExpression> Led { get; } | |
| public Symbol(String id, Func<Pratt, Token, IExpression> nud, Func<Pratt, IExpression, Token, IExpression> led, int leftBindingPower = 0) { | |
| this.Id = id; | |
| this.LeftBindingPower = leftBindingPower; | |
| this.Nud = nud; | |
| this.Led = led; | |
| } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System; | |
| using Next.Services.Maps.Tokenize; | |
| using Next.Services.Maps.Tree; | |
| namespace Next.Services.Maps.Parse { | |
| public struct Token : ISymbol { | |
| public String Id { get; } | |
| public String Value { get; } | |
| public int LeftBindingPower { get; } | |
| public Func<Pratt, Token, IExpression> Nud { get; } | |
| public Func<Pratt, IExpression, Token, IExpression> Led { get; } | |
| public Token(String id, String value, Func<Pratt, Token, IExpression> nud, Func<Pratt, IExpression, Token, IExpression> led, int leftBindingPower = 0) { | |
| this.Id = id; | |
| this.Value = value; | |
| this.Nud = nud; | |
| this.Led = led; | |
| this.LeftBindingPower = leftBindingPower; | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment