Skip to content

Instantly share code, notes, and snippets.

@MatthewDavidCampbell
Created September 21, 2017 16:13
Show Gist options
  • Select an option

  • Save MatthewDavidCampbell/da6734ec28bb03a4f48d8efdaf68f253 to your computer and use it in GitHub Desktop.

Select an option

Save MatthewDavidCampbell/da6734ec28bb03a4f48d8efdaf68f253 to your computer and use it in GitHub Desktop.
Rip of JHint's Pratt parser
using System;
using System.Collections.Generic;
using System.Linq;
using Next.Services.Maps.Tokenize;
using Next.Services.Maps.Tree;
namespace Next.Services.Maps.Parse {
public static class Grammer {
/// <summary>
/// Default Nud
/// </summary>
/// <returns></returns>
public static Func<Pratt, Token, IExpression> Nud = (parser, token) =>
throw new InvalidOperationException($"Symbol {nameof(token)} has no Nud");
/// <summary>
/// Default Led
/// </summary>
/// <returns></returns>
public static Func<Pratt, IExpression, Token, IExpression> Led = (parser, left, token) =>
throw new InvalidOperationException($"Symbol {nameof(token)} has no Led");
/// <summary>
/// Default binary function (2 operands)
/// </summary>
/// <returns></returns>
private static Func<BinaryOperator, Pratt, IExpression, Token, IExpression> BinaryLed = (o, p, left, t) => {
var first = left;
var second = p.Consume(t.LeftBindingPower);
return new BinaryExpression() { Operator = o, Left = first, Right = second };
};
/// <summary>
/// Default right-associative binary function (2 operands)
/// </summary>
/// <returns></returns>
private static Func<BinaryOperator, Pratt, IExpression, Token, IExpression> BinaryRightLed = (o, p, left, t) => {
var first = left;
var second = p.Consume(t.LeftBindingPower - 1);
return new BinaryExpression() { Operator = o, Left = first, Right = second };
};
private static Func<Pratt, IExpression, Token, IExpression> Assignment = (p, left, t) => {
var first = left;
var second = p.Consume(t.LeftBindingPower - 1);
// TODO: Check that second is an identifier
return new CallExpression() {
Callee = new Identifier() { Name = t.Id },
Arguments = new IExpression[] { first, second }
};
};
/// <summary>
/// Load Binary symbols
/// </summary>
/// <returns></returns>
public static Pratt Binaries(this Pratt p) {
// And, Or
p.Infix(AtomType.AmpersandPair, (pratt, left, token) => BinaryRightLed(BinaryOperator.And, pratt, left, token), 30);
p.Infix(AtomType.PipePair, (pratt, left, token) => BinaryRightLed(BinaryOperator.Or, pratt, left, token), 30);
// Relational operators
p.Infix(AtomType.LessThan, (pratt, left, token) => BinaryLed(BinaryOperator.LessThan, pratt, left, token), 40);
p.Infix(AtomType.GreaterThan, (pratt, left, token) => BinaryLed(BinaryOperator.GreaterThan, pratt, left, token), 40);
p.Infix(AtomType.LessThanThenEqual, (pratt, left, token) => BinaryLed(BinaryOperator.LessThanOrEqual, pratt, left, token), 40);
p.Infix(AtomType.GreaterThanThenEqual, (pratt, left, token) => BinaryLed(BinaryOperator.GreaterThanOrEqual, pratt, left, token), 40);
p.Infix(AtomType.EqualPair, (pratt, left, token) => BinaryLed(BinaryOperator.Equal, pratt, left, token), 40);
p.Infix(AtomType.ExclamationThenEqual, (pratt, left, token) => BinaryLed(BinaryOperator.NotEqual, pratt, left, token), 40);
// Plus, Minux
p.Infix(AtomType.Plus, (pratt, left, token) => BinaryLed(BinaryOperator.Add, pratt, left, token), 50);
p.Infix(AtomType.Minus, (pratt, left, token) => BinaryLed(BinaryOperator.Subtract, pratt, left, token), 50);
// Multiply, Divide
p.Infix(AtomType.Asterisk, (pratt, left, token) => BinaryLed(BinaryOperator.Multiply, pratt, left, token), 60);
p.Infix(AtomType.Slash, (pratt, left, token) => BinaryLed(BinaryOperator.Divide, pratt, left, token), 60);
return p;
}
/// <summary>
/// Identifiers & member expressions
/// </summary>
/// <param name="p"></param>
/// <returns></returns>
public static Pratt Identifiers(this Pratt p) {
p.Prefix(
AtomType.Identifier,
(pratt, token) => new Identifier() { Name = token.Value }
);
p.Infix(
AtomType.Dot,
(Pratt, left, token) => {
var first = left;
var second = p.Current.Value;
p.Advance();
return new MemberExpression() { Object = left, Property = new Identifier() { Name = second } };
},
80
);
return p;
}
/// <summary>
/// Commma, colon, etc.
/// </summary>
/// <param name="p"></param>
/// <returns></returns>
public static Pratt Separators(this Pratt p) {
p.Symbol(AtomType.Comma);
return p;
}
/// <summary>
/// Closing symbols like semi-colon, right parenthesis etc.
/// </summary>
/// <param name="p"></param>
/// <returns></returns>
public static Pratt Closers(this Pratt p) {
p.Symbol(AtomType.CloseParenthesis);
p.Symbol(AtomType.Done);
return p;
}
/// <summary>
/// String, integer and float
/// </summary>
/// <param name="p"></param>
/// <returns></returns>
public static Pratt Literals(this Pratt p) {
p.Prefix(AtomType.Integer, (pratt, token) => new LongLiteral() { Value = Convert.ToInt64(token.Value) });
p.Prefix(AtomType.Float, (pratt, token) => new DoubleLiteral() { Value = Convert.ToDouble(token.Value) });
p.Prefix(AtomType.String, (pratt, token) => new StringLiteral() { Value = Convert.ToString(token.Value) });
return p;
}
/// <summary>
/// Load language constants (e.g. boolean)
/// </summary>
/// <param name="p"></param>
/// <returns></returns>
public static Pratt Constants(this Pratt p) {
p.Constant("true", true);
p.Constant("false", false);
return p;
}
/// <summary>
/// Infix for call expressions and name casts (i.e. "as")
/// </summary>
/// <param name="p"></param>
/// <returns></returns>
public static Pratt Functions(this Pratt p) {
p.Infix(
AtomType.OpenParenthesis,
(pratt, left, token) => {
List<IExpression> Arguments = new List<IExpression>();
// TODO: Check left type (Identifier, MemberExpression etc.)
if (AtomTypeHelper.ToEnum(token.Id) != AtomType.CloseParenthesis) {
while (true) {
Arguments.Add(pratt.Consume(0));
if (AtomTypeHelper.ToEnum(pratt.Current.Id) != AtomType.Comma) {
break;
}
pratt.Advance(AtomType.Comma.ToString());
}
}
return new CallExpression() { Callee = left, Arguments = Arguments };
},
80
);
return p;
}
/// <summary>
///
/// </summary>
/// <param name="p"></param>
/// <returns></returns>
public static Pratt Assignments(this Pratt p) {
p.Infix(
"as",
(pratt, left, token) => Assignment(pratt, left, token),
10
);
return p;
}
/// <summary>
/// Bundles together the common infixes and prefixes
/// </summary>
/// <param name="p"></param>
/// <returns></returns>
public static Pratt UseDefaults(this Pratt p) {
return p
.Binaries()
.Identifiers()
.Constants()
.Literals()
.Separators()
.Closers()
.Functions()
.Assignments();
}
/// <summary>
/// Assumes the text results in a single expression (i.e. binary expression)
/// </summary>
/// <param name="p"></param>
/// <returns></returns>
public static IExpression Single(this Pratt p) {
p.KickStart();
IExpression expr = p.Consume(0);
return expr;
}
/// <summary>
/// Assumes the text is a set of arguments delimited by commas
/// </summary>
/// <param name="p"></param>
/// <returns></returns>
public static IEnumerable<IExpression> JustArguments(this Pratt p) {
List<IExpression> Arguments = new List<IExpression>();
p.KickStart();
while (true) {
if (AtomTypeHelper.ToEnum(p.Current.Id) == AtomType.Done) {
break;
}
Arguments.Add(p.Consume(0));
if (AtomTypeHelper.ToEnum(p.Current.Id) != AtomType.Comma) {
break;
}
p.Advance(AtomType.Comma.ToString());
}
return Arguments;
}
}
}
using System;
using Next.Services.Maps.Tree;
namespace Next.Services.Maps.Parse {
public interface ISymbol {
String Id { get; }
int LeftBindingPower { get; }
Func<Pratt, Token, IExpression> Nud { get; }
Func<Pratt, IExpression, Token, IExpression> Led { get; }
}
}
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using Next.Services.Maps.Tokenize;
using Next.Services.Maps.Tree;
namespace Next.Services.Maps.Parse {
public class Pratt {
private readonly IEnumerator<Atom> _atoms;
private readonly string _text;
private ConcurrentDictionary<string, Symbol> _syntax = new ConcurrentDictionary<string, Symbol>();
private Token _current;
public Pratt (IEnumerator<Atom> atoms, string text) {
this._atoms = atoms;
this._text = text;
}
public Token Current => _current;
/// <summary>
/// Assign symbol (token) to atom type only overriding the left binding power which defaults to zero
/// </summary>
/// <param name="key"></param>
/// <param name="LeftBindingPower"></param>
public Symbol Symbol(AtomType key, int leftBindingPower = 0) {
string sKey = key.ToString();
return Symbol(sKey, leftBindingPower);
}
public Symbol Symbol(string key, int leftBindingPower = 0) {
Symbol s;
bool exists = _syntax.TryGetValue(key, out s);
if (!exists) {
// TODO: when fails
_syntax.TryAdd(key, new Symbol(key, Grammer.Nud, Grammer.Led, 0));
}
return s;
}
/// <summary>
/// Infixes
/// </summary>
/// <param name="key"></param>
/// <param name="LeftBindingPower"></param>
/// <param name="Led"></param>
public Symbol Infix(AtomType key, Func<Pratt, IExpression, Token, IExpression> led, int leftBindingPower = 0) {
string sKey = key.ToString();
return Infix(sKey, led, leftBindingPower);
}
/// <summary>
/// Infixes (from keywords)
/// </summary>
/// <param name="sKey"></param>
/// <param name="led"></param>
/// <param name="leftBindingPower"></param>
/// <returns></returns>
public Symbol Infix(string sKey, Func<Pratt, IExpression, Token, IExpression> led, int leftBindingPower = 0) {
Symbol s;
bool exists = _syntax.TryGetValue(sKey, out s);
if (!exists) {
// TODO: when fails
_syntax.TryAdd(sKey, new Symbol(sKey, Grammer.Nud, led, leftBindingPower));
}
return s;
}
/// <summary>
/// Prefixes
/// </summary>
/// <param name="key"></param>
/// <param name="nud"></param>
/// <returns></returns>
public Symbol Prefix(AtomType key, Func<Pratt, Token, IExpression> nud) {
string sKey = key.ToString();
Symbol s;
bool exists = _syntax.TryGetValue(sKey, out s);
if (!exists) {
// TODO: when fails
_syntax.TryAdd(sKey, new Symbol(sKey, nud, Grammer.Led));
}
return s;
}
public Symbol Constant(string key, object value) {
Func<Pratt, Token, IExpression> fn = (parser, token) => {
switch (value.GetType().Name.ToLower()) {
case "byte":
case "int16":
case "int32":
case "int64":
return new LongLiteral() { Value = Convert.ToInt64(value) };
case "float":
case "double":
return new DoubleLiteral() { Value = Convert.ToDouble(value) };
case "string":
return new StringLiteral() { Value = Convert.ToString(value) };
case "bool":
case "boolean":
return new BooleanLiteral() { Value = Convert.ToBoolean(value) };
default:
// TODO: Normal message
throw new InvalidCastException("");
}
};
Symbol s;
bool exists = _syntax.TryGetValue(key, out s);
if (!exists) {
// TODO: when fails
_syntax.TryAdd(key, new Symbol(key, fn, Grammer.Led));
}
return s;
}
/// <summary>
///
/// </summary>
/// <param name="atom"></param>
/// <returns></returns>
private Symbol Identify(Atom atom) {
// TODO: Super basic identifier override with no scope
if (atom.Type == AtomType.Identifier) {
var val = atom.GetText(_text);
if (_syntax.ContainsKey(val)) {
return Symbol(val);
}
}
return Symbol(atom.Type);
}
/// <summary>
/// Set the current token from current atom then move
/// ahead the current atom
/// </summary>
/// <param name="previous"></param>
public void Advance(String Previous = null) {
if (Previous != null && Previous != Current.Id) {
throw new InvalidOperationException($"Expected token {Previous} bevore advancing with actual {Current.Id}");
}
_atoms.MoveNext();
Symbol s = Identify(_atoms.Current);
_current = new Token(
s.Id,
_atoms.Current.GetText(_text),
s.Nud,
s.Led,
s.LeftBindingPower
);
// spool ahead without space
bool HasSpaces = _atoms.Current.Type == AtomType.Spaces;
if (HasSpaces) {
Advance();
}
}
/// <summary>
///
/// </summary>
/// <returns></returns>
public Pratt KickStart() {
Advance();
return this;
}
/// <summary>
/// Right binding power controls how aggressively it binds to tokens to the right
/// </summary>
/// <param name="rightBindingPower"></param>
/// <returns></returns>
public IExpression Consume(int RightBindingPower, bool kickstart = false) {
if (kickstart) {
Advance();
}
IExpression accumlated;
// store in stack
Token item = _current;
Advance();
// process literals, variables, and prefix operators
accumlated = item.Nud(this, item);
// while in stack process fold over infix and suffix operators in accumlated
while (RightBindingPower < _current.LeftBindingPower) {
item = _current;
Advance();
accumlated = item.Led(this, accumlated, item);
}
return accumlated;
}
}
}
using System;
using Next.Services.Maps.Tree;
namespace Next.Services.Maps.Parse {
public struct Symbol: ISymbol {
public String Id { get; }
public int LeftBindingPower { get; }
public Func<Pratt, Token, IExpression> Nud { get; }
public Func<Pratt, IExpression, Token, IExpression> Led { get; }
public Symbol(String id, Func<Pratt, Token, IExpression> nud, Func<Pratt, IExpression, Token, IExpression> led, int leftBindingPower = 0) {
this.Id = id;
this.LeftBindingPower = leftBindingPower;
this.Nud = nud;
this.Led = led;
}
}
}
using System;
using Next.Services.Maps.Tokenize;
using Next.Services.Maps.Tree;
namespace Next.Services.Maps.Parse {
public struct Token : ISymbol {
public String Id { get; }
public String Value { get; }
public int LeftBindingPower { get; }
public Func<Pratt, Token, IExpression> Nud { get; }
public Func<Pratt, IExpression, Token, IExpression> Led { get; }
public Token(String id, String value, Func<Pratt, Token, IExpression> nud, Func<Pratt, IExpression, Token, IExpression> led, int leftBindingPower = 0) {
this.Id = id;
this.Value = value;
this.Nud = nud;
this.Led = led;
this.LeftBindingPower = leftBindingPower;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment