Created
July 8, 2012 13:10
-
-
Save dtchepak/3070889 to your computer and use it in GitHub Desktop.
Monadic parser combinators in C#, attempting to parse some basic text markup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
namespace MarkupParser | |
{ | |
public abstract class Node | |
{ | |
public static Parser<String> TextParser() | |
{ | |
return Parser.Satisfies(IsNotReserved).Many().Then(cs => Parser.Value(String.Join("", cs))); | |
} | |
private static bool IsReserved(char arg) { return "{}*".Any(arg.Equals); } | |
private static bool IsNotReserved(char arg) { return !IsReserved(arg); } | |
public static Parser<Node> TextNodeParser() | |
{ | |
return TextParser().Then(s => Parser<Node>.Value(new TextNode(s))); | |
} | |
public static Parser<Node> NodeParser() | |
{ | |
return BoldParser().Or(BindingParser()).Or(TextNodeParser()); | |
} | |
public static Parser<Node> BoldParser() | |
{ | |
var innerNodeParser = BindingParser().Or(TextNodeParser()).Many(); | |
return Parser.DelimitedText('*') | |
.Then(innerText => Parser<Node>.Value(new BoldNode(innerNodeParser.Parse(innerText).Value))); | |
} | |
public static Parser<Node> BindingParser() | |
{ | |
return Parser.DelimitedText('{', '}').Then(text => Parser<Node>.Value(new BindingNode(text))); | |
} | |
public static string ToString(IEnumerable<Node> nodes) | |
{ | |
return String.Join("", nodes); | |
} | |
} | |
public class TextNode : Node | |
{ | |
public TextNode(string text) { Text = text; } | |
public TextNode(IEnumerable<char> text) { Text = new String(text.ToArray()); } | |
public string Text { get; private set; } | |
public override string ToString() { return Text; } | |
} | |
public class BoldNode : Node | |
{ | |
public IEnumerable<Node> Nodes { get; private set; } | |
public BoldNode(IEnumerable<Node> nodes) { Nodes = nodes; } | |
public BoldNode(Node node) { Nodes = new[] { node }; } | |
public override string ToString() { return "(BOLD: " + ToString(Nodes) + ")"; } | |
} | |
public class BindingNode : Node | |
{ | |
public string BindingExpression { get; private set; } | |
public BindingNode(string bindingExpression) { BindingExpression = bindingExpression; } | |
public override string ToString() { return "(BINDING: " + BindingExpression + ")"; } | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
namespace MarkupParser | |
{ | |
public class Parser<T> | |
{ | |
private readonly Func<string, ParseResult<T>> _parse; | |
public Parser(Func<string, ParseResult<T>> parse) { _parse = parse; } | |
public ParseResult<T> Parse(string s) { return _parse(s); } | |
public static Parser<T> Fail() { return new Parser<T>(s => null); } | |
public static Parser<T> Value(T value) { return Parser.Value(value); } | |
public Parser<T> Or(Parser<T> alternate) | |
{ | |
return new Parser<T>(s => Parse(s) ?? alternate.Parse(s)); | |
} | |
public Parser<T1> Then<T1>(Func<T, Parser<T1>> getNextParser) | |
{ | |
return new Parser<T1>(s => | |
{ | |
var firstResult = Parse(s); | |
if (firstResult == null) { return null; } | |
var nextParser = getNextParser(firstResult.Value); | |
return nextParser.Parse(firstResult.Remaining); | |
}); | |
} | |
public Parser<IEnumerable<T>> AtLeastOne() | |
{ | |
return this | |
.Then(x => Many() | |
.Then(xs => Parser.Value((new[] { x }).Concat(xs)))); | |
} | |
public Parser<IEnumerable<T>> Many() { | |
//return AtLeastOne().Or(Parser<IEnumerable<T>>.Value(new T[0])); | |
return new Parser<IEnumerable<T>>(ParseMultiple); | |
} | |
private ParseResult<IEnumerable<T>> ParseMultiple(string input) | |
{ | |
var remainingInput = input; | |
var values = new List<T>(); | |
while (remainingInput != "") | |
{ | |
var result = Parse(remainingInput); | |
if (result == null) break; | |
values.Add(result.Value); | |
remainingInput = result.Remaining; | |
} | |
return new ParseResult<IEnumerable<T>>(remainingInput, values); | |
} | |
} | |
public class Parser | |
{ | |
public static Parser<T> Value<T>(T value) | |
{ | |
return new Parser<T>(s => new ParseResult<T>(s, value)); | |
} | |
public static Parser<char> Char() | |
{ | |
return new Parser<char>(s => string.IsNullOrEmpty(s) ? null : new ParseResult<char>(s.Substring(1), s[0])); | |
} | |
public static Parser<char> Satisfies(Func<char, bool> predicate) | |
{ | |
return Char().Then(c => predicate(c) ? Value(c) : Parser<char>.Fail()); | |
} | |
public static Parser<char> Is(char c) { return Satisfies(c.Equals); } | |
public static Parser<char> IsNot(char c) { return Satisfies(parsed => c != parsed); } | |
public static Parser<string> DelimitedText(char delimiter) | |
{ | |
return DelimitedText(delimiter, delimiter); | |
} | |
public static Parser<string> DelimitedText(char start, char end) | |
{ | |
return Is(start) | |
.Then(startDelim => IsNot(end).Many() | |
.Then(text => Is(end) | |
.Then(endDelim => Value(new String(text.ToArray()))))); | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Linq; | |
using NUnit.Framework; | |
using Shouldly; | |
namespace MarkupParser | |
{ | |
public class ParserTests | |
{ | |
/* ... snip ... */ | |
[Test] | |
public void BoldAndTextAndBinding() | |
{ | |
const string expected = "hello (BOLD: world and (BINDING: binding))!"; | |
var result = Node.NodeParser().Many().Parse("hello *world and {binding}*!"); | |
Node.ToString(result.Value).ShouldBe(expected); | |
} | |
[Test] | |
public void InvalidInput() | |
{ | |
// Fails; infinite loop | |
//var result = Node.NodeParser().Many().Parse("this is *unterminated bold"); | |
//result.ShouldBe(null); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment