Last active
December 18, 2021 23:39
-
-
Save theraot/e07af2f8d3acae3f33eb47f0698865c4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "nuget:Theraot.Core/3.0.3" | |
// This file is for RoslynPad | |
using System; | |
using System.Collections; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using Theraot.Core; | |
/// <summary>
/// Exception thrown by <see cref="Pattern"/> parsers when the expected input is not found.
/// Carries the string being parsed, the position reported by the parser, and the pattern involved.
/// </summary>
public class ParseException : Exception
{
    /// <summary>Creates the exception and records the parsing context handed in by the caller.</summary>
    /// <param name="message">Human readable description, forwarded to <see cref="Exception"/>.</param>
    /// <param name="string">The text that was being parsed.</param>
    /// <param name="position">The position within <paramref name="string"/> supplied by the caller.</param>
    /// <param name="pattern">The pattern that reported the failure.</param>
    public ParseException(string message, string @string, int position, Pattern pattern)
        : base(message)
    {
        String = @string;
        Position = position;
        Pattern = pattern;
    }

    /// <summary>The pattern passed to the constructor.</summary>
    public Pattern Pattern { get; }

    /// <summary>The position passed to the constructor.</summary>
    public int Position { get; }

    /// <summary>The text passed to the constructor.</summary>
    public string String { get; }
}
/// <summary>
/// A node produced by parsing. Every symbol remembers the <see cref="Pattern"/> that produced it
/// and can be enumerated; the base implementation enumerates just the symbol itself.
/// </summary>
public class Symbol : IEnumerable<Symbol>
{
    /// <summary>Shared empty symbol array, used wherever a symbol has no children.</summary>
    // Array.Empty returns a cached zero-length array instead of allocating a dedicated one.
    public static readonly Symbol[] EmptySymbols = Array.Empty<Symbol>();

    /// <summary>Creates a symbol attributed to <paramref name="pattern"/>.</summary>
    /// <param name="pattern">The pattern that produced this symbol.</param>
    public Symbol(Pattern pattern)
    {
        Pattern = pattern;
    }

    /// <summary>The pattern passed to the constructor.</summary>
    public Pattern Pattern { get; }

    /// <summary>Enumerates this symbol only; derived types may override to include more symbols.</summary>
    public virtual IEnumerator<Symbol> GetEnumerator()
    {
        yield return this;
    }

    /// <summary>Non-generic enumeration; forwards to <see cref="GetEnumerator"/>.</summary>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
/// <summary>
/// A leaf symbol that holds a piece of text.
/// </summary>
public class TerminalSymbol : Symbol
{
    /// <summary>Creates a terminal symbol for <paramref name="pattern"/> holding <paramref name="value"/>.</summary>
    /// <param name="pattern">The pattern that produced this symbol.</param>
    /// <param name="value">The text stored in this symbol.</param>
    public TerminalSymbol(Pattern pattern, string value)
        : base(pattern)
    {
        Value = value;
    }

    /// <summary>The text passed to the constructor.</summary>
    public string Value { get; }

    /// <summary>Returns <see cref="Value"/>.</summary>
    public override string ToString() => Value;
}
/// <summary>
/// A symbol made of a sequence of child symbols.
/// </summary>
public class CompositeSymbol : Symbol
{
    /// <summary>Creates a composite symbol for <paramref name="pattern"/> wrapping <paramref name="symbols"/>.</summary>
    /// <param name="pattern">The pattern that produced this symbol.</param>
    /// <param name="symbols">The child symbols; the sequence is stored as given, not copied.</param>
    public CompositeSymbol(Pattern pattern, IEnumerable<Symbol> symbols)
        : base(pattern)
    {
        Symbols = symbols;
    }

    // Child symbols exactly as handed to the constructor.
    private IEnumerable<Symbol> Symbols { get; }

    /// <summary>Concatenates the string form of every child symbol, in order.</summary>
    public override string ToString() => string.Concat<Symbol>(Symbols);

    /// <summary>
    /// Enumerates the symbols reachable from this one using
    /// <c>GraphHelper.ExploreBreadthFirstGraph</c>: composite symbols contribute their children,
    /// any other symbol contributes none.
    /// </summary>
    public override IEnumerator<Symbol> GetEnumerator()
    {
        var reachable = GraphHelper.ExploreBreadthFirstGraph(
            this,
            node => node is CompositeSymbol composite ? composite.Symbols : EmptySymbols,
            EqualityComparer<Symbol>.Default);
        return reachable.GetEnumerator();
    }
}
/// <summary>
/// A named parsing rule. Instances are created through the static factory methods, can be
/// combined into larger rules, and are run against text with <see cref="Parse(string)"/>.
/// Failure to match is reported by throwing <see cref="ParseException"/>.
/// </summary>
public sealed class Pattern
{
    // The parsing behavior of this instance. The constructor installs a default and the factory
    // methods replace it afterwards, because their lambdas capture the finished instance.
    private Func<StringProcessor, Symbol> _parse;

    private Pattern(string name)
    {
        Name = name;
        // Default behavior (used by Empty): produce a composite symbol with no children.
        _parse = _ => new CompositeSymbol(this, Symbol.EmptySymbols);
    }

    /// <summary>The name given to this pattern; it appears in "Expected ..." error messages.</summary>
    public string Name { get; }

    /// <summary>
    /// Creates a sequence pattern named after its parts, e.g. <c>(A + B)</c>.
    /// </summary>
    /// <param name="patterns">The sub-patterns, parsed in order.</param>
    public static Pattern Conjunction(params Pattern[] patterns)
    {
        var name = "(" + string.Join(" + ", patterns.Select(subPattern => subPattern.Name)) + ")";
        return Conjunction(name, patterns);
    }

    /// <summary>
    /// Creates a sequence pattern: every sub-pattern is parsed in order and the results become
    /// the children of one <see cref="CompositeSymbol"/>. A <see cref="ParseException"/> thrown
    /// by any sub-pattern propagates to the caller; this pattern does not rewind the processor.
    /// </summary>
    /// <param name="name">The name of the new pattern.</param>
    /// <param name="patterns">The sub-patterns, parsed in order.</param>
    public static Pattern Conjunction(string name, params Pattern[] patterns)
    {
        var pattern = new Pattern(name);
        pattern._parse = processor =>
            new CompositeSymbol(pattern, patterns.Select(subPattern => subPattern.Parse(processor)).ToArray());
        return pattern;
    }

    /// <summary>
    /// Creates a terminal pattern whose matching logic is supplied by <paramref name="callback"/>.
    /// </summary>
    /// <param name="name">The name of the new pattern.</param>
    /// <param name="callback">
    /// Receives the processor and returns the matched text, or <c>null</c> to signal no match.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="callback"/> is <c>null</c>.</exception>
    public static Pattern Custom(string name, Func<StringProcessor, string> callback)
    {
        if (callback == null)
        {
            throw new ArgumentNullException(nameof(callback));
        }

        var pattern = new Pattern(name);
        pattern._parse = processor =>
        {
            var position = processor.Position;
            var greedy = processor.Greedy;
            string? result = null;
            try
            {
                result = callback(processor);
            }
            finally
            {
                // Runs both when the callback returned null and when it threw: the processor is
                // rewound and a ParseException is raised. Because the throw is inside finally,
                // an exception coming out of the callback is superseded by the ParseException.
                if (result == null)
                {
                    processor.Position = position;
                    processor.Greedy = greedy;
                    throw new ParseException($"Expected {name}", processor.String, processor.Position, pattern);
                }
            }

            return new TerminalSymbol(pattern, result);
        };
        return pattern;
    }

    /// <summary>
    /// Creates an alternative pattern named after its parts, e.g. <c>(A | B)</c>.
    /// </summary>
    /// <param name="patterns">The alternatives, tried in order.</param>
    public static Pattern Disjunction(params Pattern[] patterns)
    {
        var name = "(" + string.Join(" | ", patterns.Select(subPattern => subPattern.Name)) + ")";
        return Disjunction(name, patterns);
    }

    /// <summary>
    /// Creates an alternative pattern: the sub-patterns are tried in order, rewinding the
    /// processor after each failure, and the first success is wrapped in a
    /// <see cref="CompositeSymbol"/>. Throws <see cref="ParseException"/> if none succeeds.
    /// </summary>
    /// <param name="name">The name of the new pattern.</param>
    /// <param name="patterns">The alternatives, tried in order.</param>
    public static Pattern Disjunction(string name, params Pattern[] patterns)
    {
        var pattern = new Pattern(name);
        pattern._parse = processor =>
        {
            foreach (var subPattern in patterns)
            {
                var result = TryParse(subPattern, processor);
                if (result != null)
                {
                    return new CompositeSymbol(pattern, new[] { result });
                }
            }

            throw new ParseException($"Expected {name}", processor.String, processor.Position, pattern);
        };
        return pattern;
    }

    /// <summary>Creates an unnamed pattern that always yields a childless composite symbol.</summary>
    public static Pattern Empty()
    {
        return Empty(string.Empty);
    }

    /// <summary>Creates a pattern that always yields a childless composite symbol.</summary>
    /// <param name="name">The name of the new pattern.</param>
    public static Pattern Empty(string name)
    {
        return new Pattern(name);
    }

    /// <summary>
    /// Creates a pattern for a literal string, named as the quoted literal with inner double
    /// quotes escaped by a backslash.
    /// </summary>
    /// <param name="literal">The text to look for.</param>
    public static Pattern Literal(string literal)
    {
        return Literal("\"" + literal.Replace("\"", "\\\"") + "\"", literal);
    }

    /// <summary>
    /// Creates a pattern that yields a <see cref="TerminalSymbol"/> holding
    /// <paramref name="literal"/> when <c>processor.Read(literal)</c> returns <c>true</c>,
    /// and throws <see cref="ParseException"/> otherwise.
    /// </summary>
    /// <param name="name">The name of the new pattern.</param>
    /// <param name="literal">The text to look for.</param>
    public static Pattern Literal(string name, string literal)
    {
        var pattern = new Pattern(name);
        pattern._parse = processor =>
        {
            if (processor.Read(literal))
            {
                return new TerminalSymbol(pattern, literal);
            }

            throw new ParseException($"Expected {name} ({literal})", processor.String, processor.Position, pattern);
        };
        return pattern;
    }

    /// <summary>
    /// Creates a pattern for a single character, named as the quoted character (a double quote
    /// is escaped by a backslash).
    /// </summary>
    /// <param name="literal">The character to look for.</param>
    public static Pattern Literal(char literal)
    {
        return Literal("\"" + (literal == '"' ? "\\\"" : literal.ToString()) + "\"", literal);
    }

    /// <summary>
    /// Creates a pattern that yields a <see cref="TerminalSymbol"/> holding
    /// <paramref name="literal"/> when <c>processor.Read(literal)</c> returns <c>true</c>,
    /// and throws <see cref="ParseException"/> otherwise.
    /// </summary>
    /// <param name="name">The name of the new pattern.</param>
    /// <param name="literal">The character to look for.</param>
    public static Pattern Literal(string name, char literal)
    {
        var pattern = new Pattern(name);
        pattern._parse = processor =>
        {
            if (processor.Read(literal))
            {
                return new TerminalSymbol(pattern, literal.ToString());
            }

            throw new ParseException($"Expected {name}", processor.String, processor.Position, pattern);
        };
        return pattern;
    }

    /// <summary>Creates an optional pattern named <c>[X]</c> after its sub-pattern.</summary>
    /// <param name="subPattern">The pattern that may or may not match.</param>
    public static Pattern Optional(Pattern subPattern)
    {
        return Optional("[" + subPattern.Name + "]", subPattern);
    }

    /// <summary>
    /// Creates an optional pattern: yields a composite symbol containing the sub-pattern's
    /// result when it matches, or a childless composite symbol (with the processor rewound)
    /// when the sub-pattern throws <see cref="ParseException"/>.
    /// </summary>
    /// <param name="name">The name of the new pattern.</param>
    /// <param name="subPattern">The pattern that may or may not match.</param>
    public static Pattern Optional(string name, Pattern subPattern)
    {
        var pattern = new Pattern(name);
        pattern._parse = processor =>
        {
            var result = TryParse(subPattern, processor);
            return new CompositeSymbol(pattern, result == null ? Symbol.EmptySymbols : new[] { result });
        };
        return pattern;
    }

    /// <summary>Creates a zero-or-more pattern named <c>X*</c> after its sub-pattern.</summary>
    /// <param name="subPattern">The pattern to repeat.</param>
    public static Pattern OptionalRepetition(Pattern subPattern)
    {
        return OptionalRepetition(subPattern.Name + "*", subPattern);
    }

    /// <summary>
    /// Creates a zero-or-more pattern: the sub-pattern is applied repeatedly until it fails,
    /// the end of the string is reached, or it matches without advancing the processor.
    /// Never throws <see cref="ParseException"/> itself.
    /// </summary>
    /// <param name="name">The name of the new pattern.</param>
    /// <param name="subPattern">The pattern to repeat.</param>
    public static Pattern OptionalRepetition(string name, Pattern subPattern)
    {
        var pattern = new Pattern(name);
        pattern._parse = processor => new CompositeSymbol(pattern, ParseRepeated(subPattern, processor));
        return pattern;
    }

    /// <summary>Creates a one-or-more pattern named <c>X+</c> after its sub-pattern.</summary>
    /// <param name="subPattern">The pattern to repeat.</param>
    public static Pattern Repetition(Pattern subPattern)
    {
        return Repetition(subPattern.Name + "+", subPattern);
    }

    /// <summary>
    /// Creates a one-or-more pattern: like <see cref="OptionalRepetition(string, Pattern)"/>,
    /// but throws <see cref="ParseException"/> when the sub-pattern did not match even once.
    /// </summary>
    /// <param name="name">The name of the new pattern.</param>
    /// <param name="subPattern">The pattern to repeat.</param>
    public static Pattern Repetition(string name, Pattern subPattern)
    {
        var pattern = new Pattern(name);
        pattern._parse = processor =>
        {
            var symbols = ParseRepeated(subPattern, processor);
            if (symbols.Count == 0)
            {
                throw new ParseException($"Expected {pattern.Name}", processor.String, processor.Position, pattern);
            }

            return new CompositeSymbol(pattern, symbols);
        };
        return pattern;
    }

    /// <summary>Parses <paramref name="str"/> from its beginning with this pattern.</summary>
    /// <param name="str">The text to parse.</param>
    /// <returns>The symbol produced by this pattern.</returns>
    /// <exception cref="ParseException">The pattern (or one of its parts) did not match.</exception>
    public Symbol Parse(string str)
    {
        var processor = new StringProcessor(str);
        return Parse(processor);
    }

    // Runs this pattern against a processor that may already be partway through the text.
    // _parse is always assigned by the constructor, so it is invoked directly.
    private Symbol Parse(StringProcessor processor)
    {
        return _parse(processor);
    }

    // Attempts subPattern at the processor's current state. Returns the symbol on success;
    // on ParseException returns null. Whenever no symbol was produced (including when another
    // exception type is propagating) Position and Greedy are put back to their prior values.
    private static Symbol? TryParse(Pattern subPattern, StringProcessor processor)
    {
        var position = processor.Position;
        var greedy = processor.Greedy;
        Symbol? result = null;
        try
        {
            result = subPattern.Parse(processor);
        }
        catch (ParseException)
        {
            // A failed attempt is an expected outcome here; the caller decides what it means.
        }
        finally
        {
            if (result == null)
            {
                processor.Position = position;
                processor.Greedy = greedy;
            }
        }

        return result;
    }

    // Applies subPattern repeatedly and collects the results, stopping at end of string, at the
    // first failed attempt, or after a match that left Position unchanged.
    private static List<Symbol> ParseRepeated(Pattern subPattern, StringProcessor processor)
    {
        var symbols = new List<Symbol>();
        while (!processor.EndOfString)
        {
            var start = processor.Position;
            var result = TryParse(subPattern, processor);
            if (result == null)
            {
                break;
            }

            symbols.Add(result);
            if (processor.Position == start)
            {
                // The sub-pattern matched without consuming anything (e.g. an Optional or Empty
                // pattern). Repeating it would succeed the same way again and never terminate,
                // so the zero-width match is recorded once and the loop ends.
                break;
            }
        }

        return symbols;
    }
}
// Demo: parse a chain of double-quoted strings joined by '+' and print every symbol
// that was produced by the "String" pattern.
const string input = "\"hello \" + \"world \" + \" + \" + \"hello\"";

// Building blocks of the grammar.
var quote = Pattern.Literal("QuoteSymbol", '"');
var quotedBody = Pattern.Custom("NonQuoteSymbol", processor => processor.ReadUntil('"'));
var quotedString = Pattern.Conjunction("String", quote, quotedBody, quote);
var blanks = Pattern.Custom("WhiteSpace", processor => processor.ReadWhile(char.IsWhiteSpace));
var plus = Pattern.Literal("PlusSymbol", '+');

// One term is: blanks, a quoted string, blanks, '+'. The document is one or more terms.
var term = Pattern.Conjunction(blanks, quotedString, blanks, plus);
var document = Pattern.Repetition(term);

var matches = document.Parse(input)
    .Where(symbol => symbol.Pattern == quotedString)
    .Select(symbol => symbol.ToString())
    .ToArray();
foreach (var text in matches)
{
    Console.WriteLine(text);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment