Last active
January 15, 2016 11:22
-
-
Save rflechner/60fc8a1074fb21cb5ff5 to your computer and use it in GitHub Desktop.
Small FSharp parsing toolkit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(** | |
Small parsing bases classes inspired by technique used to parse HTML in FSharp.Data. | |
(see https://github.com/fsharp/FSharp.Data/blob/master/src/Html/HtmlParser.fs#L226) | |
I liked this parsing strategy and it inspired me for other parsing algorithms. | |
*) | |
module ParsingBase | |
open System | |
open System.IO | |
type BufferedString = | |
{ mutable Content:string } | |
static member Empty = { Content=String.Empty } | |
member x.IsNullOrEmpty() = x.Content |> String.IsNullOrEmpty | |
member x.Chars i = x.Content.Chars i | |
member x.RemoveFirst i = | |
if i > x.Content.Length | |
then x.Clear() | |
else x.Content <- x.Content.Substring(i) | |
member x.Length with get() = x.Content.Length | |
member x.Clear() = x.Content <- String.Empty | |
member x.ToCharArray() = x.Content.ToCharArray() | |
member x.SetContent s = x.Content <- s | |
type SubBufferedTextReader (reader:TextReader) = | |
let buffer:BufferedString = BufferedString.Empty | |
member x.PeekChar() = | |
if buffer.IsNullOrEmpty() | |
then reader.Peek() |> char | |
else buffer.Chars 0 | |
member x.ReadNChar n = | |
if buffer.IsNullOrEmpty() | |
then | |
let chars = Array.zeroCreate n | |
reader.ReadBlock(chars, 0, n) |> ignore | |
String(chars) | |
elif buffer.Length >= n then | |
let s = buffer.Content.Substring(0, n) | |
buffer.RemoveFirst n | |
s | |
else | |
let l = buffer.Length - n | |
let chars = Array.zeroCreate l | |
reader.ReadBlock(chars, 0, n) |> ignore | |
let s = buffer.Content | |
buffer.Clear() | |
s + String(chars) | |
member x.Peek() = | |
if buffer.IsNullOrEmpty() | |
then reader.Peek() | |
else buffer.Chars 0 |> int | |
member x.Read() = x.ReadNChar 1 | |
member x.Pop() = x.Read() |> ignore | |
member x.Pop(count) = | |
[|0..(count-1)|] |> Array.map (fun _ -> x.ReadChar()) | |
member x.ReadChar() = | |
if buffer.IsNullOrEmpty() | |
then x.Read() |> char | |
else | |
let c = buffer.Chars 0 | |
buffer.RemoveFirst 1 | |
c | |
member x.PeekNChar n = | |
if n <= 1 | |
then [|x.PeekChar()|] | |
elif buffer.IsNullOrEmpty() then | |
let chars = Array.zeroCreate n | |
reader.ReadBlock(chars, 0, n) |> ignore | |
let s = String chars | |
buffer.SetContent s | |
let b = buffer.ToCharArray() | |
b | |
else | |
let l = n - buffer.Length | |
let chars = Array.zeroCreate l | |
reader.ReadBlock(chars, 0, l) |> ignore | |
buffer.SetContent (buffer.Content + String(chars)) | |
buffer.ToCharArray() | |
type CharList = | |
{ mutable Contents : char list } | |
static member Empty = { Contents = [] } | |
override x.ToString() = String(x.Contents |> List.rev |> List.toArray) | |
member x.Acc c = x.Contents <- c :: x.Contents | |
member x.Length = x.Contents.Length | |
member x.Clear() = x.Contents <- [] | |
type ParsingError = | |
{ Message:string } | |
[<AbstractClass>] | |
type StateBase<'t, 'c> (txt:TextReader, defaultContext:'c) = | |
let content : CharList ref = ref CharList.Empty | |
let tokens : 't list ref = ref List.Empty | |
let context : 'c ref = ref defaultContext | |
let reader : SubBufferedTextReader = SubBufferedTextReader(txt) | |
let errors : ParsingError list ref = ref List.Empty | |
let bag : string list ref = ref List.Empty | |
member x.Pop() = reader.Read() |> ignore | |
member x.Peek() = reader.PeekChar() | |
member x.PeekN n = reader.PeekNChar n | |
member x.Pop(count) = | |
[|0..(count-1)|] |> Array.map (fun _ -> reader.ReadChar()) |> ignore | |
member x.Contents = (!content).ToString().Trim() | |
member x.ContentLength = (!content).Length | |
member x.Acc() = (!content).Acc(reader.ReadChar()) | |
member x.ClearContent() = content := CharList.Empty | |
member x.Emit (token:'t) = | |
tokens := token :: !tokens | |
x.ClearContent() | |
member x.SwithContext c = context := c | |
member x.EmitWith (f : unit -> 't) = x.Emit(f()) | |
member x.Content with get () = !content | |
member x.Tokens with get () = !tokens |> List.rev | |
member x.Context with get () = !context | |
member x.Reader with get () = reader | |
member x.AddError e = | |
errors := e :: !errors | |
member x.Errors with get () = !errors | |
member x.ClearBag() = bag := List.Empty | |
member x.PushBag s = bag := s :: !bag | |
member x.Bag with get() = !bag | |
member x.ClearTokens() = | |
errors := List.Empty | |
type TokenizingResult<'t> = | |
{ Tokens: 't list | |
Errors : ParsingError list } | |
[<AbstractClass>] | |
type TokenizerBase<'t,'c,'s when 's :> StateBase<'t,'c>>(stream:Stream) = | |
let reader = new StreamReader(stream) | |
let state : 's option ref = ref None | |
abstract member CreateState : StreamReader -> 's | |
abstract member Accumulate : 's -> unit | |
abstract member StopParsing : 's -> bool | |
member private x.InitState() = | |
match !state with | |
| Some s -> s | |
| None -> | |
let s = x.CreateState reader | |
state := Some s | |
s | |
member x.Parse() = | |
let s = x.InitState() | |
let next = ref (s.Reader.Peek()) | |
try | |
while !next > 0 && not (x.StopParsing s) do | |
x.Accumulate s | |
next := s.Reader.Peek() | |
with | |
| _ -> () | |
{ Tokens = s.Tokens; Errors = s.Errors } | |
type ParsingResult<'t, 'e> = | |
| Success of model:'t | |
| Failure of errors:'e list | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment