praeclarum · November 26, 2020 22:29 · praeclarum · May 15, 2016
diff --git a/Parsing.fs b/Parsing.fs
 module Parsing

 /// Remember where we are in the code.
 /// This is a struct to keep memory pressure down.
 /// (Significant perf improvements on iOS.)
 type ParseState =
    struct
        val Code : string
        val Index : int
        new (code : string, index : int) =
            { Code = code; Index = index }
    end
    member p.Rem = p.Code.Length - p.Index
    member p.WithIndex i = ParseState (p.Code, i)

 /// Parsing results in either a value and the next parse state
 /// or a failure (NoResult).
 type ParseResult<'a> =
    | Value of 'a * ParseState
    | NoResult

 /// A parser simply takes a parse state and produces a value
 /// and the proceeding parse state.
 type Parser<'a> = ParseState -> ParseResult<'a>

 /// Stupid .NET number parsing functions can't work on substrings.
 /// This is here just to keep the memory of lexing down.
 let inline parseInt (s : string) (startIndex : int) (length : int) : int =
    let mutable r = 0
    let mutable i = 0
    while i < length do
        r <- (r * 10) + (int s.[startIndex + i] - int '0')
        i <- i + 1
    r

 /// Lexing is slow mkay, and we don't want to slow it down
 /// by constantly allocating strings. So we have this little
 /// class to lazily get substrings from the source code.
 type Substring =
    struct
        val Source : string
        val Index : int
        val Length : int
        new (source : string, index : int, length : int) =
            { Source = source; Index = index; Length = length }
    end
    member this.Value = if this.Length = 0 then "" else this.Source.Substring (this.Index, this.Length)
    member this.GetInt32Value () = parseInt this.Source this.Index this.Length
    member this.GetDoubleValue () =
        System.Double.Parse (this.Source.Substring (this.Index, this.Length),
                             System.Globalization.CultureInfo.InvariantCulture)
    member x.Eq (y : string) =
        let n = x.Length
        if n <> y.Length then false
        else
            let mutable ok = true
            let mutable i = 0
            while ok && i < n do
                ok <- x.Source.[x.Index + i] = y.[i]
                i <- i + 1
            ok
    override this.ToString () = this.Value

 let emptySubstring = Substring ("", 0, 0)

 /// Match two parsers in sequence.
 let ( &&& ) (a : Parser<'a>) (b : Parser<'b>) : Parser<'a * 'b> =
    fun p ->
        match a p with
        | Value (av, ap) ->
            match b ap with
            | Value (bv, bp) -> Value ((av, bv), bp)
            | NoResult -> NoResult
        | NoResult -> NoResult

 /// Match one or the other parsers.
 let ( ||| ) (a : Parser<'a>) (b : Parser<'a>) : Parser<'a> =
    fun p ->
        match a p with
        | NoResult ->
            match b p with
            | NoResult -> NoResult
            | x -> x
        | x -> x

 /// Transform the result of a parser. Use this to construct
 /// your AST.
 let ( >>> ) (a : Parser<'a>) (m : 'a -> 'b) : Parser<'b> =
    fun p ->
        match a p with
        | Value (av, ap) -> Value (m av, ap)
        | NoResult -> NoResult

 /// Match two parsers in sequence and return the second value.
 let ( &&> ) (a : Parser<'a>) (b : Parser<'b>) : Parser<'b> =
    fun p ->
        match a p with
        | Value (_, ap) ->
            match b ap with
            | Value (bv, bp) -> Value (bv, bp)
            | NoResult -> NoResult
        | NoResult -> NoResult

 /// Match two parsers in sequence and return the first value.
 let ( &&< ) (a : Parser<'a>) (b : Parser<'b>) : Parser<'a> =
    fun p ->
        match a p with
        | Value (av, ap) ->
            match b ap with
            | Value (_, bp) -> Value (av, bp)
            | NoResult -> NoResult
        | NoResult -> NoResult

 /// Optional parsing - always succeeds
 let opt (a : Parser<'a>) : Parser<'a option> =
    fun p ->
        match a p with
        | Value (av, ap) -> Value (Some av, ap)
        | NoResult -> Value (None, p)

 /// Repeat a given parser 0 or more times
 let zeroOrMore (a : Parser<'a>) : Parser<ResizeArray<'a>> =
    fun p ->
        let mutable cp = p
        let mutable cont = true
        let mutable r = ResizeArray<'a> ()
        while cont do
            match a cp with
            | Value (av, ap) -> r.Add (av); cp <- ap
            | NoResult -> cont <- false
        Value (r, cp)

 /// Repeat a given parser 0 or more times with a given separator
 let zeroOrMoreSep (sep) (a : Parser<'a>) : Parser<ResizeArray<'a>> =
    fun p ->
        let mutable cp = p
        let mutable cont = true
        let mutable r = ResizeArray<'a> ()
        let mutable needsSep = false
        while cont do
            let scp =
                if needsSep then
                    match sep cp with
                    | Value (_, p) -> p
                    | NoResult -> cont <- false; cp
                else cp
            if cont then
                match a scp with
                | Value (av, ap) -> r.Add (av); cp <- ap; needsSep <- true
                | NoResult -> cont <- false
        Value (r, cp)

 /// Skip white space in a ParseState
 let ws (s : ParseState) : ParseState =
    let n = s.Code.Length
    if s.Index >= n then s
    else
        let mutable i = s.Index
        let mutable ch = s.Code.[i]
        while i < n && System.Char.IsWhiteSpace (ch) do
            i <- i + 1
            if i < n then ch <- s.Code.[i]
        if i = s.Index then s
        else s.WithIndex i

 /// A fixed sequence of characters
 let tok (s : string) : Parser<string> =
    let n = s.Length
    fun p ->
        let p = ws p
        if p.Rem < n then NoResult
        else
            let mutable ok = true
            let mutable i = 0
            while ok && i < n do
                ok <- s.[i] = p.Code.[p.Index + i]
                i <- i + 1
            if ok then Value (s, p.WithIndex (p.Index + n))
            else NoResult

 let isIdentStart (ch : char) =
    ch = '_' || System.Char.IsLetter (ch)
    
 let isIdent (ch : char) =
    ch = '_' || System.Char.IsLetterOrDigit (ch)

 /// A C-style identifier
 let ident (p : ParseState) : ParseResult<Substring> =
    let p = ws p
    if p.Rem < 1 then NoResult
    else
        let mutable ok = isIdentStart p.Code.[p.Index]
        if not ok then NoResult
        else
            let n = p.Code.Length
            let start = p.Index
            let mutable i = start
            while ok && i < n do
                ok <- isIdent p.Code.[i]
                if ok then i <- i + 1
            let r = Substring (p.Code, start, i - start)
            Value (r, p.WithIndex i)

 /// Parse a number with optional leading sign.
 /// This is stuck on American style numbers.
 let number (leadingSign : bool) (allowDecimal : bool) (p : ParseState) : ParseResult<Substring> =
    let p = ws p
    if p.Rem < 1 then NoResult
    else
        let start = p.Index
        let lead = p.Code.[p.Index]
        let mutable ok, i =
            match allowDecimal, leadingSign, lead with
            | _, true, '+' -> true, start + 1
            | _, true, '-' -> true, start + 1
            | true, _, '.' -> true, start
            | _, _, c when System.Char.IsDigit c -> true, start
            | _ -> false, start
        if not ok then NoResult
        else
            let n = p.Code.Length
            let mutable hasDig = false
            let mutable hasDecDot = false
            let mutable hasExp = false
            let mutable hasExpSign = false
            while ok && i < n do
                ok <- 
                    match p.Code.[i] with
                    | '.' -> if not allowDecimal || hasDecDot then false
                             else hasDecDot <- true; true
                    | 'e'
                    | 'E' -> if not allowDecimal || not hasDig || hasExp then false
                             else hasExp <- true; true
                    | '-' -> if not allowDecimal || not hasExp || hasExpSign then false
                             else hasExpSign <- true; true
                    | '+' -> if not allowDecimal || not hasExp || hasExpSign then false
                             else hasExpSign <- true; true
                    | c when System.Char.IsDigit c -> hasDig <- true; true
                    | _ -> false
                if ok then
                    i <- i + 1
            if hasDig then
                let r = Substring (p.Code, start, i - start)
                Value (r, p.WithIndex i)
            else NoResult

 /// Parse a string delineated by the same start and stop char
 /// While '\' is obeyed as in C, no post-processing is performed
 /// to transform them.
 let str (term : char) (p : ParseState) : ParseResult<Substring> =
    let p = ws p
    if p.Rem < 2 then NoResult
    else
        let mutable ok = (p.Code.[p.Index] = term)
        if not ok then NoResult
        else
            let n = p.Code.Length
            let start = p.Index
            let mutable i = start + 1
            while ok && i < n do
                if p.Code.[i] = term then ok <- false; i <- i + 1
                else
                    if p.Code.[i] = '\\' && (i + 1) < n then
                        i <- i + 2
                    else
                        i <- i + 1
            let r = Substring (p.Code, start + 1, i - start - 2)
            Value (r, p.WithIndex i)

 /// Parse a regex and return the whole thing. You won't believe how
 /// slow these functions are. Profile their usage, it's amazing.
 let re (pattern : string) : Parser<string> =
    let r = new System.Text.RegularExpressions.Regex (pattern)
    fun p ->
        let m = r.Match (p.Code, p.Index)
        if m.Success && m.Index = p.Index then Value (m.Value, p.WithIndex (p.Index + m.Length))
        else NoResult

 /// Parse a regex and return one of the groups.
 /// This is great when prototyping a language, but omg,
 /// don't use this in production. Regex is amazingly slow
 /// and memory hungry.
 let reg (pattern : string) (groupIndex : int) : Parser<string> =
    let r = new System.Text.RegularExpressions.Regex (pattern)
    fun p ->
        let m = r.Match (p.Code, p.Index)
        if m.Success && m.Index = p.Index then Value (m.Groups.[groupIndex].Value, p.WithIndex (p.Index + m.Length))
        else NoResult
	module Parsing

	/// Remember where we are in the code.
	/// This is a struct to keep memory pressure down.
	/// (Significant perf improvements on iOS.)
	type ParseState =
	struct
	val Code : string
	val Index : int
	new (code : string, index : int) =
	{ Code = code; Index = index }
	end
	member p.Rem = p.Code.Length - p.Index
	member p.WithIndex i = ParseState (p.Code, i)

	/// Parsing results in either a value and the next parse state
	/// or a failure (NoResult).
	type ParseResult<'a> =
	\| Value of 'a * ParseState
	\| NoResult

	/// A parser simply takes a parse state and produces a value
	/// and the proceeding parse state.
	type Parser<'a> = ParseState -> ParseResult<'a>

	/// Stupid .NET number parsing functions can't work on substrings.
	/// This is here just to keep the memory of lexing down.
	let inline parseInt (s : string) (startIndex : int) (length : int) : int =
	let mutable r = 0
	let mutable i = 0
	while i < length do
	r <- (r * 10) + (int s.[startIndex + i] - int '0')
	i <- i + 1
	r

	/// Lexing is slow mkay, and we don't want to slow it down
	/// by constantly allocating strings. So we have this little
	/// class to lazily get substrings from the source code.
	type Substring =
	struct
	val Source : string
	val Index : int
	val Length : int
	new (source : string, index : int, length : int) =
	{ Source = source; Index = index; Length = length }
	end
	member this.Value = if this.Length = 0 then "" else this.Source.Substring (this.Index, this.Length)
	member this.GetInt32Value () = parseInt this.Source this.Index this.Length
	member this.GetDoubleValue () =
	System.Double.Parse (this.Source.Substring (this.Index, this.Length),
	System.Globalization.CultureInfo.InvariantCulture)
	member x.Eq (y : string) =
	let n = x.Length
	if n <> y.Length then false
	else
	let mutable ok = true
	let mutable i = 0
	while ok && i < n do
	ok <- x.Source.[x.Index + i] = y.[i]
	i <- i + 1
	ok
	override this.ToString () = this.Value

	let emptySubstring = Substring ("", 0, 0)

	/// Match two parsers in sequence.
	let ( &&& ) (a : Parser<'a>) (b : Parser<'b>) : Parser<'a * 'b> =
	fun p ->
	match a p with
	\| Value (av, ap) ->
	match b ap with
	\| Value (bv, bp) -> Value ((av, bv), bp)
	\| NoResult -> NoResult
	\| NoResult -> NoResult

	/// Match one or the other parsers.
	let ( \|\|\| ) (a : Parser<'a>) (b : Parser<'a>) : Parser<'a> =
	fun p ->
	match a p with
	\| NoResult ->
	match b p with
	\| NoResult -> NoResult
	\| x -> x
	\| x -> x

	/// Transform the result of a parser. Use this to construct
	/// your AST.
	let ( >>> ) (a : Parser<'a>) (m : 'a -> 'b) : Parser<'b> =
	fun p ->
	match a p with
	\| Value (av, ap) -> Value (m av, ap)
	\| NoResult -> NoResult

	/// Match two parsers in sequence and return the second value.
	let ( &&> ) (a : Parser<'a>) (b : Parser<'b>) : Parser<'b> =
	fun p ->
	match a p with
	\| Value (_, ap) ->
	match b ap with
	\| Value (bv, bp) -> Value (bv, bp)
	\| NoResult -> NoResult
	\| NoResult -> NoResult

	/// Match two parsers in sequence and return the first value.
	let ( &&< ) (a : Parser<'a>) (b : Parser<'b>) : Parser<'a> =
	fun p ->
	match a p with
	\| Value (av, ap) ->
	match b ap with
	\| Value (_, bp) -> Value (av, bp)
	\| NoResult -> NoResult
	\| NoResult -> NoResult

	/// Optional parsing - always succeeds
	let opt (a : Parser<'a>) : Parser<'a option> =
	fun p ->
	match a p with
	\| Value (av, ap) -> Value (Some av, ap)
	\| NoResult -> Value (None, p)

	/// Repeat a given parser 0 or more times
	let zeroOrMore (a : Parser<'a>) : Parser<ResizeArray<'a>> =
	fun p ->
	let mutable cp = p
	let mutable cont = true
	let mutable r = ResizeArray<'a> ()
	while cont do
	match a cp with
	\| Value (av, ap) -> r.Add (av); cp <- ap
	\| NoResult -> cont <- false
	Value (r, cp)

	/// Repeat a given parser 0 or more times with a given separator
	let zeroOrMoreSep (sep) (a : Parser<'a>) : Parser<ResizeArray<'a>> =
	fun p ->
	let mutable cp = p
	let mutable cont = true
	let mutable r = ResizeArray<'a> ()
	let mutable needsSep = false
	while cont do
	let scp =
	if needsSep then
	match sep cp with
	\| Value (_, p) -> p
	\| NoResult -> cont <- false; cp
	else cp
	if cont then
	match a scp with
	\| Value (av, ap) -> r.Add (av); cp <- ap; needsSep <- true
	\| NoResult -> cont <- false
	Value (r, cp)

	/// Skip white space in a ParseState
	let ws (s : ParseState) : ParseState =
	let n = s.Code.Length
	if s.Index >= n then s
	else
	let mutable i = s.Index
	let mutable ch = s.Code.[i]
	while i < n && System.Char.IsWhiteSpace (ch) do
	i <- i + 1
	if i < n then ch <- s.Code.[i]
	if i = s.Index then s
	else s.WithIndex i

	/// A fixed sequence of characters
	let tok (s : string) : Parser<string> =
	let n = s.Length
	fun p ->
	let p = ws p
	if p.Rem < n then NoResult
	else
	let mutable ok = true
	let mutable i = 0
	while ok && i < n do
	ok <- s.[i] = p.Code.[p.Index + i]
	i <- i + 1
	if ok then Value (s, p.WithIndex (p.Index + n))
	else NoResult

	let isIdentStart (ch : char) =
	ch = '_' \|\| System.Char.IsLetter (ch)

	let isIdent (ch : char) =
	ch = '_' \|\| System.Char.IsLetterOrDigit (ch)

	/// A C-style identifier
	let ident (p : ParseState) : ParseResult<Substring> =
	let p = ws p
	if p.Rem < 1 then NoResult
	else
	let mutable ok = isIdentStart p.Code.[p.Index]
	if not ok then NoResult
	else
	let n = p.Code.Length
	let start = p.Index
	let mutable i = start
	while ok && i < n do
	ok <- isIdent p.Code.[i]
	if ok then i <- i + 1
	let r = Substring (p.Code, start, i - start)
	Value (r, p.WithIndex i)

	/// Parse a number with optional leading sign.
	/// This is stuck on American style numbers.
	let number (leadingSign : bool) (allowDecimal : bool) (p : ParseState) : ParseResult<Substring> =
	let p = ws p
	if p.Rem < 1 then NoResult
	else
	let start = p.Index
	let lead = p.Code.[p.Index]
	let mutable ok, i =
	match allowDecimal, leadingSign, lead with
	\| _, true, '+' -> true, start + 1
	\| _, true, '-' -> true, start + 1
	\| true, _, '.' -> true, start
	\| _, _, c when System.Char.IsDigit c -> true, start
	\| _ -> false, start
	if not ok then NoResult
	else
	let n = p.Code.Length
	let mutable hasDig = false
	let mutable hasDecDot = false
	let mutable hasExp = false
	let mutable hasExpSign = false
	while ok && i < n do
	ok <-
	match p.Code.[i] with
	\| '.' -> if not allowDecimal \|\| hasDecDot then false
	else hasDecDot <- true; true
	\| 'e'
	\| 'E' -> if not allowDecimal \|\| not hasDig \|\| hasExp then false
	else hasExp <- true; true
	\| '-' -> if not allowDecimal \|\| not hasExp \|\| hasExpSign then false
	else hasExpSign <- true; true
	\| '+' -> if not allowDecimal \|\| not hasExp \|\| hasExpSign then false
	else hasExpSign <- true; true
	\| c when System.Char.IsDigit c -> hasDig <- true; true
	\| _ -> false
	if ok then
	i <- i + 1
	if hasDig then
	let r = Substring (p.Code, start, i - start)
	Value (r, p.WithIndex i)
	else NoResult

	/// Parse a string delineated by the same start and stop char
	/// While '\' is obeyed as in C, no post-processing is performed
	/// to transform them.
	let str (term : char) (p : ParseState) : ParseResult<Substring> =
	let p = ws p
	if p.Rem < 2 then NoResult
	else
	let mutable ok = (p.Code.[p.Index] = term)
	if not ok then NoResult
	else
	let n = p.Code.Length
	let start = p.Index
	let mutable i = start + 1
	while ok && i < n do
	if p.Code.[i] = term then ok <- false; i <- i + 1
	else
	if p.Code.[i] = '\\' && (i + 1) < n then
	i <- i + 2
	else
	i <- i + 1
	let r = Substring (p.Code, start + 1, i - start - 2)
	Value (r, p.WithIndex i)

	/// Parse a regex and return the whole thing. You won't believe how
	/// slow these functions are. Profile their usage, it's amazing.
	let re (pattern : string) : Parser<string> =
	let r = new System.Text.RegularExpressions.Regex (pattern)
	fun p ->
	let m = r.Match (p.Code, p.Index)
	if m.Success && m.Index = p.Index then Value (m.Value, p.WithIndex (p.Index + m.Length))
	else NoResult

	/// Parse a regex and return one of the groups.
	/// This is great when prototyping a language, but omg,
	/// don't use this in production. Regex is amazingly slow
	/// and memory hungry.
	let reg (pattern : string) (groupIndex : int) : Parser<string> =
	let r = new System.Text.RegularExpressions.Regex (pattern)
	fun p ->
	let m = r.Match (p.Code, p.Index)
	if m.Success && m.Index = p.Index then Value (m.Groups.[groupIndex].Value, p.WithIndex (p.Index + m.Length))
	else NoResult