Skip to content

Instantly share code, notes, and snippets.

@loosechainsaw
Last active August 29, 2015 14:05
Show Gist options
  • Save loosechainsaw/5d2bc9ee44c3e8e96cd0 to your computer and use it in GitHub Desktop.
Save loosechainsaw/5d2bc9ee44c3e8e96cd0 to your computer and use it in GitHub Desktop.
Regex wip part 2
namespace RegularLanguages
open System
open NUnit.Framework
[<AutoOpen>]
module Internals =
let private (|Char|_|) x =
if Char.IsLetter(x) then
Some(x)
else
None
let private (|KleeneStar|_|) x =
if x = '*' then
Some(x)
else
None
let private (|Concatenation|_|) x =
if x = '|' then
Some(x)
else
None
type private CharacterCategory =
| Regular of char
| Kleene
| Alternation
| OpenParen
| CloseParen
type Regex =
| Character of char
| Concat of Regex * Regex
| Choice of Regex * Regex
| Empty
| Star of Regex
type private MatchResult = {Match : bool; CheckNext: bool; Rest : char list}
type private ExpressionResult = { Result: Regex; UnProcessed: char list}
let private categorizeCharacter c =
match c with
| Char(x) -> Regular(x)
| KleeneStar(x) -> Kleene
| Concatenation(x) -> Alternation
| '(' -> OpenParen
| ')' -> CloseParen
| _ -> failwith "Bad character"
let rec private buildRegexImpl (startgrouping:bool) (input:char list) (acc:Regex) : ExpressionResult =
match input with
| [] ->
{ Result = acc; UnProcessed = []}
| x :: t ->
let category = categorizeCharacter x
if startgrouping then
match category with
| OpenParen ->
let other = buildRegexImpl true t Empty
if (List.head (other.UnProcessed)) |> categorizeCharacter = CloseParen then
buildRegexImpl false (List.tail other.UnProcessed) other.Result
else
failwith "Missing closing brace"
| Regular(c) ->
buildRegexImpl false t ( Character(c) )
| _ -> failwith "Unable to start with the character you entered"
else
match category with
| OpenParen ->
let other = buildRegexImpl false t Empty
if (List.head (other.UnProcessed)) |> categorizeCharacter = CloseParen then
buildRegexImpl false (List.tail other.UnProcessed) other.Result
else
failwith "Missing closing brace"
| CloseParen ->
{ Result = acc; UnProcessed = input}
| Regular(x) ->
buildRegexImpl false t <| Concat(acc, Character(x))
| Kleene ->
buildRegexImpl false t <| Star(acc)
| Alternation->
let result = (buildRegexImpl true t Empty)
{ Result = Choice( acc, (result.Result)); UnProcessed = result.UnProcessed}
let buildRegex (text:string) =
let chars = List.ofArray <| text.ToCharArray()
let regex = buildRegexImpl true chars Empty
regex.Result
let rec private matchesImpl r i : MatchResult =
match r with
| Character(c) ->
match i with
| [] -> { Match = false; CheckNext = false; Rest = []}
| h :: t -> { Match = (h = c); CheckNext = false; Rest = t}
| Star(inner) ->
match i with
| [] -> { Match = true; CheckNext = false; Rest = []}
| _ ->
let result = matchesImpl inner i
{ result with CheckNext = true}
| Concat(l,r) ->
match (matchesImpl l i) with
| {Match = true;} as t -> (matchesImpl r t.Rest)
| _ -> { Match = false; CheckNext = false; Rest = i}
| Choice(l,r) ->
match (matchesImpl l i) with
| {Match = true;} as t -> t
| _ as t -> (matchesImpl r i)
| _ -> failwith "Bad case"
let matches regex (text:string) =
let exp = buildRegex regex
let result = text.ToCharArray () |> List.ofArray |> (matchesImpl exp)
result.Match
[<TestFixture>]
type RegexTests() =
[<Test>]
member x.SingleCharMatchTest() =
let text = "a"
let buildRegex = buildRegex text
Assert.AreEqual( Character('a'), buildRegex)
[<Test>]
member x.ParenthisedConcatKleeneMatchTest() =
let text = "(ab)*"
let buildRegex = buildRegex text
let expected = Star(Concat(Character('a'), Character('b')))
Assert.AreEqual( expected, buildRegex)
[<Test>]
member x.ParenthisedConcatAlternationMatchTest() =
let text = "(ab)|c"
let buildRegex = buildRegex text
let expected = Choice(Concat(Character('a'), Character('b')), Character('c'))
Assert.AreEqual( expected, buildRegex)
[<Test>]
member x.TwoCharacterConcatenationTest() =
let text = "ab"
let buildRegex = buildRegex text
Assert.AreEqual( Concat(Character('a'), Character('b')), buildRegex)
[<Test>]
member x.CharacterWithKleeneTest() =
let text = "a*"
let buildRegex = buildRegex text
Assert.AreEqual( Star(Character('a')), buildRegex)
[<Test>]
member x.CharacterWithKleeneFollowedByCharTest() =
let text = "a*b"
let buildRegex = buildRegex text
Assert.AreEqual( Concat(Star(Character('a')),Character('b')), buildRegex)
[<Test>]
member x.CharacterWithAlternation() =
let text = "a|b"
let buildRegex = buildRegex text
Assert.AreEqual( Choice(Character('a'),Character('b')), buildRegex)
[<Test>]
member x.CharactersWithAlternation() =
let text = "a|b|c"
let buildRegex = buildRegex text
Assert.AreEqual( Choice(Character('a'),Choice(Character('b'),Character('c'))), buildRegex)
[<Test>]
member x.SingleCharMatchesSingleCharRegexTest() =
let text = "a"
let regex = "a"
Assert.IsTrue( (matches regex text))
[<Test>]
member x.SingleCharUnionMatchesTest() =
let text = "aaaaaaaaa"
let regex = "a*"
Assert.IsTrue( (matches regex text))
[<Test>]
member x.SingleInvalidCharDoesNotMatcheTest() =
let text = "c"
let regex = "a"
Assert.IsFalse( (matches regex text))
[<Test>]
member x.SingleCharacterMatchesUnionInFirstPositionRegexTest() =
let text = "a"
let regex = "a|b"
Assert.IsTrue( (matches regex text))
[<Test>]
member x.SingleCharacterMatchesUnionInSecondPositionRegexTest() =
let text = "b"
let regex = "a|b"
Assert.IsTrue( (matches regex text))
[<Test>]
member x.SingleInvalidCharacterInUnionDoesNotMatch() =
let text = "c"
let regex = "a|b"
Assert.IsFalse( (matches regex text))
[<Test>]
member x.SingleCharacterInUnionWithConcatShouldMatch () =
let regex = "(ab)|c"
let text = "c"
Assert.IsTrue( (matches regex text))
[<Test>]
member x.SingleInvalidCharacterInUnionWithConcatShouldNotMatch () =
let regex = "(ab)|c"
let text = "d"
Assert.IsFalse( (matches regex text))
[<Test>]
member x.TwoCharactersInFirstPartOfUnionWithConcatShouldMatch () =
let regex = "(ab)|c"
let text = "ab"
Assert.IsTrue( (matches regex text))
[<Test>]
member x.GivenAUnionOfKleeneOrAlternationAndFirstPartFailsSecondPartShouldMatch () =
let regex = "(ab)*|(cd)"
let text = "cd"
Assert.IsTrue( (matches regex text))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment