Last active
August 29, 2015 14:05
-
-
Save loosechainsaw/5d2bc9ee44c3e8e96cd0 to your computer and use it in GitHub Desktop.
Regex wip part 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace RegularLanguages | |
open System | |
open NUnit.Framework | |
[<AutoOpen>] | |
module Internals = | |
let private (|Char|_|) x = | |
if Char.IsLetter(x) then | |
Some(x) | |
else | |
None | |
let private (|KleeneStar|_|) x = | |
if x = '*' then | |
Some(x) | |
else | |
None | |
let private (|Concatenation|_|) x = | |
if x = '|' then | |
Some(x) | |
else | |
None | |
type private CharacterCategory = | |
| Regular of char | |
| Kleene | |
| Alternation | |
| OpenParen | |
| CloseParen | |
type Regex = | |
| Character of char | |
| Concat of Regex * Regex | |
| Choice of Regex * Regex | |
| Empty | |
| Star of Regex | |
type private MatchResult = {Match : bool; CheckNext: bool; Rest : char list} | |
type private ExpressionResult = { Result: Regex; UnProcessed: char list} | |
let private categorizeCharacter c = | |
match c with | |
| Char(x) -> Regular(x) | |
| KleeneStar(x) -> Kleene | |
| Concatenation(x) -> Alternation | |
| '(' -> OpenParen | |
| ')' -> CloseParen | |
| _ -> failwith "Bad character" | |
let rec private buildRegexImpl (startgrouping:bool) (input:char list) (acc:Regex) : ExpressionResult = | |
match input with | |
| [] -> | |
{ Result = acc; UnProcessed = []} | |
| x :: t -> | |
let category = categorizeCharacter x | |
if startgrouping then | |
match category with | |
| OpenParen -> | |
let other = buildRegexImpl true t Empty | |
if (List.head (other.UnProcessed)) |> categorizeCharacter = CloseParen then | |
buildRegexImpl false (List.tail other.UnProcessed) other.Result | |
else | |
failwith "Missing closing brace" | |
| Regular(c) -> | |
buildRegexImpl false t ( Character(c) ) | |
| _ -> failwith "Unable to start with the character you entered" | |
else | |
match category with | |
| OpenParen -> | |
let other = buildRegexImpl false t Empty | |
if (List.head (other.UnProcessed)) |> categorizeCharacter = CloseParen then | |
buildRegexImpl false (List.tail other.UnProcessed) other.Result | |
else | |
failwith "Missing closing brace" | |
| CloseParen -> | |
{ Result = acc; UnProcessed = input} | |
| Regular(x) -> | |
buildRegexImpl false t <| Concat(acc, Character(x)) | |
| Kleene -> | |
buildRegexImpl false t <| Star(acc) | |
| Alternation-> | |
let result = (buildRegexImpl true t Empty) | |
{ Result = Choice( acc, (result.Result)); UnProcessed = result.UnProcessed} | |
let buildRegex (text:string) = | |
let chars = List.ofArray <| text.ToCharArray() | |
let regex = buildRegexImpl true chars Empty | |
regex.Result | |
let rec private matchesImpl r i : MatchResult = | |
match r with | |
| Character(c) -> | |
match i with | |
| [] -> { Match = false; CheckNext = false; Rest = []} | |
| h :: t -> { Match = (h = c); CheckNext = false; Rest = t} | |
| Star(inner) -> | |
match i with | |
| [] -> { Match = true; CheckNext = false; Rest = []} | |
| _ -> | |
let result = matchesImpl inner i | |
{ result with CheckNext = true} | |
| Concat(l,r) -> | |
match (matchesImpl l i) with | |
| {Match = true;} as t -> (matchesImpl r t.Rest) | |
| _ -> { Match = false; CheckNext = false; Rest = i} | |
| Choice(l,r) -> | |
match (matchesImpl l i) with | |
| {Match = true;} as t -> t | |
| _ as t -> (matchesImpl r i) | |
| _ -> failwith "Bad case" | |
let matches regex (text:string) = | |
let exp = buildRegex regex | |
let result = text.ToCharArray () |> List.ofArray |> (matchesImpl exp) | |
result.Match | |
[<TestFixture>] | |
type RegexTests() = | |
[<Test>] | |
member x.SingleCharMatchTest() = | |
let text = "a" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Character('a'), buildRegex) | |
[<Test>] | |
member x.ParenthisedConcatKleeneMatchTest() = | |
let text = "(ab)*" | |
let buildRegex = buildRegex text | |
let expected = Star(Concat(Character('a'), Character('b'))) | |
Assert.AreEqual( expected, buildRegex) | |
[<Test>] | |
member x.ParenthisedConcatAlternationMatchTest() = | |
let text = "(ab)|c" | |
let buildRegex = buildRegex text | |
let expected = Choice(Concat(Character('a'), Character('b')), Character('c')) | |
Assert.AreEqual( expected, buildRegex) | |
[<Test>] | |
member x.TwoCharacterConcatenationTest() = | |
let text = "ab" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Concat(Character('a'), Character('b')), buildRegex) | |
[<Test>] | |
member x.CharacterWithKleeneTest() = | |
let text = "a*" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Star(Character('a')), buildRegex) | |
[<Test>] | |
member x.CharacterWithKleeneFollowedByCharTest() = | |
let text = "a*b" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Concat(Star(Character('a')),Character('b')), buildRegex) | |
[<Test>] | |
member x.CharacterWithAlternation() = | |
let text = "a|b" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Choice(Character('a'),Character('b')), buildRegex) | |
[<Test>] | |
member x.CharactersWithAlternation() = | |
let text = "a|b|c" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Choice(Character('a'),Choice(Character('b'),Character('c'))), buildRegex) | |
[<Test>] | |
member x.SingleCharMatchesSingleCharRegexTest() = | |
let text = "a" | |
let regex = "a" | |
Assert.IsTrue( (matches regex text)) | |
[<Test>] | |
member x.SingleCharUnionMatchesTest() = | |
let text = "aaaaaaaaa" | |
let regex = "a*" | |
Assert.IsTrue( (matches regex text)) | |
[<Test>] | |
member x.SingleInvalidCharDoesNotMatcheTest() = | |
let text = "c" | |
let regex = "a" | |
Assert.IsFalse( (matches regex text)) | |
[<Test>] | |
member x.SingleCharacterMatchesUnionInFirstPositionRegexTest() = | |
let text = "a" | |
let regex = "a|b" | |
Assert.IsTrue( (matches regex text)) | |
[<Test>] | |
member x.SingleCharacterMatchesUnionInSecondPositionRegexTest() = | |
let text = "b" | |
let regex = "a|b" | |
Assert.IsTrue( (matches regex text)) | |
[<Test>] | |
member x.SingleInvalidCharacterInUnionDoesNotMatch() = | |
let text = "c" | |
let regex = "a|b" | |
Assert.IsFalse( (matches regex text)) | |
[<Test>] | |
member x.SingleCharacterInUnionWithConcatShouldMatch () = | |
let regex = "(ab)|c" | |
let text = "c" | |
Assert.IsTrue( (matches regex text)) | |
[<Test>] | |
member x.SingleInvalidCharacterInUnionWithConcatShouldNotMatch () = | |
let regex = "(ab)|c" | |
let text = "d" | |
Assert.IsFalse( (matches regex text)) | |
[<Test>] | |
member x.TwoCharactersInFirstPartOfUnionWithConcatShouldMatch () = | |
let regex = "(ab)|c" | |
let text = "ab" | |
Assert.IsTrue( (matches regex text)) | |
[<Test>] | |
member x.GivenAUnionOfKleeneOrAlternationAndFirstPartFailsSecondPartShouldMatch () = | |
let regex = "(ab)*|(cd)" | |
let text = "cd" | |
Assert.IsTrue( (matches regex text)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment