Created
August 21, 2014 14:58
-
-
Save loosechainsaw/19d64c007ec9fc0117e2 to your computer and use it in GitHub Desktop.
Regex WIP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace RegularLanguages | |
open System | |
open NUnit.Framework | |
[<AutoOpen>] | |
module Internals = | |
let private (|Char|_|) x = | |
if Char.IsLetter(x) then | |
Some(x) | |
else | |
None | |
let private (|KleeneStar|_|) x = | |
if x = '*' then | |
Some(x) | |
else | |
None | |
let private (|Concatenation|_|) x = | |
if x = '|' then | |
Some(x) | |
else | |
None | |
type private CharacterCategory = | |
| Regular of char | |
| Kleene | |
| Alternation | |
type Regex = | |
| Character of char | |
| Concat of Regex * Regex | |
| Choice of Regex * Regex | |
| Empty | |
| Star of Regex | |
type private MatchResult = {Match : bool; CheckNext: bool; Rest : char list} | |
let private categorizeCharacter c = | |
match c with | |
| Char(x) -> Regular(x) | |
| KleeneStar(x) -> Kleene | |
| Concatenation(x) -> Alternation | |
| _ -> failwith "Bad character" | |
let rec private buildRegexImpl (input : char list) (acc : Regex) : Regex = | |
match input with | |
| [] -> acc | |
| x :: t -> | |
let current = categorizeCharacter x | |
match acc with | |
| Empty -> | |
match current with | |
| Regular(c) -> | |
buildRegexImpl t <| Character(c) | |
| _ -> failwith "Unable to start with the character you entered" | |
| Character(c) -> | |
match current with | |
| Regular(x) -> | |
buildRegexImpl t <| Concat(acc, Character(x)) | |
| Kleene -> | |
buildRegexImpl t <| Star(acc) | |
| Alternation -> | |
Concat( acc, (buildRegexImpl t Empty)) | |
| _ -> failwith "Error" | |
let buildRegex (text:string) = | |
let chars = List.ofArray <| text.ToCharArray() | |
buildRegexImpl chars Empty | |
let rec private matchesImpl r i = | |
match r with | |
| Character(c) -> | |
match i with | |
| [] -> (false, []) | |
| h :: t -> | |
(h = c, t) | |
| Star(inner) -> | |
match i with | |
| [] -> (true, []) | |
| _ -> | |
matchesImpl inner i | |
| Concat(l,r) -> | |
let a = (matchesImpl l i) | |
if fst <| a then | |
(true, snd <| a) | |
else | |
let b = (matchesImpl r i) | |
if fst <| b then | |
(true, snd <| b) | |
else | |
(false, snd <| b) | |
let matches regex (text:string) = | |
let exp = buildRegex regex | |
text.ToCharArray () |> List.ofArray |> matchesImpl exp |> fst | |
[<TestFixture>] | |
type RegexTests() = | |
[<Test>] | |
member x.SingleCharMatchTest() = | |
let text = "a" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Character('a'), buildRegex) | |
[<Test>] | |
member x.TwoCharacterConcatenationTest() = | |
let text = "ab" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Concat(Character('a'), Character('b')), buildRegex) | |
[<Test>] | |
member x.CharacterWithKleeneTest() = | |
let text = "a*" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Star(Character('a')), buildRegex) | |
[<Test>] | |
member x.CharacterWithAlternationt() = | |
let text = "a|b" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Concat(Character('a'),Character('b')), buildRegex) | |
[<Test>] | |
member x.CharactersWithAlternationt() = | |
let text = "a|b|c" | |
let buildRegex = buildRegex text | |
Assert.AreEqual( Concat(Character('a'),Concat(Character('b'),Character('c'))), buildRegex) | |
[<Test>] | |
member x.SingleCharMatchesSingleCharRegexTest() = | |
let text = "a" | |
let regex = "a" | |
Assert.IsTrue( (matches regex text)) | |
[<Test>] | |
member x.SingleCharUnionMatchesTest() = | |
let text = "aaaaaaaaa" | |
let regex = "a*" | |
Assert.IsTrue( (matches regex text)) | |
[<Test>] | |
member x.SingleInvalidCharDoesNotMatcheTest() = | |
let text = "c" | |
let regex = "a" | |
Assert.IsFalse( (matches regex text)) | |
[<Test>] | |
member x.SingleCharacterMatchesUnionInFirstPositionRegexTest() = | |
let text = "a" | |
let regex = "a|b" | |
Assert.IsTrue( (matches regex text)) | |
[<Test>] | |
member x.SingleCharacterMatchesUnionInSecondPositionRegexTest() = | |
let text = "b" | |
let regex = "a|b" | |
Assert.IsTrue( (matches regex text)) | |
[<Test>] | |
member x.SingleInvalidCharacterInUnionDoesNotMatch() = | |
let text = "c" | |
let regex = "a|b" | |
Assert.IsFalse( (matches regex text)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment