Skip to content

Instantly share code, notes, and snippets.

@loosechainsaw
Created August 21, 2014 14:58
Show Gist options
  • Save loosechainsaw/19d64c007ec9fc0117e2 to your computer and use it in GitHub Desktop.
Save loosechainsaw/19d64c007ec9fc0117e2 to your computer and use it in GitHub Desktop.
Regex WIP
namespace RegularLanguages
open System
open NUnit.Framework
[<AutoOpen>]
module Internals =
let private (|Char|_|) x =
if Char.IsLetter(x) then
Some(x)
else
None
let private (|KleeneStar|_|) x =
if x = '*' then
Some(x)
else
None
let private (|Concatenation|_|) x =
if x = '|' then
Some(x)
else
None
type private CharacterCategory =
| Regular of char
| Kleene
| Alternation
type Regex =
| Character of char
| Concat of Regex * Regex
| Choice of Regex * Regex
| Empty
| Star of Regex
type private MatchResult = {Match : bool; CheckNext: bool; Rest : char list}
let private categorizeCharacter c =
match c with
| Char(x) -> Regular(x)
| KleeneStar(x) -> Kleene
| Concatenation(x) -> Alternation
| _ -> failwith "Bad character"
let rec private buildRegexImpl (input : char list) (acc : Regex) : Regex =
match input with
| [] -> acc
| x :: t ->
let current = categorizeCharacter x
match acc with
| Empty ->
match current with
| Regular(c) ->
buildRegexImpl t <| Character(c)
| _ -> failwith "Unable to start with the character you entered"
| Character(c) ->
match current with
| Regular(x) ->
buildRegexImpl t <| Concat(acc, Character(x))
| Kleene ->
buildRegexImpl t <| Star(acc)
| Alternation ->
Concat( acc, (buildRegexImpl t Empty))
| _ -> failwith "Error"
let buildRegex (text:string) =
let chars = List.ofArray <| text.ToCharArray()
buildRegexImpl chars Empty
let rec private matchesImpl r i =
match r with
| Character(c) ->
match i with
| [] -> (false, [])
| h :: t ->
(h = c, t)
| Star(inner) ->
match i with
| [] -> (true, [])
| _ ->
matchesImpl inner i
| Concat(l,r) ->
let a = (matchesImpl l i)
if fst <| a then
(true, snd <| a)
else
let b = (matchesImpl r i)
if fst <| b then
(true, snd <| b)
else
(false, snd <| b)
let matches regex (text:string) =
let exp = buildRegex regex
text.ToCharArray () |> List.ofArray |> matchesImpl exp |> fst
[<TestFixture>]
type RegexTests() =
[<Test>]
member x.SingleCharMatchTest() =
let text = "a"
let buildRegex = buildRegex text
Assert.AreEqual( Character('a'), buildRegex)
[<Test>]
member x.TwoCharacterConcatenationTest() =
let text = "ab"
let buildRegex = buildRegex text
Assert.AreEqual( Concat(Character('a'), Character('b')), buildRegex)
[<Test>]
member x.CharacterWithKleeneTest() =
let text = "a*"
let buildRegex = buildRegex text
Assert.AreEqual( Star(Character('a')), buildRegex)
[<Test>]
member x.CharacterWithAlternationt() =
let text = "a|b"
let buildRegex = buildRegex text
Assert.AreEqual( Concat(Character('a'),Character('b')), buildRegex)
[<Test>]
member x.CharactersWithAlternationt() =
let text = "a|b|c"
let buildRegex = buildRegex text
Assert.AreEqual( Concat(Character('a'),Concat(Character('b'),Character('c'))), buildRegex)
[<Test>]
member x.SingleCharMatchesSingleCharRegexTest() =
let text = "a"
let regex = "a"
Assert.IsTrue( (matches regex text))
[<Test>]
member x.SingleCharUnionMatchesTest() =
let text = "aaaaaaaaa"
let regex = "a*"
Assert.IsTrue( (matches regex text))
[<Test>]
member x.SingleInvalidCharDoesNotMatcheTest() =
let text = "c"
let regex = "a"
Assert.IsFalse( (matches regex text))
[<Test>]
member x.SingleCharacterMatchesUnionInFirstPositionRegexTest() =
let text = "a"
let regex = "a|b"
Assert.IsTrue( (matches regex text))
[<Test>]
member x.SingleCharacterMatchesUnionInSecondPositionRegexTest() =
let text = "b"
let regex = "a|b"
Assert.IsTrue( (matches regex text))
[<Test>]
member x.SingleInvalidCharacterInUnionDoesNotMatch() =
let text = "c"
let regex = "a|b"
Assert.IsFalse( (matches regex text))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment