Created
May 7, 2015 03:38
-
-
Save jozefg/01a1360512d8f34052d1 to your computer and use it in GitHub Desktop.
Regex matcher
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module regex where | |
open import Data.Char using (Char; _≟_) | |
open import Data.List using (List ; _∷_ ; []) | |
open import Relation.Binary.PropositionalEquality | |
open import Relation.Nullary | |
open import Data.Empty using (⊥) | |
-- Reason for this is because it simplifies pattern matching. | |
String : Set | |
String = List Char | |
data Regex : Set where | |
both : Regex → Regex → Regex | |
then : Regex → Regex → Regex | |
or : Regex → Regex → Regex | |
char : Char → Regex | |
star : Regex → Regex | |
empty : Regex | |
data Split : String → String → String → Set where | |
nil : (s : String) → Split s [] s | |
cons : (c : Char)(s : String){s1 s2 : String} | |
→ Split s s1 s2 | |
→ Split (c ∷ s) (c ∷ s1) s2 | |
data _∈_ : String → Regex → Set where | |
char : (c : Char) → (c ∷ []) ∈ char c | |
or1 : (s : String)(r1 r2 : Regex) | |
→ s ∈ r1 | |
→ s ∈ or r1 r2 | |
or2 : (s : String)(r1 r2 : Regex) | |
→ s ∈ r2 | |
→ s ∈ or r1 r2 | |
both : (s : String)(r1 r2 : Regex) | |
→ s ∈ r1 → s ∈ r2 | |
→ s ∈ both r1 r2 | |
then : (r₁ r₂ : Regex)(s s1 s2 : String) | |
→ Split s s1 s2 | |
→ s1 ∈ r₁ | |
→ s2 ∈ r₂ | |
→ s ∈ then r₁ r₂ | |
starz : (r : Regex) → [] ∈ star r | |
stars : (r : Regex)(s s1 s2 : String) | |
→ Split s s1 s2 | |
→ s1 ∈ r | |
→ s2 ∈ star r | |
→ s ∈ star r | |
π₁ : {r r₂ : Regex}{s : String} → s ∈ both r r₂ → s ∈ r | |
π₁ (both s r1 r2 i i₁) = i | |
π₂ : {r r₂ : Regex}{s : String} → s ∈ both r r₂ → s ∈ r₂ | |
π₂ (both s r1 r2 i i₁) = i₁ | |
either : {r r₂ : Regex}{s : String} | |
→ (s ∈ r → ⊥) → (s ∈ r₂ → ⊥) | |
→ s ∈ or r r₂ → ⊥ | |
either l r₁ (or1 s r r₂ p) = l p | |
either l r₁ (or2 s r r₂ p) = r₁ p | |
¬-char : {c c₁ : Char}{s : String} → c ≢ c₁ → (c ∷ s) ∈ char c₁ → ⊥ | |
¬-char neq (char c) = neq refl | |
data HasSplit (s : String)(P : (s₁ s₂ : String) → Split s s₁ s₂ → Set) : Set where | |
exists : (s₁ s₂ : String)(sp : Split s s₁ s₂) | |
→ P s₁ s₂ sp | |
→ HasSplit s P | |
ShiftOver : (s : String)(c : Char) | |
(P : (s₁ s₂ : String) → Split (c ∷ s) s₁ s₂ → Set) | |
→ (s₁ s₂ : String) → Split s s₁ s₂ → Set | |
ShiftOver s c P s₁ s₂ sp = P (c ∷ s₁) s₂ (cons c s sp) | |
shiftOverDec : {s : String}(c : Char) | |
{P : (s₁ s₂ : String) → Split (c ∷ s) s₁ s₂ → Set} | |
→ ((s₁ s₂ : String)(sp : Split (c ∷ s) s₁ s₂) → Dec (P s₁ s₂ sp)) | |
→ ((s₁ s₂ : String)(sp : Split s s₁ s₂) | |
→ Dec (ShiftOver s c P s₁ s₂ sp)) | |
shiftOverDec c dec s₁ s₂ sp = dec (c ∷ s₁) s₂ (cons c _ sp) | |
decHasSplit : (s : String){P : (s₁ s₂ : String) → Split s s₁ s₂ → Set} | |
→ ((s₁ s₂ : String)(sp : Split s s₁ s₂) → Dec (P s₁ s₂ sp)) | |
→ Dec (HasSplit s P) | |
decHasSplit [] decP with decP [] [] (nil []) | |
decHasSplit [] decP | yes p = yes (exists [] [] (nil []) p) | |
decHasSplit [] decP | no ¬p = no contra | |
where contra : HasSplit [] _ → ⊥ | |
contra (exists .[] .[] (nil .[]) x) = ¬p x | |
decHasSplit (x ∷ s) decP with decHasSplit s (shiftOverDec x decP) | decP _ _ (nil (x ∷ s)) | |
decHasSplit (x ∷ s) decP | yes p | yes p₁ = yes (exists [] (x ∷ s) (nil (x ∷ s)) p₁) | |
decHasSplit (x ∷ s) decP | yes (exists s₁ s₂ sp x₁) | no ¬p = yes (exists (x ∷ s₁) s₂ (cons x s sp) x₁) | |
decHasSplit (x ∷ s) decP | no ¬p | yes p = yes (exists [] (x ∷ s) (nil (x ∷ s)) p) | |
decHasSplit (x ∷ s) decP | no ¬p | no ¬p₁ = no contra | |
where contra : HasSplit (x ∷ s) _ → ⊥ | |
contra (exists .[] .(x ∷ s) (nil ._) x₁) = ¬p₁ x₁ | |
contra (exists (.x ∷ s₁) s₂ (cons .x .s sp) x₁) = ¬p (exists s₁ s₂ sp x₁) | |
data StarR (r : Regex) (s₁ s₂ : String) : Set where | |
rstar : s₁ ∈ r → s₂ ∈ star r → StarR r s₁ s₂ | |
rπ₁ : {r : Regex}{s₁ s₂ : String} → StarR r s₁ s₂ → s₁ ∈ r | |
rπ₁ (rstar p _) = p | |
rπ₂ : {r : Regex}{s₁ s₂ : String} → StarR r s₁ s₂ → s₂ ∈ star r | |
rπ₂ (rstar _ p) = p | |
data ThenR (r₁ r₂ : Regex) (s₁ s₂ : String) : Set where | |
rthen : s₁ ∈ r₁ → s₂ ∈ r₂ → ThenR r₁ r₂ s₁ s₂ | |
trπ₁ : {r₁ r₂ : Regex}{s₁ s₂ : String} → ThenR r₁ r₂ s₁ s₂ → s₁ ∈ r₁ | |
trπ₁ (rthen p _) = p | |
trπ₂ : {r₁ r₂ : Regex}{s₁ s₂ : String} → ThenR r₁ r₂ s₁ s₂ → s₂ ∈ r₂ | |
trπ₂ (rthen _ p) = p | |
match : (s : String)(r : Regex) → Dec (s ∈ r) | |
{-# NO_TERMINATION_CHECK #-} | |
decStar : (r : Regex)(s s₁ s₂ : String)(sp : Split s s₁ s₂) | |
→ Dec (StarR r s₁ s₂) | |
decThen : (r₁ r₂ : Regex)(s s₁ s₂ : String)(sp : Split s s₁ s₂) | |
→ Dec (ThenR r₁ r₂ s₁ s₂) | |
decStar r s s₁ s₂ sp with match s₁ r | match s₂ (star r) | |
decStar r s s₁ s₂ sp | yes p | yes p₁ = yes (rstar p p₁) | |
decStar r s s₁ s₂ sp | yes p | no ¬p = no (λ p → ¬p (rπ₂ p)) | |
decStar r s s₁ s₂ sp | no ¬p | p₂ = no (λ p → ¬p (rπ₁ p)) | |
decThen r₁ r₂ s s₁ s₂ sp with match s₁ r₁ | match s₂ r₂ | |
decThen r₁ r₂ s s₁ s₂ sp | yes p | yes p₁ = yes (rthen p p₁) | |
decThen r₁ r₂ s s₁ s₂ sp | yes p | no ¬p = no (λ p → ¬p (trπ₂ p)) | |
decThen r₁ r₂ s s₁ s₂ sp | no ¬p | yes p = no (λ p → ¬p (trπ₁ p)) | |
decThen r₁ r₂ s s₁ s₂ sp | no ¬p | no ¬p₁ = no (λ p → ¬p (trπ₁ p)) | |
match s (both r r₁) with match s r | match s r₁ | |
match s (both r r₁) | yes p | yes p₁ = yes (both s r r₁ p p₁) | |
match s (both r r₁) | yes p | no ¬p = no (λ p → ¬p (π₂ p)) | |
match s (both r r₁) | no ¬p | yes p = no (λ p → ¬p (π₁ p)) | |
match s (both r r₁) | no ¬p | no ¬p₁ = no (λ p → ¬p (π₁ p)) | |
match s (or r r₁) with match s r | match s r₁ | |
match s (or r r₁) | yes p | yes p₁ = yes (or1 s r r₁ p) | |
match s (or r r₁) | yes p | no ¬p = yes (or1 s r r₁ p) | |
match s (or r r₁) | no ¬p | yes p = yes (or2 s r r₁ p) | |
match s (or r r₁) | no ¬p | no ¬p₁ = no (λ p → either ¬p ¬p₁ p) | |
match [] (char x) = no (λ ()) | |
match (x ∷ s) (char x₁) with x ≟ x₁ | |
match (x ∷ []) (char .x) | yes refl = yes (char x) | |
match (x ∷ x₁ ∷ s) (char .x) | yes refl = no (λ ()) | |
match (x ∷ s) (char x₁) | no ¬ = no (λ p → ¬-char ¬ p) | |
match s empty = no (λ ()) | |
match [] (star r) = yes (starz r) | |
match (x ∷ s) (star r) with decHasSplit (x ∷ s) (decStar r (x ∷ s)) | |
match (x ∷ s) (star r) | yes (exists s₁ s₂ sp (rstar x₁ x₂)) = yes (stars r (x ∷ s) s₁ s₂ sp x₁ x₂) | |
match (x ∷ s) (star r) | no ¬p = no contra | |
where contra : (x ∷ s) ∈ star r → ⊥ | |
contra (stars .r .(x ∷ s) s1 s2 x₁ p p₁) = ¬p (exists s1 s2 x₁ (rstar p p₁)) | |
match s (then r₁ r₂) with decHasSplit s (decThen r₁ r₂ s) | |
match s (then r₁ r₂) | yes (exists s₁ s₂ sp (rthen x x₁)) = yes (then r₁ r₂ s s₁ s₂ sp x x₁) | |
match s (then r₁ r₂) | no ¬p = no contra | |
where contra : s ∈ then r₁ r₂ → ⊥ | |
contra (then .r₁ .r₂ .s s1 s2 x p p₁) = ¬p (exists s1 s2 x (rthen p p₁)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment