Created
December 2, 2020 16:25
-
-
Save TOTBWF/c077cb4149447eba6cfce7ae208dd610 to your computer and use it in GitHub Desktop.
Brzozowski Derivatives in Agda
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- Regexes via Brzozowski derivatives | |
| {-# OPTIONS --without-K --safe #-} | |
| module Regex where | |
| open import Function | |
| open import Data.Char | |
| open import Data.Char.Properties renaming (_≟_ to _≟ᶜ_) | |
| open import Data.String using (toList) | |
| open import Data.Bool | |
| open import Data.Empty | |
| open import Data.List | |
| open import Data.List.Properties | |
| open import Relation.Nullary | |
| open import Relation.Binary.PropositionalEquality hiding ([_]) | |
| data Regex (A : Set) : Set where | |
| -- Matches exactly one instance of the specified character. | |
| ⟦_⟧ : A → Regex A | |
| -- Matches either the left or right hand regexes | |
| _∪_ : Regex A → Regex A → Regex A | |
| -- Sequencing of regexes. Matches the first, followed by the second. | |
| _≫_ : Regex A → Regex A → Regex A | |
| -- Kleene Star. Matches 0 or more occurances of the provided regex. | |
| _* : Regex A → Regex A | |
| -- The empty regex. Matches nothing. | |
| ∅ : Regex A | |
| -- The terminal regex. Matches the empty string. | |
| ε : Regex A | |
| infixr 6 _≫_ | |
| data Match {A : Set} : Regex A → List A → Set where | |
| match-⟦⟧ : (a : A) → Match ⟦ a ⟧ [ a ] | |
| matchₗ : {r₁ r₂ : Regex A} → (as : List A) → Match r₁ as → Match (r₁ ∪ r₂) as | |
| matchᵣ : {r₁ r₂ : Regex A} → (as : List A) → Match r₂ as → Match (r₁ ∪ r₂) as | |
| -- This equality proof is bad and should feel bad. However, if it is not here Agda will get stuck | |
| -- trying to pattern match on Match (r₁ ≫ r₂) []... | |
| match-≫ : {r₁ r₂ : Regex A} → (as₁ as₂ as : List A) → (as₁ ++ as₂ ≡ as) → Match r₁ as₁ → Match r₂ as₂ → Match (r₁ ≫ r₂) as | |
| match-*-[] : {r : Regex A} → Match (r *) [] | |
| match-*-∷ : {r : Regex A} → (as as₁ as₂ : List A) → (as₁ ++ as₂ ≡ as) → Match r as₁ → Match (r *) as₂ → Match (r *) as | |
| match-ε : Match ε [] | |
| empty? : ∀ {A} → (r : Regex A) → Dec (Match r []) | |
| empty? ⟦ x ⟧ = no λ () | |
| empty? (r₁ ∪ r₂) with (empty? r₁) | (empty? r₂) | |
| ... | yes p | yes q = yes (matchₗ [] p) | |
| ... | no p | yes q = yes (matchᵣ [] q) | |
| ... | yes p | no q = yes (matchₗ [] p) | |
| ... | no p | no q = no λ { (matchₗ _ m) → p m ; (matchᵣ _ m) → q m } | |
| empty? (r₁ ≫ r₂) with (empty? r₁) | (empty? r₂) | |
| ... | yes p | yes q = yes (match-≫ [] [] [] refl p q) | |
| ... | no p | yes q = no λ { (match-≫ [] [] [] _ m _) → p m } | |
| ... | yes p | no q = no λ { (match-≫ [] [] [] _ _ m) → q m } | |
| ... | no p | no q = no λ { (match-≫ [] [] [] _ m _) → p m } | |
| empty? (r *) = yes match-*-[] | |
| empty? ∅ = no λ () | |
| empty? ε = yes match-ε | |
| -- "Observes" whether or not a given regex can denote the empty string. | |
| -- If it can, we will return ε, otherwise ∅. | |
| empty : ∀ {A} → Regex A → Regex A | |
| empty ⟦ x ⟧ = ∅ | |
| empty (r₁ ∪ r₂) = empty r₁ ∪ empty r₂ | |
| empty (r₁ ≫ r₂) = empty r₁ ≫ empty r₂ | |
| empty (r *) = ε | |
| empty ∅ = ∅ | |
| empty ε = ε | |
| -- Take a Brzozowski Derivative with respect to some character. | |
| -- The intuition here is 'deriv r x' will match all strings that | |
| -- 'r' matched that start with 'x', but the returned match will not contain the 'x'. | |
| deriv : Regex Char → Char → Regex Char | |
| deriv ⟦ x ⟧ a = case x ≟ᶜ a of λ { (yes _) → ε ; (no _) → ∅ } | |
| deriv (r₁ ∪ r₂) a = (deriv r₁ a) ∪ deriv r₂ a | |
| deriv (r₁ ≫ r₂) a = (deriv r₁ a ≫ r₂) ∪ (empty r₁ ≫ (deriv r₂ a)) | |
| deriv (r *) a = deriv r a ≫ (r *) | |
| deriv ∅ a = ∅ | |
| deriv ε a = ∅ | |
| match-empty : ∀ {A : Set} {r : Regex A} {xs : List A} → Match (empty r) xs → xs ≡ [] | |
| match-empty {r = r ∪ r₁} (matchₗ _ m) = match-empty m | |
| match-empty {r = r ∪ r₁} (matchᵣ _ m) = match-empty m | |
| match-empty {r = r ≫ r₁} (match-≫ as₁ as₂ _ eq m₁ m₂) rewrite match-empty m₁ | match-empty m₂ = sym eq | |
| match-empty {r = r *} match-ε = refl | |
| match-empty {r = ε} match-ε = refl | |
| empty-sound : ∀ {A} {r : Regex A} {xs : List A} → Match (empty r) xs → Match r [] | |
| empty-sound {r = r ∪ r₁} {xs = xs} (matchₗ .xs m) = matchₗ [] (empty-sound m) | |
| empty-sound {r = r ∪ r₁} {xs = xs} (matchᵣ .xs m) = matchᵣ [] (empty-sound m) | |
| empty-sound {r = r ≫ r₁} {xs = xs} (match-≫ as₁ as₂ .xs eq m₁ m₂) = match-≫ [] [] [] refl (empty-sound m₁) (empty-sound m₂) | |
| empty-sound {r = r *} {xs = .[]} match-ε = match-*-[] | |
| empty-sound {r = ε} {xs = .[]} match-ε = match-ε | |
| match-sound : ∀ {r : Regex Char} {x : Char} {xs : List Char} → Match (deriv r x) xs → Match r (x ∷ xs) | |
| match-sound {r = ⟦ a ⟧} {x = x} {xs = xs} m with a ≟ᶜ x | |
| match-sound {⟦ a ⟧} {x = .a} {xs = .[]} match-ε | yes refl = match-⟦⟧ a | |
| match-sound {⟦ a ⟧} {x = x} {xs = xs} () | no p | |
| match-sound {r = r ∪ r₁} {x = x} {xs = xs} (matchₗ .xs m) = matchₗ (x ∷ xs) (match-sound m) | |
| match-sound {r = r ∪ r₁} {x = x} {xs = xs} (matchᵣ .xs m) = matchᵣ (x ∷ xs) (match-sound m) | |
| match-sound {r = r ≫ r₁} {x = x} {xs = xs} (matchₗ .xs (match-≫ as₁ as₂ .xs eq m₁ m₂)) = match-≫ (x ∷ as₁) as₂ (x ∷ xs) (cong (x ∷_) eq) (match-sound m₁) m₂ | |
| match-sound {r = r ≫ r₁} {x = x} {xs = xs} (matchᵣ .xs (match-≫ as₁ as₂ .xs eq m₁ m₂)) rewrite match-empty m₁ = match-≫ [] (x ∷ as₂) (x ∷ xs) (cong (x ∷_) eq) (empty-sound m₁) (match-sound m₂) | |
| match-sound {r = r *} {x = x} {xs = xs} (match-≫ as₁ as₂ .xs eq m₁ m₂) = match-*-∷ (x ∷ xs) (x ∷ as₁) as₂ (cong (x ∷_) eq) (match-sound m₁) m₂ | |
| empty-complete : ∀ {r : Regex Char} → Match r [] → Match (empty r) [] | |
| empty-complete {.(_ ∪ _)} (matchₗ .[] m) = matchₗ [] (empty-complete m) | |
| empty-complete {.(_ ∪ _)} (matchᵣ .[] m) = matchᵣ [] (empty-complete m) | |
| empty-complete {.(_ ≫ _)} (match-≫ [] [] .[] x m₁ m₂) = match-≫ [] [] [] refl (empty-complete m₁) (empty-complete m₂) | |
| empty-complete {.(_ *)} match-*-[] = match-ε | |
| empty-complete {.(_ *)} (match-*-∷ .[] as₁ as₂ x m m₁) = match-ε | |
| empty-complete {.ε} match-ε = match-ε | |
| match-complete : ∀ {r : Regex Char} {x : Char} {xs : List Char} → Match r (x ∷ xs) → Match (deriv r x) xs | |
| match-complete {.(⟦ x ⟧)} {x} {.[]} (match-⟦⟧ .x) with x ≟ᶜ x | |
| ... | yes p = match-ε | |
| ... | no contra = ⊥-elim (contra refl) | |
| match-complete {.(_ ∪ _)} {x} {xs} (matchₗ .(x ∷ xs) m) = matchₗ xs (match-complete m) | |
| match-complete {.(_ ∪ _)} {x} {xs} (matchᵣ .(x ∷ xs) m) = matchᵣ xs (match-complete m) | |
| match-complete {.(_ ≫ _)} {x} {xs} (match-≫ [] (x₁ ∷ as₂) .(x ∷ xs) eq m₁ m₂) rewrite ∷-injectiveˡ eq = matchᵣ xs (match-≫ [] as₂ xs (∷-injectiveʳ eq) (empty-complete m₁) (match-complete m₂)) | |
| match-complete {.(_ ≫ _)} {x} {xs} (match-≫ (x₁ ∷ as₁) as₂ .(x ∷ xs) eq m₁ m₂) rewrite ∷-injectiveˡ eq = matchₗ xs (match-≫ as₁ as₂ xs (∷-injectiveʳ eq) (match-complete m₁) m₂) | |
| match-complete {.(_ *)} {x} {xs} (match-*-∷ .(x ∷ xs) [] as₂ eq m₁ m₂) rewrite eq = match-complete m₂ | |
| match-complete {.(_ *)} {x} {xs} (match-*-∷ .(x ∷ xs) (x₁ ∷ as₁) as₂ eq m₁ m₂) rewrite ∷-injectiveˡ eq = match-≫ as₁ as₂ xs (∷-injectiveʳ eq) (match-complete m₁) m₂ | |
| match : ∀ (r : Regex Char) → (as : List Char) → Dec (Match r as) | |
| match r [] = empty? r | |
| match r (x ∷ xs) with match (deriv r x) xs | |
| ... | yes m = yes (match-sound m) | |
| ... | no ¬m = no λ m → ¬m (match-complete m) | |
| -- Dumb hacks :) | |
| [a-z] : Regex Char | |
| [a-z] = foldr (λ c r → ⟦ c ⟧ ∪ r) ∅ (toList "abcdefghijklnmopqrztuvwxyz") | |
| [0-9] : Regex Char | |
| [0-9] = foldr (λ c r → ⟦ c ⟧ ∪ r) ∅ (toList "0123456789") | |
| _+ : Regex Char → Regex Char | |
| r + = r ≫ (r *) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment