Skip to content

Instantly share code, notes, and snippets.

@TOTBWF
Created December 2, 2020 16:25
Show Gist options
  • Select an option

  • Save TOTBWF/c077cb4149447eba6cfce7ae208dd610 to your computer and use it in GitHub Desktop.

Select an option

Save TOTBWF/c077cb4149447eba6cfce7ae208dd610 to your computer and use it in GitHub Desktop.
Brzozowski Derivatives in Agda
-- Regexes via Brzozowski derivatives
{-# OPTIONS --without-K --safe #-}
module Regex where
open import Function
open import Data.Char
open import Data.Char.Properties renaming (_≟_ to _≟ᶜ_)
open import Data.String using (toList)
open import Data.Bool
open import Data.Empty
open import Data.List
open import Data.List.Properties
open import Relation.Nullary
open import Relation.Binary.PropositionalEquality hiding ([_])
data Regex (A : Set) : Set where
-- Matches exactly one instance of the specified character.
⟦_⟧ : A → Regex A
-- Matches either the left or right hand regexes
_∪_ : Regex A → Regex A → Regex A
-- Sequencing of regexes. Matches the first, followed by the second.
_≫_ : Regex A → Regex A → Regex A
-- Kleene Star. Matches 0 or more occurances of the provided regex.
_* : Regex A → Regex A
-- The empty regex. Matches nothing.
∅ : Regex A
-- The terminal regex. Matches the empty string.
ε : Regex A
infixr 6 _≫_
data Match {A : Set} : Regex A → List A → Set where
match-⟦⟧ : (a : A) → Match ⟦ a ⟧ [ a ]
matchₗ : {r₁ r₂ : Regex A} → (as : List A) → Match r₁ as → Match (r₁ ∪ r₂) as
matchᵣ : {r₁ r₂ : Regex A} → (as : List A) → Match r₂ as → Match (r₁ ∪ r₂) as
-- This equality proof is bad and should feel bad. However, if it is not here Agda will get stuck
-- trying to pattern match on Match (r₁ ≫ r₂) []...
match-≫ : {r₁ r₂ : Regex A} → (as₁ as₂ as : List A) → (as₁ ++ as₂ ≡ as) → Match r₁ as₁ → Match r₂ as₂ → Match (r₁ ≫ r₂) as
match-*-[] : {r : Regex A} → Match (r *) []
match-*-∷ : {r : Regex A} → (as as₁ as₂ : List A) → (as₁ ++ as₂ ≡ as) → Match r as₁ → Match (r *) as₂ → Match (r *) as
match-ε : Match ε []
empty? : ∀ {A} → (r : Regex A) → Dec (Match r [])
empty? ⟦ x ⟧ = no λ ()
empty? (r₁ ∪ r₂) with (empty? r₁) | (empty? r₂)
... | yes p | yes q = yes (matchₗ [] p)
... | no p | yes q = yes (matchᵣ [] q)
... | yes p | no q = yes (matchₗ [] p)
... | no p | no q = no λ { (matchₗ _ m) → p m ; (matchᵣ _ m) → q m }
empty? (r₁ ≫ r₂) with (empty? r₁) | (empty? r₂)
... | yes p | yes q = yes (match-≫ [] [] [] refl p q)
... | no p | yes q = no λ { (match-≫ [] [] [] _ m _) → p m }
... | yes p | no q = no λ { (match-≫ [] [] [] _ _ m) → q m }
... | no p | no q = no λ { (match-≫ [] [] [] _ m _) → p m }
empty? (r *) = yes match-*-[]
empty? ∅ = no λ ()
empty? ε = yes match-ε
-- "Observes" whether or not a given regex can denote the empty string.
-- If it can, we will return ε, otherwise ∅.
empty : ∀ {A} → Regex A → Regex A
empty ⟦ x ⟧ = ∅
empty (r₁ ∪ r₂) = empty r₁ ∪ empty r₂
empty (r₁ ≫ r₂) = empty r₁ ≫ empty r₂
empty (r *) = ε
empty ∅ = ∅
empty ε = ε
-- Take a Brzozowski Derivative with respect to some character.
-- The intuition here is 'deriv r x' will match all strings that
-- 'r' matched that start with 'x', but the returned match will not contain the 'x'.
deriv : Regex Char → Char → Regex Char
deriv ⟦ x ⟧ a = case x ≟ᶜ a of λ { (yes _) → ε ; (no _) → ∅ }
deriv (r₁ ∪ r₂) a = (deriv r₁ a) ∪ deriv r₂ a
deriv (r₁ ≫ r₂) a = (deriv r₁ a ≫ r₂) ∪ (empty r₁ ≫ (deriv r₂ a))
deriv (r *) a = deriv r a ≫ (r *)
deriv ∅ a = ∅
deriv ε a = ∅
match-empty : ∀ {A : Set} {r : Regex A} {xs : List A} → Match (empty r) xs → xs ≡ []
match-empty {r = r ∪ r₁} (matchₗ _ m) = match-empty m
match-empty {r = r ∪ r₁} (matchᵣ _ m) = match-empty m
match-empty {r = r ≫ r₁} (match-≫ as₁ as₂ _ eq m₁ m₂) rewrite match-empty m₁ | match-empty m₂ = sym eq
match-empty {r = r *} match-ε = refl
match-empty {r = ε} match-ε = refl
empty-sound : ∀ {A} {r : Regex A} {xs : List A} → Match (empty r) xs → Match r []
empty-sound {r = r ∪ r₁} {xs = xs} (matchₗ .xs m) = matchₗ [] (empty-sound m)
empty-sound {r = r ∪ r₁} {xs = xs} (matchᵣ .xs m) = matchᵣ [] (empty-sound m)
empty-sound {r = r ≫ r₁} {xs = xs} (match-≫ as₁ as₂ .xs eq m₁ m₂) = match-≫ [] [] [] refl (empty-sound m₁) (empty-sound m₂)
empty-sound {r = r *} {xs = .[]} match-ε = match-*-[]
empty-sound {r = ε} {xs = .[]} match-ε = match-ε
match-sound : ∀ {r : Regex Char} {x : Char} {xs : List Char} → Match (deriv r x) xs → Match r (x ∷ xs)
match-sound {r = ⟦ a ⟧} {x = x} {xs = xs} m with a ≟ᶜ x
match-sound {⟦ a ⟧} {x = .a} {xs = .[]} match-ε | yes refl = match-⟦⟧ a
match-sound {⟦ a ⟧} {x = x} {xs = xs} () | no p
match-sound {r = r ∪ r₁} {x = x} {xs = xs} (matchₗ .xs m) = matchₗ (x ∷ xs) (match-sound m)
match-sound {r = r ∪ r₁} {x = x} {xs = xs} (matchᵣ .xs m) = matchᵣ (x ∷ xs) (match-sound m)
match-sound {r = r ≫ r₁} {x = x} {xs = xs} (matchₗ .xs (match-≫ as₁ as₂ .xs eq m₁ m₂)) = match-≫ (x ∷ as₁) as₂ (x ∷ xs) (cong (x ∷_) eq) (match-sound m₁) m₂
match-sound {r = r ≫ r₁} {x = x} {xs = xs} (matchᵣ .xs (match-≫ as₁ as₂ .xs eq m₁ m₂)) rewrite match-empty m₁ = match-≫ [] (x ∷ as₂) (x ∷ xs) (cong (x ∷_) eq) (empty-sound m₁) (match-sound m₂)
match-sound {r = r *} {x = x} {xs = xs} (match-≫ as₁ as₂ .xs eq m₁ m₂) = match-*-∷ (x ∷ xs) (x ∷ as₁) as₂ (cong (x ∷_) eq) (match-sound m₁) m₂
empty-complete : ∀ {r : Regex Char} → Match r [] → Match (empty r) []
empty-complete {.(_ ∪ _)} (matchₗ .[] m) = matchₗ [] (empty-complete m)
empty-complete {.(_ ∪ _)} (matchᵣ .[] m) = matchᵣ [] (empty-complete m)
empty-complete {.(_ ≫ _)} (match-≫ [] [] .[] x m₁ m₂) = match-≫ [] [] [] refl (empty-complete m₁) (empty-complete m₂)
empty-complete {.(_ *)} match-*-[] = match-ε
empty-complete {.(_ *)} (match-*-∷ .[] as₁ as₂ x m m₁) = match-ε
empty-complete {.ε} match-ε = match-ε
match-complete : ∀ {r : Regex Char} {x : Char} {xs : List Char} → Match r (x ∷ xs) → Match (deriv r x) xs
match-complete {.(⟦ x ⟧)} {x} {.[]} (match-⟦⟧ .x) with x ≟ᶜ x
... | yes p = match-ε
... | no contra = ⊥-elim (contra refl)
match-complete {.(_ ∪ _)} {x} {xs} (matchₗ .(x ∷ xs) m) = matchₗ xs (match-complete m)
match-complete {.(_ ∪ _)} {x} {xs} (matchᵣ .(x ∷ xs) m) = matchᵣ xs (match-complete m)
match-complete {.(_ ≫ _)} {x} {xs} (match-≫ [] (x₁ ∷ as₂) .(x ∷ xs) eq m₁ m₂) rewrite ∷-injectiveˡ eq = matchᵣ xs (match-≫ [] as₂ xs (∷-injectiveʳ eq) (empty-complete m₁) (match-complete m₂))
match-complete {.(_ ≫ _)} {x} {xs} (match-≫ (x₁ ∷ as₁) as₂ .(x ∷ xs) eq m₁ m₂) rewrite ∷-injectiveˡ eq = matchₗ xs (match-≫ as₁ as₂ xs (∷-injectiveʳ eq) (match-complete m₁) m₂)
match-complete {.(_ *)} {x} {xs} (match-*-∷ .(x ∷ xs) [] as₂ eq m₁ m₂) rewrite eq = match-complete m₂
match-complete {.(_ *)} {x} {xs} (match-*-∷ .(x ∷ xs) (x₁ ∷ as₁) as₂ eq m₁ m₂) rewrite ∷-injectiveˡ eq = match-≫ as₁ as₂ xs (∷-injectiveʳ eq) (match-complete m₁) m₂
match : ∀ (r : Regex Char) → (as : List Char) → Dec (Match r as)
match r [] = empty? r
match r (x ∷ xs) with match (deriv r x) xs
... | yes m = yes (match-sound m)
... | no ¬m = no λ m → ¬m (match-complete m)
-- Dumb hacks :)
[a-z] : Regex Char
[a-z] = foldr (λ c r → ⟦ c ⟧ ∪ r) ∅ (toList "abcdefghijklnmopqrztuvwxyz")
[0-9] : Regex Char
[0-9] = foldr (λ c r → ⟦ c ⟧ ∪ r) ∅ (toList "0123456789")
_+ : Regex Char → Regex Char
r + = r ≫ (r *)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment