Last active
January 9, 2022 16:49
-
-
Save cheery/53fbd4184bad8c2e5addfab758e0ca91 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Regex2 where | |
open import Function | |
open import Data.Bool | |
open import Data.Maybe | |
open import Data.Product | |
open import Data.Sum | |
open import Data.List as List | |
open import Data.Empty | |
open import Data.Unit | |
open import Relation.Binary.PropositionalEquality using (_≡_; refl; cong; trans; subst; sym) | |
open import Relation.Nullary using (Dec) | |
data Singleton {a} {A : Set a} (x : A) : Set a where | |
_with≡_ : (y : A) → x ≡ y → Singleton x | |
inspect : ∀ {a} {A : Set a} (x : A) → Singleton x | |
inspect x = x with≡ refl | |
module Definition (A : Set) (semantic : A → Set) (_≟_ : (x y : A) → Dec (x ≡ y)) where | |
data RegExp : Set where | |
ε : RegExp | |
a : A → RegExp | |
_⋃_ : RegExp → RegExp → RegExp | |
_∙_ : RegExp → RegExp → RegExp | |
_* : RegExp → RegExp | |
RegExpType : RegExp → Set | |
RegExpType ε = ⊤ | |
RegExpType (a x) = semantic x | |
RegExpType (regex ⋃ regex₁) = RegExpType regex ⊎ RegExpType regex₁ | |
RegExpType (regex ∙ regex₁) = RegExpType regex × RegExpType regex₁ | |
RegExpType (regex *) = List (RegExpType regex) | |
Char : Set | |
Char = Σ A semantic | |
String : Set | |
String = List Char | |
data _‣_ : String → RegExp → Set where | |
empty : [] ‣ ε | |
symbol : {x : A} → {y : semantic x} → [( x , y )] ‣ a x | |
union₁ : {u : String} → {e₁ e₂ : RegExp} → u ‣ e₁ → u ‣ (e₁ ⋃ e₂) | |
union₂ : {u : String} → {e₁ e₂ : RegExp} → u ‣ e₂ → u ‣ (e₁ ⋃ e₂) | |
con : {u v : String} → {e₁ e₂ : RegExp} → u ‣ e₁ → v ‣ e₂ → (u ++ v) ‣ (e₁ ∙ e₂) | |
star0 : {e : RegExp} → [] ‣ (e *) | |
star-cons : {u v : String} → {e : RegExp} → u ‣ e → v ‣ (e *) → (u ++ v) ‣ (e *) | |
generate : (regex : RegExp) → RegExpType regex → String | |
generate ε tt = [] | |
generate (a x) s = [( x , s )] | |
generate (regex ⋃ regex₁) (inj₁ x) = generate regex x | |
generate (regex ⋃ regex₁) (inj₂ y) = generate regex₁ y | |
generate (regex ∙ regex₁) (fst , snd) = generate regex fst ++ generate regex₁ snd | |
generate (regex *) xs = concat (List.map (generate regex) xs) | |
generate-theorem : (regex : RegExp) → (value : RegExpType regex) → (generate regex value) ‣ regex | |
generate-theorem ε tt = empty | |
generate-theorem (a x) value = symbol | |
generate-theorem (regex ⋃ regex₁) (inj₁ x) = union₁ (generate-theorem regex x) | |
generate-theorem (regex ⋃ regex₁) (inj₂ y) = union₂ (generate-theorem regex₁ y) | |
generate-theorem (regex ∙ regex₁) (fst , snd) = con (generate-theorem regex fst) (generate-theorem regex₁ snd) | |
generate-theorem (regex *) [] = star0 | |
generate-theorem (regex *) (x ∷ values) = star-cons (generate-theorem regex x) (generate-theorem (regex *) values) | |
data Pattern : (y : Set) → Set₁ where | |
Reject : ∀{y} → Pattern y | |
Accept : ∀{y} → y → Pattern y | |
Exact : (x : A) → Pattern (semantic x) | |
Group : ∀{a b c} → (a × b → c) → Pattern a → Pattern b → Pattern c | |
Alt : ∀{a b c} → (a ⊎ b → c) → Pattern a → Pattern b → Pattern c | |
Star : ∀{a} → Pattern a → Pattern (List a) | |
With : ∀{a b} → (a → b) → Pattern a → Pattern b | |
pat : (regex : RegExp) → Pattern (RegExpType regex) | |
pat ε = Accept tt | |
pat (a x) = Exact x | |
pat (regex ⋃ regex₁) = Alt id (pat regex) (pat regex₁) | |
pat (regex ∙ regex₁) = Group id (pat regex) (pat regex₁) | |
pat (regex *) = Star (pat regex) | |
-- Small combinator to erase sums whenever branches do not represent parse trees. | |
vanish : ∀{a : Set} → a ⊎ a → a | |
vanish (inj₁ v) = v | |
vanish (inj₂ v) = v | |
-- Whether the pattern refers to a plain rejection. | |
rejected : ∀{a} → Pattern a -> Bool | |
rejected Reject = true | |
rejected (Accept x) = false | |
rejected (Exact _) = false | |
rejected (Group k f g) = rejected f | |
rejected (Alt k f g) = rejected f ∧ rejected g | |
rejected (Star f) = false | |
rejected (With h g) = rejected g | |
-- Removes catenation whenever accepted or rejected item is derived. | |
group : ∀{a b c} → ((a × b) → c) → Pattern a → Pattern b → Pattern c | |
group k Reject y = Reject | |
group k (Accept i) = With (λ(j) → k (i , j)) | |
group k (Exact x) = Group k (Exact x) | |
group k (Group f x' y') = Group k (Group f x' y') | |
group k (Alt f x' y') = Group k (Alt f x' y') | |
group k (Star x') = Group k (Star x') | |
group k (With f x') = Group (k ∘ Data.Product.map₁ f) x' | |
-- Rejection check that only checks one layer. | |
rejected₁ : ∀{a} → Pattern a → Bool | |
rejected₁ Reject = true | |
rejected₁ (Accept x) = false | |
rejected₁ (Exact x) = false | |
rejected₁ (Group x x₁ x₂) = false | |
rejected₁ (Alt x x₁ x₂) = false | |
rejected₁ (Star x) = false | |
rejected₁ (With x x₁) = false | |
-- Removes alternation whenever either branch is clearly rejected. | |
alt : ∀{a b c} → (a ⊎ b → c) → Pattern a → Pattern b → Pattern c | |
alt k x y with rejected₁ x | rejected₁ y | |
alt k x y | false | false = Alt k x y | |
alt k x _ | false | true = With (k ∘ inj₁) x | |
alt k _ y | true | false = With (k ∘ inj₂) y | |
alt k _ _ | true | true = Reject | |
mutual | |
-- Produce a Brzozowski derivative with the given input symbol. | |
derive : ∀{a} → Pattern a → Σ A semantic → Pattern a | |
derive Reject v = Reject | |
derive (Accept _) v = Reject | |
derive (Exact x) v with x ≟ (proj₁ v) | |
... | Dec.yes refl = Accept (proj₂ v) | |
... | Dec.no ¬p = Reject | |
derive (Group f x y) v with accept x | |
... | just x' = alt vanish (group f (derive x v) y) (With (λ(y') → f (x' , y')) (derive y v)) | |
... | nothing = group f (derive x v) y | |
derive (Alt f x y) v = alt f (derive x v) (derive y v) | |
derive (Star x) v = group (λ(x , xs) → x ∷ xs) (derive x v) (Star x) | |
derive (With f x) v with derive x v | |
... | Reject = Reject | |
... | Accept x' = Accept (f x') | |
... | Exact x' = With f (Exact x') | |
... | Group f' x' y' = Group (f ∘ f') x' y' | |
... | Alt f' x' y' = Alt (f ∘ f') x' y' | |
... | Star x' = With f (Star x') | |
... | With f' x' = With (f ∘ f') x' | |
-- Tell whether pattern represents accepted value. | |
accept : ∀{a} → Pattern a → Maybe a | |
accept Reject = nothing | |
accept (Accept x) = just x | |
accept (Exact x) = nothing | |
accept (Group f x y) with accept x | accept y | |
... | just x' | just y' = just (f (x' , y')) | |
... | just _ | nothing = nothing | |
... | nothing | _ = nothing | |
accept (Alt f x y) with accept x | accept y | |
... | just x' | _ = just (f (inj₁ x')) -- Resolve ambiguity to the left side. | |
... | nothing | just y' = just (f (inj₂ y')) | |
... | nothing | nothing = nothing | |
accept (Star x) = just [] | |
accept (With f x) = Data.Maybe.map f (accept x) | |
-- Determine whether pattern is ambiguous. | |
ambiguous : ∀{a} → Pattern a → Bool | |
ambiguous Reject = false | |
ambiguous (Accept x) = false | |
ambiguous (Exact x) = false | |
ambiguous (Group f x y) with accept x | accept y | |
... | just x' | just y' = ambiguous x ∨ ambiguous y | |
... | just x' | nothing = false | |
... | nothing | _ = false | |
ambiguous (Alt f x y) with accept x | accept y | |
... | just _ | just _ = true | |
... | just _ | nothing = ambiguous x | |
... | nothing | just _ = ambiguous y | |
... | nothing | nothing = false | |
ambiguous (Star x) = false | |
ambiguous (With f x) = ambiguous x | |
parse : (regex : RegExp) → String → Maybe (RegExpType regex) | |
parse shape xs = accept (foldl derive (pat shape) xs) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment