Created
November 10, 2020 15:21
-
-
Save abailly/071ecb8264cfda932361c2f09ccdaa8a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
||| A proof of the validity of matching algorithm for | |
||| regular expressions | |
||| | |
||| From https://arxiv.org/abs/2003.06458 | |
module re | |
import Decidable.Equality | |
%default total | |
infix 7 .| | |
infix 6 ... | |
||| Representation of Regular expressions | |
data R : Type where | |
R_0 : R | |
R_1 : R | |
R_char : Char -> R | |
(.|) : R -> R -> R | |
(...) : R -> R -> R | |
R_star : R -> R | |
||| Matching algorithms | |
data R_match : List Char -> R -> Type where | |
R_match_zero : R_match s R_0 | |
R_match_unit : R_match [] R_1 | |
R_match_char : {c : Char} -> R_match (c :: _) (R_char c) | |
R_match_plus1 : {s : List Char} -> {r1, r2 : R} -> R_match s r1 -> R_match s (r1 .| r2) | |
R_match_plus2 : {s : List Char} -> {r1, r2 : R} -> R_match s r2 -> R_match s (r1 .| r2) | |
R_match_times : {s1, s2 : List Char} -> {r1, r2 : R} -> | |
R_match s1 r1 -> R_match s2 r2 -> R_match (s1 ++ s2) (r1 ... r2) | |
R_match_star1 : {r : R} -> R_match [] (R_star r) | |
R_match_star2 : {s1, s2 : List Char} -> {r : R} -> | |
R_match s1 r -> | |
R_match s2 (R_star r) -> R_match (s1 ++ s2) (R_star r) | |
data R_match_partial : (whole : List Char) -> (r : R) -> Type where | |
R_match_here : (matched, rest : List Char) -> (r_match : R_match matched r) -> R_match_partial (matched ++ rest) r | |
||| A proof that the list `ls` can be split in 2 parts | |
data Split : (ls : List a) -> Type where | |
IsSplit : (pref : List a) -> (suff : List a) -> Split (pref ++ suff) | |
prepend : (x : a) -> Split xs -> Split (x :: xs) | |
prepend x (IsSplit pref suff) = IsSplit (x :: pref) suff | |
splits : (ls : List a) -> List (Split ls) | |
splits ls = IsSplit [] ls :: splits' ls | |
where | |
splits' : (ls : List a) -> List (Split ls) | |
splits' [] = [] | |
splits' (x :: xs) = | |
let subsplits = splits' xs | |
in IsSplit [x] xs :: map (prepend x) subsplits | |
mutual | |
check_partial : (re : R) -> (Either String (R_match_partial s re), List Char) -> Split s -> (Either String (R_match_partial s re), List Char) | |
check_partial _ r@(Right _, _) _ = r | |
check_partial re (Left err, _) (IsSplit pref suff) with (accept pref re) | |
check_partial re (Left err, _) (IsSplit pref suff) | (Left l) = (Left l, suff) | |
check_partial re (Left err, _) (IsSplit pref suff) | (Right x) = (Right (R_match_here pref suff x), suff) | |
accept_partial : (r : R) -> (s : List Char) | |
-> Either String (R_match_partial s r) | |
accept_partial r s = | |
let subs = splits s | |
in fst $ foldl (check_partial r) (Left "no match", the (List Char) []) subs | |
accept : (s : List Char) -> (r : R) -> Either String (R_match s r) | |
accept s R_0 = Right R_match_zero | |
accept [] R_1 = Right R_match_unit | |
accept s R_1 = Left $ "'" ++ pack s ++ "' is not empty" | |
accept [y] (R_char x) with (decEq x y) | |
accept [y] (R_char x) | (Yes prf) = rewrite prf in Right R_match_char | |
accept [y] (R_char x) | (No contra) = Left $ show y ++ " does not match " ++ show x | |
accept [] (R_char x) = Left $ "empty string cannot match char " ++ show x | |
accept s (x .| y) with (accept s x) | |
accept s (x .| y) | (Left l) with (accept s y) | |
accept s (x .| y) | (Left l) | (Left z) = Left l | |
accept s (x .| y) | (Left l) | (Right r) = Right (R_match_plus2 r) | |
accept s (x .| y) | (Right r) = Right (R_match_plus1 r) | |
accept s (x ... y) with (accept_partial x s) | |
accept s (x ... y) | (Left l) = Left l | |
accept (matched ++ rest) (x ... y) | (Right (R_match_here matched rest r_match)) with (accept rest y) | |
accept (matched ++ rest) (x ... y) | (Right (R_match_here matched rest r_match)) | (Left l) = Left l | |
accept (matched ++ rest) (x ... y) | (Right (R_match_here matched rest r_match)) | (Right r) = Right $ R_match_times r_match r | |
-- empty string matches <re>* | |
accept [] (R_star x) = Right $ R_match_star1 | |
-- non empty string matches <re>* if a prefix matches <re> | |
-- and the rest matches <re>* | |
accept s (R_star x) with (accept_partial x s) | |
accept s (R_star x) | (Left l) = Left l | |
accept (matched ++ []) (R_star x) | (Right (R_match_here matched [] r_match)) = Right $ R_match_star2 r_match R_match_star1 | |
accept (matched ++ rest) (R_star x) | (Right (R_match_here matched rest r_match)) with (assert_total $ accept rest (R_star x)) | |
accept (matched ++ rest) (R_star x) | (Right (R_match_here matched rest r_match)) | (Left l) = (Left l) | |
accept (matched ++ rest) (R_star x) | (Right (R_match_here matched rest r_match)) | (Right r) = Right $ R_match_star2 r_match r | |
accept s _ = Left $ "failed to match " ++ pack s | |
namespace Test | |
test_splits : splits [1,2] = [IsSplit [] [1,2], IsSplit [1] [2], IsSplit [1,2] []] | |
test_splits = Refl | |
test_empty : accept [] R_1 = Right R_match_unit | |
test_empty = Refl | |
test_seq_match_1 : accept ['a','b'] (R_char 'a' ... R_char 'b') = Right (R_match_times {s1=['a']} {s2=['b']} R_match_char R_match_char) | |
test_seq_match_1 = Refl |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment