-
-
Save mebusw/f3a1e6f5ef570244cc70dd154e6a998f to your computer and use it in GitHub Desktop.
用 CPS 实现字符串匹配。基于讲座 https://youtu.be/cnhb4M8-J5M
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Re(object): | |
def match(self, str, idx, cont): | |
raise NotImplementedError() | |
def matches(self, str): | |
return self.match(str, 0, lambda i: i == len(str)) | |
class ReChar(Re): | |
# ReChar(c) represents the character literal c. | |
def __init__(self, char): self.char = char | |
def match(self, str, idx, cont=lambda x: True): | |
# If the character at idx is the one we're looking for, we match! | |
if len(str) > idx and str[idx] == self.char: | |
# If we match, we call our continuation, to check that the rest of | |
# the string matches | |
return cont(idx+1) | |
return False # we didn't match | |
class ReSeq(Re): | |
# ReSeq(a, b) represents /ab/ | |
def __init__(self, first, second): self.first, self.second = first, second | |
def match(self, str, idx, cont=lambda x: True): | |
# first try matching self.first | |
return self.first.match(str, idx, | |
# giving a modified continuation that checks | |
# whether self.second matches | |
lambda idx: self.second.match(str, idx, cont)) | |
class ReOr(Re): | |
# ReOr(a,b) represents /a|b/ | |
def __init__(self, first, second): self.first, self.second = first, second | |
def match(self, str, idx, cont=lambda x: True): | |
return (self.first.match(str, idx, cont) # try matching self.first | |
or self.second.match(str, idx, cont) # else try self.second | |
) | |
# Note how we might call our continuation twice - backtracking! | |
class ReStar(Re): | |
# ReStar(a) represents /a*/ | |
def __init__(self, inner): self.inner = inner | |
def match(self, str, idx, cont=lambda x: True): | |
return ( | |
# the one-or-more case, like /aa*/ | |
self.inner.match(str, idx, | |
lambda idx: self.match(str, idx, cont)) | |
# the empty case, like // | |
or cont(idx)) | |
def atLeastOne(re): | |
return ReSeq(re, ReStar(re)) | |
assert ReChar('a').match('a', 0) ==True | |
assert ReChar('a').match('b', 0) ==False | |
assert ReOr(ReChar('a'), ReChar('b')).match('b', 0) ==True | |
assert ReOr(ReChar('a'), ReChar('b')).match('c', 0) ==False | |
assert ReSeq(ReChar('a'), ReChar('c')).match('ac', 0) ==True | |
assert ReStar(ReChar('a')).match('aa', 0) ==True | |
assert ReStar(ReChar('a')).match('aaa', 0) ==True | |
assert ReStar(ReChar('a')).match('aab', 0) ==True | |
assert ReSeq(ReStar(ReChar('a')), ReChar('c')).match('aaac', 0) ==True | |
assert atLeastOne(ReChar('a')).match('', 0) ==False | |
assert ReSeq(ReSeq(ReChar('a'), ReChar('b')), ReChar('c')).match('abc', 0) ==True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def lit(c): | |
def re(s, cont=lambda x: True): | |
if not (True and s): return False | |
return c == s[0] and cont(s[1:]) | |
return re | |
def seq(re_a, re_b): | |
def re(s, cont=lambda x: True): | |
return re_a(s, cont=lambda rest: re_b(rest, cont)) | |
return re | |
def either(re_a, re_b): | |
# def f(s, cont=lambda x: True): | |
# return x(s, cont) or y(s, cont) | |
# return f | |
return lambda s, cont=lambda x:True : re_a(s, cont) or re_b(s, cont) | |
def star(re): | |
def f(s, cont=lambda x: True): | |
return re(s, cont=lambda rest: re(rest, cont)) | |
return f | |
def at_least_one(re): | |
return seq(re, star(re)) | |
assert lit('a')('a') | |
assert lit('a')('b') ==False | |
assert seq(lit('a'),lit('b'))('ab') | |
assert either(lit('a'),lit('b'))('b') | |
assert either(lit('a'),lit('b'))('c')==False | |
assert star(lit('a'))('aa') | |
assert star(lit('a'))('') ==False | |
assert star(lit('a'))('bb') ==False | |
assert at_least_one(lit('a'))('')==False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type Re = | |
| Lit of char | |
| Seq of Re * Re | |
| Or of Re * Re | |
| Star of Re | |
let rec match2 (re: Re) (str: char list) k = | |
match re with | |
| Lit(c) -> match str with | |
| [] -> false | |
| x::xs -> (x = c) && (k xs) | |
| Seq(a, b) -> match2 a str (fun rest -> match2 b rest k) | |
| Or(a, b) -> match2 a str k || match2 b str k | |
| Star(a) -> match2 a str (fun rest -> match2 (Star a) rest k) || k str | |
match2 (Lit 'a') ("b" |> Seq.toList) (fun x -> true) | |
match2 (Star (Lit 'a')) ("aaab" |> Seq.toList) (fun x -> true) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package cps; | |
import java.util.function.Function; | |
public interface Re { | |
boolean match(String s, Function<String, Boolean> kont); | |
default boolean match(String s) { | |
return match(s, kont -> true); | |
} | |
/** | |
* 匹配字符 | |
* @param charLiteral | |
* @return | |
*/ | |
static Re lit(char charLiteral) { | |
return (s, kont) -> { | |
if (s.isEmpty()) { | |
return false; | |
} | |
return charLiteral == s.charAt(0) && kont.apply(s.substring(1)); | |
}; | |
} | |
/** | |
* 按顺序匹配a b | |
* @param a | |
* @param b | |
* @return | |
*/ | |
static Re seq(Re a, Re b) { | |
return (s, kont) -> a.match(s, rest -> b.match(rest, kont)); | |
} | |
/** | |
* 匹配a或b | |
* @param a | |
* @param b | |
* @return | |
*/ | |
static Re either(Re a, Re b) { | |
return (s, kont) -> a.match(s, kont) || b.match(s, kont); | |
} | |
/** | |
* 匹配0到N个 | |
* @param re | |
* @return | |
*/ | |
static Re star(Re re) { | |
return (s, kont) -> re.match(s, rest -> star(re).match(rest, kont)) | |
|| kont.apply(s); | |
} | |
static Re atLeastOne(Re re) { | |
return seq(re, star(re)); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package cps; | |
import org.junit.Test; | |
import static cps.Re.*; | |
import static org.junit.Assert.assertTrue; | |
public class ReTest { | |
@Test | |
public void should_match() { | |
assertTrue(lit('a').match("a")); | |
assertTrue(!lit('b').match("a")); | |
assertTrue(either(lit('a'), lit('b')).match("b")); | |
assertTrue(seq(lit('a'), lit('b')).match("ab")); | |
assertTrue(star(lit('a')).match("aa")); | |
assertTrue(!atLeastOne(lit('a')).match("")); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment