Last active
January 27, 2017 14:44
-
-
Save alphaKAI/cc2747fe67f0c3231ee7328a1d6e419b to your computer and use it in GitHub Desktop.
VM based regex, in D and Haskell ported from Scala(http://qiita.com/yyu/items/84b1a00459408d1a7321)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std.stdio; | |
import std.typecons, | |
std.format, | |
std.traits, | |
std.range; | |
bool instanceof(alias Type, alias Variable)() { | |
return (cast(Type)Variable) !is null; | |
} | |
string caseof(alias instance, Type, Variables...)() { | |
string code = | |
q{assert (instanceof!(%s, %s), | |
"Type doesn't match, caseof expects that the variable `instance` is typed as %s, but given `instance` is not an instance of %s");} | |
.format(Type.stringof, instance.stringof, Type.stringof, Type.stringof); | |
string[] vtypes; | |
string[] vnames_type; | |
string[] vnames_req; | |
foreach (vtype; Fields!Type) { | |
vtypes ~= vtype.stringof; | |
} | |
foreach (vname; FieldNameTuple!Type) { | |
vnames_type ~= vname; | |
} | |
foreach (variable; Variables) { | |
vnames_req ~= variable; | |
} | |
foreach (vtype, vname_type, vname_req; zip(vtypes, vnames_type, vnames_req)) { | |
code ~= "%s %s = (cast(%s)%s).%s;".format(vtype, vname_req, Type.stringof, instance.stringof, vname_type); | |
} | |
return code; | |
} | |
class Regex { | |
static VMInst[] compile(Regex re) { | |
Tuple!(VMInst[], int) loop(Regex re, int n) { | |
if (instanceof!(Empty, re)) { | |
return tuple(cast(VMInst[])[], n); | |
} else if (instanceof!(Let, re)) { | |
mixin(caseof!(re, Let, "c")); | |
return tuple(cast(VMInst[])[new C(c)], n + 1); | |
} else if (instanceof!(Con, re)) { | |
mixin(caseof!(re, Con, "a", "b")); | |
auto ret1 = loop(a, n); | |
auto c1 = ret1[0], i1 = ret1[1]; | |
auto ret2 = loop(b, i1); | |
auto c2 = ret2[0], i2 = ret2[1]; | |
return tuple(c1 ~ c2, i2); | |
} else if (instanceof!(Alt, re)) { | |
mixin(caseof!(re, Alt, "a", "b")); | |
auto ret1 = loop(a, n); | |
auto c1 = ret1[0], i1 = ret1[1]; | |
auto ret2 = loop(b, i1 + 1); | |
auto c2 = ret2[0], i2 = ret2[1]; | |
auto c = (cast(VMInst[])[new S(n + 1, i1 + 1)]) ~ c1 ~ (cast(VMInst[])[new J(i2)]) ~ c2; | |
return tuple(c, i2); | |
} else if (instanceof!(Star, re)) { | |
mixin(caseof!(re, Star, "a")); | |
auto ci = loop(a, n + 1); | |
auto c = ci[0], i = ci[1]; | |
return tuple((cast(VMInst[])[new S(n + 1, i + 1)]) ~ c ~ (cast(VMInst[])[new J(n)]), i + 1); | |
} | |
throw new Error("no support such a Regex type"); | |
} | |
return loop(re, 0)[0] ~ cast(VMInst)(new M); | |
} | |
} | |
class Empty : Regex {} | |
class Con : Regex { Regex a, b; this (Regex _a, Regex _b) { this.a = _a; this.b = _b; } } | |
class Alt : Regex { Regex a, b; this (Regex _a, Regex _b) { this.a = _a; this.b = _b; } } | |
class Star : Regex { Regex a; this (Regex _a) { this.a = _a; } } | |
class Let : Regex { char c; this (char _c) { this.c = _c; } } | |
Empty empty() { return new Empty; } | |
Con con(Regex a, Regex b) { return new Con(a, b); } | |
Alt alt(Regex a, Regex b) { return new Alt(a, b); } | |
Star star(Regex a) { return new Star(a); } | |
Let let(char c) { return new Let(c); } | |
class VMInst { | |
static bool test(VMInst[] c, string s) { | |
s ~= "\0"; | |
bool loop(int pc, int sp) { | |
auto tx = c[pc]; | |
if (instanceof!(C, tx)) { | |
mixin(caseof!(tx, C, "l")); | |
if (s[sp] != l) { | |
return false; | |
} else { | |
return loop(pc + 1, sp + 1); | |
} | |
} else if (instanceof!(M, tx)) { | |
return true; | |
} else if (instanceof!(J, tx)) { | |
mixin(caseof!(tx, J, "n")); | |
return loop(n, sp); | |
} else if (instanceof!(S, tx)) { | |
mixin(caseof!(tx, S, "x", "y")); | |
if (loop(x, sp)) { | |
return true; | |
} else { | |
return loop(y, sp); | |
} | |
} | |
throw new Error("no support such a VMInst"); | |
} | |
return loop(0, 0); | |
} | |
} | |
class C : VMInst { char c; this (char _c) { this.c = _c; } } | |
class J : VMInst { int x; this (int _x) { this.x = _x; } } | |
class S : VMInst { int x, y; this (int _x, int _y) { this.x = _x; this.y = _y; } } | |
class M : VMInst {} | |
void main() { | |
auto re1 = con(con(let('a'), star(let('a'))), con(let('b'), star(let('b')))); | |
auto c = Regex.compile(re1); | |
writeln("c : ", c); | |
writeln(VMInst.test(c, "aabb")); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data Regex = Empty | |
| Con Regex Regex | |
| Alt Regex Regex | |
| Star Regex | |
| Let Char deriving Show; | |
data VMInst = C Char | |
| J Int | |
| S Int Int | |
| M deriving Show; | |
compile :: Regex -> [VMInst] | |
compile re = do | |
(fst $ loop re 0) ++ [M] | |
where | |
loop :: Regex -> Int -> ([VMInst], Int) | |
loop regex n = case regex of | |
Empty -> ([], n) | |
Let c -> ([C c], n + 1) | |
Con a b -> do | |
let (c1, i1) = loop a n | |
(c2, i2) = loop b i1 | |
in (c1 ++ c2, i2) | |
Alt a b -> do | |
let (c1, i1) = loop a $ n + 1 | |
(c2, i2) = loop b $ i1 + 1 | |
c = [S (n + 1) (i1 + 1)] ++ c1 ++ [J i2] ++ c2 | |
in (c, i2) | |
Star a -> do | |
let (c, i) = loop a $ n + 1 | |
([S (n + 1) (i + 1)] ++ c ++ [J n], i + 1) | |
test :: [VMInst] -> [Char] -> Bool | |
test c s = do | |
loop 0 0 | |
where | |
loop :: Int -> Int -> Bool | |
loop pc sp = case c!!pc of | |
C l -> if (s!!sp /= l) then False else loop (pc + 1) (sp + 1) | |
M -> True | |
J n -> loop n sp | |
S x y -> if loop x sp then True else loop y sp | |
main :: IO() | |
main = do | |
let re1 = Con (Con (Let 'a') (Star (Let 'a'))) (Con (Let 'b') (Star (Let 'b'))) | |
let c = compile re1 | |
print $ test c ['a', 'a', 'b', 'b', '\0'] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment