Last active
August 29, 2015 14:20
-
-
Save y-yu/aa2899a98792d099da5b to your computer and use it in GitHub Desktop.
VM型の正規表現エンジンを実装する ref: http://qiita.com/yyu/items/84b1a00459408d1a7321
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def compile(re: Regex): List[VMInst] = { | |
def loop(re: Regex, n: Int): (List[VMInst], Int) = re match { | |
case Empty => (Nil, n) | |
case Let(c) => (List(C(c)), n + 1) | |
case Con(a, b) => | |
val (c1, i1) = loop(a, n) | |
val (c2, i2) = loop(b, i1) | |
(c1 ++ c2, i2) | |
case Alt(a, b) => | |
val (c1, i1) = loop(a, n + 1) | |
val (c2, i2) = loop(b, i1 + 1) | |
val c = List(S(n + 1, i1 + 1)) ++ c1 ++ List(J(i2)) ++ c2 | |
(c, i2) | |
case Star(Star(a)) => loop(Star(a), n) | |
case Star(a) => | |
val (c, i) = loop(a, n + 1) | |
(List(S(n + 1, i + 1)) ++ c ++ List(J(n)), i + 1) | |
} | |
loop(re, 0)._1 ++ List(M) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
char a | |
split 2, 4 | |
char a | |
jmp 1 | |
char b | |
split 6, 8 | |
char b | |
jmp 5 | |
match |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
split 1, 5 | |
split 2, 4 | |
char a | |
jmp 1 | |
jmp 0 | |
char a | |
match |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let re1 = Con(Con(Let('a'), Star(Let('a'))), Con(Let('b'), Star(Let('b')))); | |
let c = compile re1; | |
let b = test c ['a', 'a', 'b', 'b', '\0']; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object TestRegex { | |
def main(args: Array[String]): Unit = { | |
val re1 = Con(Con(Let('a'), Star(Let('a'))), Con(Let('b'), Star(Let('b')))) | |
val c = Regex.compile(re1) | |
println(VMInst.test(c, List('a', 'a', 'b', 'b', '\0'))) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment