Last active
August 29, 2015 14:23
-
-
Save altnight/545658da496c1cbb94c1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
- https://speakerdeck.com/anatoo/phpdexue-buvmxing-zheng-gui-biao-xian-enzinfalseshi-zu-mi | |
- http://blog.asial.co.jp/1221 | |
""" | |
import copy | |
class RegexVMThread(object): | |
def __init__(self): | |
self.string_pointer = 0 | |
self.program_counter = 0 | |
class RegexVMInstruction(object): | |
CHAR = 0 | |
MATCH = 1 | |
JUMP = 2 | |
SPLIT = 3 | |
def __init__(self, instruction_type, param=None): | |
self.type_ = instruction_type | |
self.param = param | |
class RegexVM(object): | |
@classmethod | |
def run(cls, string, instructions): | |
threads = [] | |
thread = RegexVMThread() | |
while True: | |
# 現在の命令を取り出す | |
instruction = instructions[thread.program_counter] | |
if not instruction: | |
raise Exception() | |
while True: | |
if instruction.type_ == RegexVMInstruction.CHAR: | |
# patch | |
try: | |
char = string[thread.string_pointer] | |
except IndexError: | |
char = 0 | |
# マッチする場合 | |
if char == instruction.param: | |
thread.string_pointer += 1 | |
thread.program_counter += 1 | |
else: | |
if threads: | |
# スレッドがまだある場合には、 | |
# 現在のスレッドを捨ててそのスレッドに切り替える | |
thread = threads.pop() | |
else: | |
# スレッドがもうない場合には、失敗する | |
return False | |
break | |
elif instruction.type_ == RegexVMInstruction.SPLIT: | |
# スレッドを分割する | |
new_thread = copy.deepcopy(thread) | |
new_thread.program_counter = instruction.param | |
threads.insert(0, new_thread) | |
thread.program_counter += 1 | |
break | |
elif instruction.type_ == RegexVMInstruction.MATCH: | |
# マッチする | |
return True | |
elif instruction.type_ == RegexVMInstruction.JUMP: | |
# 特定の命令に飛ぶ | |
thread.program_counter = instruction.param | |
break |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
vm = RegexVM() | |
instructions = [ | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'a'), | |
RegexVMInstruction(RegexVMInstruction.MATCH) | |
] | |
print(vm.run('a', instructions)) # True | |
print(vm.run('b', instructions)) # False | |
print('-' * 40) | |
instructions = [ | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'a'), | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'b'), | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'c'), | |
RegexVMInstruction(RegexVMInstruction.MATCH) | |
] | |
print(RegexVM.run('abc', instructions)); # True | |
print(RegexVM.run('bc', instructions)); # False | |
print('-' * 40) | |
# /(ab)|(cd)/という正規表現を表現する命令列 | |
instructions = [ | |
RegexVMInstruction(RegexVMInstruction.SPLIT, 4), | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'a'), | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'b'), | |
RegexVMInstruction(RegexVMInstruction.JUMP, 6), | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'c'), | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'd'), | |
RegexVMInstruction(RegexVMInstruction.MATCH) | |
] | |
print(RegexVM.run('ab', instructions)); # True | |
print(RegexVM.run('cd', instructions)); # True | |
print(RegexVM.run('bc', instructions)); # False | |
print('-' * 40) | |
# /a?/という正規表現を表現する命令列 | |
instructions = [ | |
RegexVMInstruction(RegexVMInstruction.SPLIT, 2), | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'a'), | |
RegexVMInstruction(RegexVMInstruction.MATCH) | |
] | |
print(RegexVM.run('', instructions)); # => True | |
print(RegexVM.run('a', instructions)); # => True | |
print('-' * 40) | |
# /(ab)*/という正規表現を表現する命令列 | |
instructions = [ | |
RegexVMInstruction(RegexVMInstruction.SPLIT, 4), | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'a'), | |
RegexVMInstruction(RegexVMInstruction.CHAR, 'b'), | |
RegexVMInstruction(RegexVMInstruction.JUMP, 0), | |
RegexVMInstruction(RegexVMInstruction.MATCH) | |
] | |
print(RegexVM.run('ab', instructions)); # => True | |
print(RegexVM.run('abab', instructions)); # => True | |
print(RegexVM.run('ababab', instructions)); # => True | |
print(RegexVM.run('', instructions)); # => True |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment