Created
January 1, 2014 13:46
-
-
Save creamidea/8208158 to your computer and use it in GitHub Desktop.
horspool字符匹配算法
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
def create_shift_table(pattern): | |
ascii_table = {} | |
len_pattern = len(pattern) | |
for i in range(0, 128): # 初始化表 | |
ascii_table[chr(i)]=len_pattern | |
for i in range(0, len_pattern-1): # 构造移动表,排除模式的最后一个字符 | |
p = pattern[i:i+1] | |
ascii_table[p] = len_pattern - i - 1 # 算出该字符距离右边的距离 | |
return ascii_table | |
def horspool_matching(pattern, text): | |
move_num = 0 # 计数移动次数 | |
match_num = 0 # 计数总的匹配次数 | |
len_p = len(pattern) | |
len_t = len(text) | |
shift_table = create_shift_table(pattern) | |
i = len_p - 1 # 从右边开始匹配, i-k | |
pos = -1 # 用于记录匹配成功的位置 | |
while i <= len_t - 1: # 注意这里的range, range(5, 10) => [5, 6, 7, 8, 9] | |
k = 0 | |
every_match_num = 0 # 计数每一次移动前端匹配次数 | |
while k <= len_p-1 and pattern[len_p-1-k]==text[i-k]: | |
every_match_num = every_match_num + 1 | |
k = k + 1 # 匹配成功次数 | |
match_num = match_num + every_match_num | |
if k == len_p: # 匹配成功 | |
pos = i - len_p + 1 | |
break # 跳出循环 | |
else: | |
i = i + shift_table[text[i]] # i为当前匹配到的位置,加上移动的位置 | |
move_num = move_num + 1 | |
if match_num == 0: # 用于计数第一次匹配始终失败的情况 | |
match_num = move_num | |
return (pos, move_num, match_num) | |
if __name__ == '__main__': | |
pattern = 'BARBER' | |
text = 'JIM_SAW_ME_IN_A_BARBERSHOP' | |
pos, counter1, counter2 = horspool_matching(pattern, text) | |
print pos, counter1, counter2 | |
p1 = '00001' | |
p2 = '10000' | |
p3 = '01010' | |
# text = '00000' | |
text = "".join(["0" for i in range(0, 1000)]) | |
# print text | |
pos1, counter1_1, counter1_2 = horspool_matching(p1, text) | |
print pos1, counter1_1, counter1_2 | |
(pos2, counter2_1, counter2_2) = horspool_matching(p2, text) | |
print pos2, counter2_1, counter2_2 | |
pos3, counter3_1, counter3_2 = horspool_matching(p3, text) | |
print pos3, counter3_1, counter3_2 | |
# 输出结果: | |
# 16 5 7 | |
# -1 996 996 | |
# -1 996 3984 | |
# -1 498 498 |
最坏时间复杂度: O(nm)
如果是随机字符匹配,一般为:O(n)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
新的知识点:
逆转一个字符串:
犯错: