Last active
December 18, 2015 11:09
-
-
Save liushuaikobe/5773812 to your computer and use it in GitHub Desktop.
Boyer-Moore algorithm Implementation via Python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Created on 2013-6-13 | |
@author: liushuai | |
''' | |
import time | |
badCharShift = {} | |
goodSuffixShift = {} | |
buildShiftTime = 0 | |
count = 0 | |
def BoyerMoore(text, pattern, debug = False): | |
global badCharShift, goodSuffixShift, count, buildShiftTime | |
start = time.time() | |
badCharShift = getOffsetBybadChar(pattern, debug) | |
goodSuffixShift = getOffsetBygoodSuffix(pattern, debug) | |
end = time.time() | |
buildShiftTime = end - start | |
pLen = len(pattern) | |
tLen = len(text) | |
w = pLen - 1 | |
while True: | |
if w > tLen: | |
return False | |
flag = False | |
for i in range(pLen)[::-1]: | |
tmp = w - ( pLen - i ) + 1 | |
if text[tmp] != pattern[i]: | |
w += max(lookupBadChar(i, text[tmp]), lookupGoodSuffx(i + 1)) | |
count += 1 | |
# w += lookupBadChar(i, text[tmp]) | |
if debug: | |
print text | |
print ' ' * (w - pLen + 1) + pattern | |
flag = True | |
break | |
if not flag: | |
return w - pLen + 1 | |
def getOffsetBybadChar(pattern, debug = False): | |
shift = {} | |
charTable = {}.fromkeys(pattern).keys() | |
for i in range(len(pattern)): | |
for badChar in charTable: | |
if pattern[i] == badChar: | |
continue | |
for j in range(i)[::-1]: | |
if pattern[j] == badChar: | |
shift[(i, badChar)] = i - j | |
break | |
if debug: | |
print shift | |
return shift | |
def getOffsetBygoodSuffix(pattern, debug = False): | |
shift = {} | |
for indexOfPattern in range(1 ,len(pattern)): | |
flag = False | |
for i in range(indexOfPattern, len(pattern)): | |
if i == indexOfPattern: | |
tmp = pattern[0:len(pattern) - 1].rfind(pattern[i:]) | |
if tmp != -1 and tmp != indexOfPattern: | |
shift[indexOfPattern] = i - tmp | |
flag = True | |
break | |
else: | |
if pattern.startswith(pattern[i:]): | |
shift[indexOfPattern] = i | |
flag = True | |
break | |
if not flag: | |
shift[indexOfPattern] = -1 | |
if debug: | |
print shift | |
return shift | |
def lookupBadChar(indexOfPattern, badChar): | |
global badCharShift | |
return badCharShift[(indexOfPattern, badChar)] if (indexOfPattern, badChar) in badCharShift else indexOfPattern + 1 | |
def lookupGoodSuffx(indexOfPattern): | |
global goodSuffixShift | |
return goodSuffixShift[indexOfPattern] if indexOfPattern in goodSuffixShift else 0 | |
def main(): | |
text = 'THCS CS A SCBPLE EXABPLE' | |
pattern = 'EXABPLE' | |
offset = BoyerMoore(text, pattern, True) | |
if not offset: | |
print 'lost' | |
else : | |
print 'hit at : ', offset | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment