Last active
February 25, 2019 16:56
-
-
Save bioshazard/30939024e639857d73d1fa5da5a69b2b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
occuranceCount = { } | |
# TY http://algo.pw/algo/64/python <3 | |
class AhoNode: | |
def __init__(self): | |
self.goto = {} | |
self.out = [] | |
self.fail = None | |
def aho_create_forest(patterns): | |
root = AhoNode() | |
for path in patterns: | |
node = root | |
for symbol in path: | |
node = node.goto.setdefault(symbol, AhoNode()) | |
node.out.append(path) | |
return root | |
def aho_create_statemachine(patterns): | |
root = aho_create_forest(patterns) | |
queue = [] | |
for node in root.goto.itervalues(): | |
queue.append(node) | |
node.fail = root | |
while len(queue) > 0: | |
rnode = queue.pop(0) | |
for key, unode in rnode.goto.iteritems(): | |
queue.append(unode) | |
fnode = rnode.fail | |
while fnode != None and not fnode.goto.has_key(key): | |
fnode = fnode.fail | |
unode.fail = fnode.goto[key] if fnode else root | |
unode.out += unode.fail.out | |
return root | |
def aho_find_all(s, root, callback): | |
node = root | |
for i in xrange(len(s)): | |
while node != None and not node.goto.has_key(s[i]): | |
node = node.fail | |
if node == None: | |
node = root | |
continue | |
node = node.goto[s[i]] | |
for pattern in node.out: | |
callback(i - len(pattern) + 1, pattern) | |
############################ | |
# Demonstration of work | |
def on_occurence(pos, pattern): | |
global occuranceCount | |
global haystack | |
if pattern not in occuranceCount: | |
occuranceCount[pattern] = 0 | |
occuranceCount[pattern] += 1 | |
debugOutput = "%d, At pos %s found pattern: %s\n" % (occuranceCount[pattern], pos, pattern) | |
sys.stderr.write(haystack) | |
sys.stderr.write(debugOutput) | |
import sys | |
# Prep a list of strings to match | |
if len(sys.argv) != 2: | |
sys.stderr.write("Requires 1 arg: file with patterns") | |
sys.exit(1) | |
patternSource = sys.argv[1] | |
needles = [] | |
for line in open(patternSource).readlines(): | |
needles.append(line[:-1]) # Drop the \n off the end | |
root = aho_create_statemachine(needles) | |
linecnt = 0 | |
haystack = '' | |
for haystack in sys.stdin: | |
linecnt += 1 | |
aho_find_all(haystack, root, on_occurence) | |
sys.stderr.write("Line %d\n" % linecnt) | |
for key, val in occuranceCount.items(): | |
print( "%s %d" % (key, val) ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment