Created
May 8, 2019 01:31
-
-
Save newjam/ee7dd9a0d1058c43ac02bf3305b00f6e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from sys import stdin | |
from sys import argv | |
from collections import defaultdict | |
# Adapted from Intro to Algorithms by CLRS. | |
# A string matching automata similar to grep. | |
def computeTransitionFunction(pattern): | |
m = len(pattern) | |
alphabet = set(pattern) | |
transitions = defaultdict(lambda: 0) # if the letter is not in the pattern go back to state 0 | |
for q in range(m): | |
for a in alphabet: | |
k = min(m , q + 1) | |
while not (pattern[:q] + a).endswith(pattern[:k]): | |
k -= 1 | |
transitions[q, a] = k | |
return transitions | |
if len(argv) != 2: | |
print('usage: ' + argv[0] + ' <pattern>') | |
else: | |
pattern = argv[1] | |
t = computeTransitionFunction(pattern) | |
for line in stdin: | |
q = 0 | |
for a in line: | |
q = t[q, a] | |
if q == len(pattern): | |
print(line[:-1]) | |
break |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment