Created
January 23, 2024 00:16
-
-
Save okurka12/230e57df5e5a53bfa20ed51a0b102eae to your computer and use it in GitHub Desktop.
Generation of the prefix table for the Knuth-Morris-Pratt algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# part of the knuth-morris-pratt algorithm: | |
# generation of the prefix table/fail vector | |
# | |
# víteček 🔥💯 (okurka12), January 2024 | |
# | |
def ppre(s: str) -> set[str]: | |
"""returns a set of all proper prefixes of `s`""" | |
output = set() | |
for i in range(1, len(s)): | |
output.add(s[:i]) | |
return output | |
def psuf(s: str) -> set[str]: | |
"""returns a set of all proper suffixes of `s`""" | |
output = set() | |
for i in range(1, len(s)): | |
output.add(s[i:]) | |
return output | |
def vec_fail(s: str) -> list[int]: | |
"""returns a fail vector of `s`""" | |
# every fail vector has this | |
output = [-1, 0] | |
# iterate through increasingly large prefixes of `s` | |
# example: s="ABABC..." -> first "AB", then "ABA", "ABAB", "ABABC", ... | |
for i in range(2, len(s)): | |
# current prefix of length i | |
current = s[:i] | |
# intersection of proper suffixes and prefixes | |
inters = ppre(current).intersection(psuf(current)) | |
# length of the greatest common suffix and prefix | |
val = max([len(s) for s in inters]) if len(inters) > 0 else 0 | |
output.append(val) | |
return output | |
def printpresuf(s: str) -> None: | |
"""prints prefix table/fail vector of `s`""" | |
print(s) | |
print(f" - fail vector: {vec_fail(s)}") | |
def main() -> None: | |
printpresuf("ABABCBABA") | |
printpresuf("BARBAROSSA") | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment