Last active
September 24, 2023 18:36
-
-
Save amanuel2/d20e8c9e7000c5ce5a9ccb4cb6c4a949 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################ Robin Karp Rolling Hash (string searching) | |
class Solution: | |
# Rabin-Karp algorithm | |
# In the hashing algo, we can have hash collision and when we have hash collision, 2 | |
# different words can have the same hash. So we need to check that 2 words with same hash | |
# are really the same to avoid false positive. | |
# This check frequency can be reduced by taking multiple hashes. In case we take 2 hashes | |
# the probability of false negatives can go down to 10^-9. So we need to check only once. | |
# Still the time complexity is O(N + M) | |
# Can be made more optimum for worst case by using KMP. | |
def strStr(self, haystack: str, needle: str) -> int: | |
def charToIndex(char): return ord(char) - ord('a') | |
def hashString(string): | |
base = 26 | |
modulo = 10**9 + 7 | |
hash = 0 | |
# In this case the first char gets the highest weight of base^(n-1) | |
for char in string: | |
hash = (hash * base + charToIndex(char)) % modulo | |
return hash | |
def updateHash(hash, removeLetter, addLetter, windowLen): | |
base = 26 | |
modulo = 10**9 + 7 | |
maxWeight = pow(base, windowLen-1, modulo) # base ** (windowLen - 1) | |
return ( | |
# Letter getting removed was first and had base ^ n - 1 weight | |
base * (hash - charToIndex(removeLetter) * maxWeight) | |
+ charToIndex(addLetter) # New letter add will have 1 weight | |
) % modulo | |
if len(needle) > len(haystack): return -1 | |
if needle == haystack: return 0 | |
# Find hash for the first m characters for the initial comparision when i = 0 | |
hashNeedle = hashString(needle) | |
hash = hashString(haystack[:len(needle)]) | |
# O(n+m) | |
for i in range(len(haystack) - len(needle) + 1): | |
# If the hash is same as needle then confrim if the string in current window matches | |
# needles to avoid false positives (diff strings having same hash due to collision). | |
if hash == hashNeedle: | |
if needle == haystack[i: i + len(needle)]: return i | |
# Update hash: | |
# When we reach the last window we can't update hash anymore | |
if i != len(haystack) - len(needle): | |
# Remove first letter from hash and add letter next to current window in hash | |
hash = updateHash(hash, haystack[i], haystack[i + len(needle)], len(needle)) | |
return -1 | |
class Solution: | |
def longestDupSubstring(self, S: str) -> str: | |
# https://www.youtube.com/watch?v=BfUejqd07yo&t=357s | |
mod = 1_000_000_007 | |
def fn(k): | |
"""Return duplicated substring of length k.""" | |
p = pow(26, k, mod) | |
hs = 0 | |
seen = defaultdict(set) | |
for i, ch in enumerate(s): | |
hs = (26*hs + ord(ch) - 97) % mod | |
if i >= k: hs = (hs - (ord(s[i-k])-97)*p) % mod # rolling hash | |
if i+1 >= k: | |
if hs in seen and s[i+1-k:i+1] in seen[hs]: return s[i+1-k:i+1] # resolve hash collision | |
seen[hs].add(s[i+1-k:i+1]) | |
return "" | |
lo, hi = 0, len(s)-1 | |
while lo < hi: | |
mid = lo + hi + 1 >> 1 | |
if fn(mid): lo = mid | |
else: hi = mid - 1 | |
return fn(lo) | |
################################ KMP Algorithm (string searching) | |
class Solution: | |
def strStr(self, haystack: str, needle: str) -> int: | |
# KMP Algorithm: build a table to keep track of patterns so we don't have to restart | |
def build_pattern(substring): | |
pattern = [0 for _ in substring] | |
# two pointers one at the first, next position | |
j = 0 | |
for i in range(1, len(substring)): | |
while j > 0 and substring[i] != substring[j]: | |
j = pattern[j - 1] | |
if substring[i] == substring[j]: | |
j += 1 | |
pattern[i] = j | |
return pattern | |
def does_match(haystack, needle, pattern): | |
i = j = 0 | |
while i < len(haystack): | |
if haystack[i] == needle[j]: | |
i += 1 | |
j += 1 | |
if j == len(needle): | |
return i - j # Found a match | |
else: | |
if j > 0: | |
j = pattern[j - 1] | |
else: | |
i += 1 | |
return -1 | |
pattern = build_pattern(needle) | |
# print(pattern) | |
return does_match(haystack, needle, pattern) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment