Created
October 15, 2014 09:59
-
-
Save ikegami-yukino/235ae5f3c090bccd031a to your computer and use it in GitHub Desktop.
Longest Contiguous Common Subsequence
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def to_ngrams(s, minimum_n): | |
"""Generate n-grams (len(string) >= n >= minimum) from string | |
Params: | |
<str> s | |
<int> minimum | |
Return: | |
<set <str>> ngrams | |
""" | |
ngrams = [] | |
length = len(s) | |
for i in xrange(length-1): | |
ngrams += [s[i:j] for j in xrange(i+minimum_n, length+1)] | |
return set(ngrams) | |
def longest_contiguous_common_subsequence(lhs, rhs, minimum_n): | |
""" | |
Params: | |
<str> lhs | |
<str> rhs | |
<int> minimum_n | |
Return: | |
<str> longest_contiguous_common_subsequence | |
""" | |
if len(lhs) < minimum_n or len(rhs) < minimum_n: | |
return None | |
lhs = self.to_ngrams(lhs, minimum_n) | |
rhs = self.to_ngrams(rhs, minimum_n) | |
common_subsequences = lhs & rhs | |
if common_subsequences: | |
return sorted(common_subsequences, key=lambda x: len(x), reverse=True)[0] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment