Created
August 3, 2016 05:25
-
-
Save mehaase/9c0ced4c60d84516f290481149288f6b to your computer and use it in GitHub Desktop.
extract dictionary words from a string
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the system word list once at import time into a set so that membership
# tests are constant-time.  The encoding is spelled out so the result does not
# depend on the process locale (assumes the word list is UTF-8 -- TODO confirm
# for the target distribution).  Blank lines are skipped so the empty string
# never ends up in the vocabulary.
with open('/usr/share/dict/american-english', encoding='utf-8') as vocab_file:
    _vocab = {word for word in map(str.strip, vocab_file) if word}
def _find_longest_word(segment, shortest, vocab):
    ''' Return `(start, end)` of the leftmost longest `vocab` word in `segment`, or None. '''
    for length in range(len(segment), shortest - 1, -1):
        for start in range(len(segment) - length + 1):
            if segment[start:start + length] in vocab:
                return start, start + length
    return None


def _extract_words(string, min_=3, vocab=None):
    '''
    Return set of maximal length, non-overlapping dictionary words in `string`.

    The longest word (leftmost on ties) is taken first; the text to its left
    and to its right is then searched independently, so every returned word
    is a genuine contiguous substring of `string`.

    `min_` is the shortest word length considered (values below 1 are treated
    as 1). `vocab` is any container supporting `in`; when omitted, the
    module-level `_vocab` is used.
    '''
    if vocab is None:
        vocab = _vocab

    # A zero-length candidate could match an empty vocabulary entry and never
    # shrink the remaining text, so never search below length 1.
    shortest = max(min_, 1)

    words = set()
    pending = [string]
    while pending:
        segment = pending.pop()
        span = _find_longest_word(segment, shortest, vocab)
        if span is None:
            continue
        start, end = span
        words.add(segment[start:end])
        # Keep the two remainders separate: joining them could fabricate a
        # word that spans the removed match and does not occur in `string`.
        pending.append(segment[:start])
        pending.append(segment[end:])
    return words
# Demo: show the extracted words for a few sample inputs, including one
# ("abarc") whose candidate words overlap.
for sample in ('padlock', 'lockpad', 'abarc'):
    label = '_extract_words("{}")'.format(sample)
    print(label, _extract_words(sample))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment