Skip to content

Instantly share code, notes, and snippets.

@mehaase
Created August 3, 2016 05:25
Show Gist options
  • Save mehaase/9c0ced4c60d84516f290481149288f6b to your computer and use it in GitHub Desktop.
Save mehaase/9c0ced4c60d84516f290481149288f6b to your computer and use it in GitHub Desktop.
extract dictionary words from a string
# Load the system word list once, at import time, into a set so that
# substring candidates can be tested for membership cheaply.
# The encoding is given explicitly so decoding does not depend on the
# process locale, and empty entries (blank lines) are skipped so that the
# empty string can never count as a dictionary word.
with open('/usr/share/dict/american-english',
          encoding='utf-8') as vocab_file:
    _vocab = {
        word
        for word in (line.strip() for line in vocab_file)
        if word
    }
def _longest_word(segment, min_):
    ''' Return `(start, end)` of the longest dictionary word in `segment`.

    Candidates are tried from the longest length down to `min_`; among
    candidates of equal length the leftmost one wins. Returns `None` when
    no substring of at least `min_` characters is in `_vocab`.
    '''
    length = len(segment)
    while length >= min_:
        for start in range(len(segment) - length + 1):
            if segment[start:start + length] in _vocab:
                return start, start + length
        length -= 1
    return None


def _extract_words(string, min_=3):
    ''' Return set of maximal length, non-overlapping dictionary words in
    `string`.

    The longest dictionary word (leftmost on ties) is taken first, then the
    text to its left and the text to its right are searched the same way,
    each on its own. Every returned word is therefore a contiguous
    substring of the original `string`.

    `string` is the text to search. `min_` is the minimum length of a word
    to report; values below 1 are treated as 1.
    '''
    # A zero-length candidate is never a meaningful word, and allowing one
    # could keep the search from terminating.
    min_ = max(min_, 1)

    words = set()
    # Pieces of the input that have not been searched yet. The pieces on
    # either side of a match are kept separate instead of being joined back
    # together, so no "word" can be formed across the gap left by a removed
    # match (e.g. "cat" must not be reported for "cpadat" after "pad").
    pending = [string]
    while pending:
        segment = pending.pop()
        match = _longest_word(segment, min_)
        if match is None:
            continue
        start, end = match
        words.add(segment[start:end])
        for piece in (segment[:start], segment[end:]):
            # Pieces shorter than `min_` cannot contain a reportable word.
            if len(piece) >= min_:
                pending.append(piece)
    return words
# Demo: run the extractor on a few sample strings and print each result
# next to a label showing the call that produced it.
for label, sample in (
    ('_extract_words("padlock")', 'padlock'),
    ('_extract_words("lockpad")', 'lockpad'),
    ('_extract_words("abarc")', 'abarc'),
):
    print(label, _extract_words(sample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment