Created
June 20, 2013 04:27
-
-
Save thisiswei/5820300 to your computer and use it in GitHub Desktop.
5 months ago.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A portmanteau word is a blend of two or more words, like 'mathelete', | |
which comes from 'math' and 'athelete' | |
rules are: a portmanteau must be composed of three non-empty pieces, | |
start+mid+end, | |
'adolescented' comes from | |
'adolescent','scented', with | |
start+mid+end='adole'+'scent'+'ed'. | |
score: 'adole'+'scent'+'ed', the | |
start,mid,end lengths are 5,5,2 and the total length is 12. The ideal | |
start,mid,end lengths are 12/4,12/2,12/4 = 3,6,3. So the final score | |
12 - abs(5-3) - abs(5-6) - abs(2-3) = 8. | |
A portmanteau must be composed of two different words (not the same word twice). | |
The output of natalie(words) should be the best portmanteau, or None """ | |
#-------------------------------------------------------------------------- | |
import itertools | |
import doctest | |
def natalie(words): | |
results = [] | |
# ['int', 'intimate', 'hinter', 'hint', 'winter'] | |
for w1, w2 in itertools.combinations(words,2): | |
if w1 not in w2 and w2 not in w1: | |
for m in presubfixes(w1):# all possible prefix,subfixes from w1 | |
if m in w2 and valid(m, w1, w2): #invalid => ('hi' 'hint' 'hiner') | |
results.append(three_parts(m, w1, w2)) | |
break #(presub fixes are sorted by lens), when we find :'initi' not need to do 'init' | |
return sorted(results) if results else None | |
def valid(m, w1, w2): | |
return w1.startswith(m) + w2.startswith(m) == 1 | |
# middle word1 word2 | |
# ('phant', 'elephant', 'phantom') => 'ele + phant + om' | |
#('int', 'intimate', 'hint') => 'h + int + imate' | |
def three_parts(m, w1, w2): | |
L = len(m) | |
parts = ((w1[:-L], m, w2[L:]) if w2.startswith(m) else | |
(w2[:w2.index(m)], m, w1[L:])) | |
if not all(parts): return # if not compose of start, mid, end | |
return calculate(*parts), ''.join(parts) | |
def calculate(*words): | |
s, m, e = map(len, words) | |
t = s + m + e | |
return t - abs(s-t/4.) - abs(m-t/2.) - abs(e-t/4.) | |
# >>> presubfixes('kimono') | |
#['kimon', 'imono', 'kimo', 'mono', 'kim', 'ono', 'no', 'ki', 'o', 'k'] | |
def presubfixes(w): | |
mixes = [(w[i:],w[:i]) for i in range(1, len(w))] | |
return sorted([y for x in mixes for y in x], key=len)[::-1] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment