|
## Usage: |
|
## gem install combinatorics |
|
## irb |
|
## > require 'four_lettered'; |
|
## > puts histogram.map{|i| i.join(' ')}; |
|
## # pick some infrequent and some frequent letters
|
## > hards=poss_words({ "PKHCYBGWFVJXZQ" => 1, "AEIOU" => 3}) |
|
## > guess=hards[rand(hards.length)] # to get a starting word |
|
## #=> "ZOEA" |
|
## `open 'https://www.google.com/search?q=define+#{guess}'` # make sure it's a word
|
## poss_words({"ZOEA" => 2}) # incorporate new knowledge OR |
|
## poss_words({"ZOEA" => 0, ('A'..'Z').to_a.join('') => 4}) # incorporate new knowledge |
|
|
|
require 'combinatorics' |
|
require 'set' |
|
|
|
# All 4 letter words in /usr/share/dict/words |
|
# > head /usr/share/dict/words |
|
# A |
|
# a |
|
# aa |
|
# aal |
|
# aalii |
|
# aam |
|
# Aani |
|
# aardvark |
|
# aardwolf |
|
# Aaron |
|
|
|
# words[0..10] |
|
# => ["AANI", "AARU", "ABAC", "ABAS", "ABBA", "ABBY", "ABED", "ABEL", "ABET", "ABEY", "ABIE"] |
|
# Returns every four-letter word of a one-word-per-line dictionary, upcased.
#
# path - dictionary file to read. Defaults to the system word list, so a
#        plain `words` call behaves as before.
#
# Only lines that consist of exactly four ASCII letters are kept. Entries that
# differ only by case (e.g. "Male" and "male") both upcase to "MALE" and are
# kept as duplicates; the recorded next_word sample output in this file shows
# "MALE" twice for that reason.
#
# Raises Errno::ENOENT when the file does not exist.
def words(path = "/usr/share/dict/words")
  # Stream the file line by line instead of slurping it and building several
  # intermediate arrays.
  File.foreach(path).each_with_object([]) do |line, found|
    word = line.chomp.upcase
    # \A/\z anchor the whole entry; ^/$ would only anchor a line.
    found << word if word =~ /\A[A-Z]{4}\z/
  end
end
|
|
|
# Returns every four-letter word of a Hunspell .dic word list, upcased.
#
# path - .dic file to read. Defaults to the location the original author
#        used, so a plain `words_h` call behaves as before.
#
# Each line is treated as "word" optionally followed by "/FLAGS" or
# whitespace-separated fields; only the word part is examined. A word is kept
# only when the whole of it is four ASCII letters. The previous
# implementation took the leading run of letters instead, which turned
# entries such as "didn't/M" into the bogus word "DIDN".
#
# Raises Errno::ENOENT when the file does not exist.
def words_h(path = "/Users/tsnow/Downloads/hunspell-en_US-7.1-0/en_US.dic")
  # http://wordlist.sourceforge.net/ : http://downloads.sourceforge.net/wordlist/hunspell-en_US-7.1-0.zip
  File.foreach(path).each_with_object([]) do |line, found|
    entry = line[/\A[^\/\s]+/] # word part, before any "/FLAGS" or whitespace
    found << entry.upcase if entry && entry =~ /\A[A-Za-z]{4}\z/
  end
end
|
|
|
# All 4 letter words by what letters they contain: |
|
# letters2words[%w(C O Y).sort] #=> ["COZY"] |
|
# letters2words[%w(E J P).sort] #=> ["JAPE", "JUPE"] |
|
# letters2words[%w(A E L).sort].length #=> 53 |
|
# Indexes words by every subset of their letters.
#
# words - array of words to index. Defaults to the result of the `words`
#         method.
#
# Returns a Hash whose keys are sorted arrays of distinct letters (the empty
# array included) and whose values are the words, in input order, that
# contain all of those letters. Words with a repeated letter are skipped
# entirely. Looking up a letter set no word contains returns nil.
def letters2words(words = words())
  # The parentheses matter: a bare `words = words` default is a circular
  # argument reference, which on Ruby >= 2.2 reads the still-unset parameter
  # (nil) instead of calling the `words` method.
  words.each_with_object({}) do |word, index|
    letters = word.split('').sort # "ABIE" => ['A','B','E','I']
    next if letters.uniq != letters # no words with duplicate letters

    # Every subset, smallest first. Array#combination preserves the sorted
    # order of `letters`, so each subset is already in key form.
    (0..letters.length).each do |size|
      letters.combination(size) do |subset|
        (index[subset] ||= []) << word
      end
    end
  end
end
|
|
|
# histogram(1,%w(A E I O U T R F V J Z Q X))[0..3] #=> [[862, "N"], [860, "L"], [800, "S"], [575, "D"]] |
|
# Ranks letter combinations by how many indexed words contain them.
#
# length        - number of letters in the combinations to report (default 1).
# eliminated    - combinations, as joined strings such as "A" or "AB", to
#                 leave out of the result.
# letters2words - index as produced by `letters2words`. Defaults to calling
#                 that method.
#
# Returns an array of [word_count, joined_letters] pairs sorted from highest
# to lowest; ties are ordered by the letters, also descending.
def histogram(length = 1, eliminated = %w(), letters2words = letters2words())
  # `letters2words()` needs its parentheses: a bare `letters2words =
  # letters2words` default is a circular argument reference and evaluates to
  # nil on Ruby >= 2.2.
  counts = letters2words.map { |letters, matches| [matches.length, letters.join('')] }
  wanted = counts.select { |_, combo| combo.length == length && !eliminated.include?(combo) }
  wanted.sort.reverse
end
|
# histogram |
|
# => |
|
# Sample single-letter histogram recorded by the original author, printed as
# "<number of indexed words containing the letter> <letter>". It is kept as
# comments: the former bare %( ... ) string literal was evaluated and
# discarded every time the file was loaded.
#
#   1746 A
#   1381 E
#   1125 O
#   1073 I
#   958 R
#   883 T
#   862 N
#   860 L
#   806 U
#   800 S
#   575 D
#   537 M
#   522 P
#   522 K
#   516 H
#   491 C
#   486 Y
#   442 B
#   440 G
#   366 W
#   261 F
#   163 V
#   122 J
#   101 X
#   99 Z
#   27 Q
|
|
|
# poss_words({"FORK"=>1, "DACE"=>1, "FAST"=>2, "PUCE"=>1, "SOAP"=>2, "PART"=>2, "COST"=>2, "REST"=>1}) |
|
# #=> ["AUTO"] |
|
# Lists the indexed words that are consistent with every clue so far.
#
# knowns        - Hash of letter string => number of that string's distinct
#                 letters that must appear in the word, e.g. {"FORK" => 1}.
# letters2words - index as produced by `letters2words`. Defaults to calling
#                 that method.
#
# Returns an array of unique words. Candidates are gathered by looking up
# every letter set, of at most four letters drawn from the clue strings, that
# satisfies all clues; sets are visited smallest first, then in combination
# order. Each candidate is then re-checked against the clues using all of its
# own letters.
def poss_words(knowns, letters2words = letters2words())
  # `letters2words()` needs its parentheses: a bare `letters2words =
  # letters2words` default is a circular argument reference and evaluates to
  # nil on Ruby >= 2.2.
  clues = knowns.map { |guess, hits| [guess.split(''), hits] }
  consistent = lambda do |letters|
    clues.all? { |guess_letters, hits| (guess_letters & letters).length == hits }
  end

  known_space = knowns.keys.join('').split('').uniq.sort

  # Only letter sets of up to four letters were ever used, so enumerate just
  # those rather than the full powerset (2**26 subsets when a clue spans the
  # whole alphabet). Combinations of a sorted array are already sorted keys.
  candidates = (0..4).flat_map do |size|
    known_space.combination(size).
      select { |subset| consistent.call(subset) }.
      map { |subset| letters2words[subset] }
  end

  # A word found through a partial letter set may contain further clue
  # letters, so verify every clue again with the word's full letters.
  candidates.flatten.compact.uniq.select { |word| consistent.call(word.split('')) }
end
|
|
|
# Narrows word_choices by one more letter, preferring the letter that occurs
# in the most of those words.
#
# word_choices - array of candidate words. The original note says every
#                element should already contain all of `letters`.
# letters      - array of letters chosen so far.
#
# Letters are tried from most to least frequent (per `histogram` over
# word_choices). The first one that, together with `letters`, is contained in
# at least one word wins.
#
# Returns [matching_words, letters + [new_letter]] on success, or
# [word_choices, letters] unchanged when no letter can be added.
def best_words(word_choices, letters)
  index = letters2words(word_choices)

  histogram(1, %w(), index).each do |_count, candidate|
    extended = letters + [candidate]
    matches = index[extended.sort]
    # A letter already in `letters` produces a key with a repeat, which the
    # index never contains, so it is skipped here.
    return [matches, extended] unless matches.nil? || matches.empty?
  end

  [word_choices, letters]
end
|
# Returns [word, word_choices, letters_used, words_covered, words_left (uncovered), words_total] |
|
# next_word #=> ["LAME", ["ALEM", "ALME", "LAME", "LEAM", "MALE", "MALE", "MEAL", "MELA"], ["A", "E", "L", "M"], 2970, 2139, 5272] |
|
# Suggests the next word to guess from a list of candidates.
#
# words - array of candidate words. Defaults to the result of the `words`
#         method.
#
# Letters are added one at a time with `best_words` until it can no longer
# narrow the choice. Coverage is then measured as the number of distinct
# words in `words` that contain at least one of the chosen letters.
#
# Returns [word, word_choices, letters_used, words_covered, words_uncovered,
# words_total], where word is picked at random from word_choices (nil when
# word_choices is empty).
def next_word(words = words())
  # `words()` needs its parentheses: a bare `words = words` default is a
  # circular argument reference and evaluates to nil on Ruby >= 2.2.
  word_choices = words
  letters = []

  loop do
    previous = [word_choices, letters]
    word_choices, letters = best_words(word_choices, letters)
    break if [word_choices, letters] == previous # nothing more to narrow
  end

  index = letters2words(words)
  coverage = letters.map { |letter| index[[letter]] }.flatten.uniq
  not_covered = words - coverage

  [word_choices.sample, word_choices, letters,
   coverage.length, not_covered.length, words.length]
end
|
|
|
# == INKY == |
|
|
|
# Actual: |
|
# next_word(poss_words({"BOAT" => 0}, letters2words)) #=> ["MIRE", ["EMIR", "IMER", "MIRE", "REIM", "REMI", "RIEM", "RIME"], ["E", "I", "R", "M"], 789, 98, 887] |
|
# next_word(poss_words({"BOAT" => 0, "RUIN" => 2}, letters2words)) #=> ["LINE", ["LIEN", "LINE", "NEIL", "NILE"], ["I", "N", "E", "L"], 288, 29, 317] |
|
# next_word(poss_words({"BOAT" => 0, "RUIN" => 2, "DEFY" => 1}, letters2words)) #=> ["MIRE", ["EMIR", "IMER", "MIRE", "REIM", "REMI", "RIEM", "RIME"], ["I", "E", "R", "M"], 152, 6, 158] |
|
# next_word(poss_words({"BOAT" => 0, "RUIN" => 2, "DEFY" => 1, "PUNY" => 2}, letters2words)) #=> ["UNZE", ["UNZE", "ZENU"], ["N", "U", "E", "Z"], 31, 0, 31] |
|
# poss_words({"BOAT" => 0, "RUIN" => 2, "DEFY" => 1, "PUNY" => 2}, letters2words) #=> ["DUNG", "DUNK", "DUNS", "MUND", "GENU", "JUNE", "LUNE", "MENU", "NUKE", "SUNE", "UNZE", "ZENU", "FUNK", "INKY", "INLY", "LINY", "MINY", "NIZY", "VINY", "WINY", "UILY", "YUKI", "JURY", "PIND", "PIEN", "PINE", "PERN", "PERU", "PRUE", "PURE", "PFUI"] |
|
# next_word(["DUNG", "DUNK", "JUNE", "MENU", "NUKE", "FUNK", "INKY", "JURY", "PINE", "PERN", "PERU","PRUE", "PURE"]) #=> ["FUNK", ["FUNK"], ["U", "N", "K", "F"], 13, 0, 13] |
|
# next_word(poss_words({"BOAT" => 0, "RUIN" => 2, "DEFY" => 1, "PUNY" => 2, "DUMP" => 0}, letters2words)) #=> ["LINY", ["INLY", "LINY"], ["Y", "N", "I", "L"], 6, 0, 6] |
|
# poss_words({"BOAT" => 0, "RUIN" => 2, "DEFY" => 1, "PUNY" => 2, "DUMP" => 0}, letters2words) #=> ["INKY", "INLY", "LINY", "NIZY", "VINY", "WINY"] |
|
# => INKY |
|
|
|
# Suggested: |
|
# irb(main):578:0> next_word(poss_words({"MALE" => 0}, letters2words)) #=> ["TYRO", ["ROYT", "RYOT", "TORY", "TROY", "TYRO"], ["O", "R", "T", "Y"], 685, 180, 865] |
|
# irb(main):579:0> next_word(poss_words({"MALE" => 0, "TROY" =>1}, letters2words)) #=> ["DION", ["DION", "NODI", "ODIN"], ["O", "N", "I", "D"], 318, 47, 365] |
|
# irb(main):580:0> next_word(poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2}, letters2words)) #=> ["NOUS", ["NOSU", "NOUS", "ONUS"], ["N", "O", "S", "U"], 108, 8, 116] |
|
# irb(main):581:0> next_word(poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1}, letters2words)) #=> ["WINT", ["TWIN", "WINT"], ["I", "N", "T", "W"], 48, 1, 49] |
|
# irb(main):582:0> next_word(poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1, "TWIN"=> 1}, letters2words)) #=> ["COIF", ["COIF", "FICO", "FOCI"], ["O", "I", "C", "F"], 18, 0, 18] |
|
# irb(main):583:0> next_word(poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1, "TWIN"=> 1, "COIF" => 1}, letters2words)) #=> ["DOWP", ["DOWP"], ["D", "W", "O", "P"], 3, 0, 3] |
|
# irb(main):584:0> poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1, "TWIN"=> 1, "COIF" => 1}, letters2words) #=> ["DHOW", "DOWP", "SIDY"] |
|
# irb(main):585:0> poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1, "TWIN"=> 1,}, letters2words) #=> ["CIPO", "CIXO", "COIF", "COIX", "FICO", "FOCI", "GOBI", "HOPI", "ICHO", "KIHO", "KOBI", "KOPI", "PICO", "ZOIC", "DHOW", "DOWF", "DOWP", "SIDY"] |
|
# irb(main):586:0> poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1, }, letters2words) #=> ["DHOW", "DOCK", "DOWF", "DOWP", "CIPO", "CIXO", "COIF", "COIX", "FICO", "FOCI", "GOBI", "HOPI", "ICHO", "KIHO", "KOBI", "KOPI", "PICO", "ZOIC", "WYND", "BIRN", "BRIN", "CRIN", "FIRN", "GIRN", "GRIN", "KIRN", "PIRN", "RING", "RINK", "BINT", "HINT", "KNIT", "PINT", "THIN", "TING", "TINK", "TWIN", "VINT", "WINT", "INBY", "INKY", "NIZY", "PINY", "PYIN", "VINY", "WINY", "STID", "SIDY", "DUIT"] |
|
# irb(main):587:0> next_word(["DOCK", "COIF", "FOCI", "PICO", "FIRN", "GRIN", "RING", "RINK", "HINT", "KNIT", "PINT", "THIN", "TINK", "TWIN", "INKY", "VINY",]) #=> ["TINK", ["KNIT", "TINK"], ["I", "N", "T", "K"], 16, 0, 16] |
|
# irb(main):588:0> poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1, "TINK" => 3}, letters2words) #=> ["BINT", "HINT", "PINT", "THIN", "TING", "TWIN", "VINT", "WINT", "KIRN", "RINK", "INKY"] |
|
# irb(main):589:0> next_word(poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1, "TINK" => 3}, letters2words)) #=> ["TWIN", ["TWIN", "WINT"], ["N", "I", "T", "W"], 11, 0, 11] |
|
# irb(main):590:0> poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1, "TINK" => 3, "TWIN" => 2}, letters2words) #=> ["KIRN", "RINK", "INKY"] |
|
# irb(main):591:0> next_word(poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1, "TINK" => 3, "TWIN" => 2}, letters2words)) #=> ["RINK", ["KIRN", "RINK"], ["N", "K", "I", "R"], 3, 0, 3] |
|
# irb(main):592:0> next_word(poss_words({"MALE" => 0, "TROY" =>1, "ODIN" => 2, "ONUS" => 1, "TINK" => 3, "TWIN" => 2, "RINK" => 3}, letters2words)) #=> ["INKY", ["INKY"], ["Y", "N", "K", "I"], 1, 0, 1] |
|
|
|
# == AUTO == |
|
|
|
# Actual: |
|
# 1. next_word(poss_words({"DACE"=>1,}, letters2words)) #=> ["LARI", ["ARIL", "LAIR", "LARI", "LIAR", "LIRA", "RAIL", "RIAL"], ["A", "R", "I", "L"], 1603, 399, 2002] |
|
# 2. next_word(poss_words({"DACE"=>1,"FORK" => 1}, letters2words)) #=> ["POTE", ["PETO", "POET", "POTE", "TOPE"], ["E", "T", "O", "P"], 209, 81, 290] |
|
# 3. next_word(poss_words({"DACE"=>1,"FORK" => 1,"PUCE"=>1}, letters2words)) #=> ["REST", ["REST", "SERT", "STRE"], ["E", "T", "R", "S"], 114, 10, 124] |
|
# 4. next_word(poss_words({"DACE"=>1,"FORK" => 1,"PUCE"=>1, "FAST" => 2}, letters2words)) #=> ["URSA", ["RUSA", "SAUR", "SURA", "URSA", "USAR"], ["A", "R", "U", "S"], 31, 1, 32] |
|
# 5. next_word(poss_words({"DACE"=>1,"FORK" => 1,"PUCE"=>1, "FAST" => 2, "SOAP" => 2}, letters2words)) #=> ["SURA", ["RUSA", "SAUR", "SURA", "URSA", "USAR"], ["A", "R", "U", "S"], 15, 0, 15] |
|
# 6. next_word(poss_words({"DACE"=>1,"FORK" => 1,"PUCE"=>1, "FAST" => 2, "SOAP" => 2, "PART" => 2}, letters2words)) #=> ["USAR", ["RUSA", "SAUR", "SURA", "URSA", "USAR"], ["U", "A", "S", "R"], 6, 0, 6] |
|
# 7. next_word(poss_words({"DACE"=>1,"FORK" => 1,"PUCE"=>1, "FAST" => 2, "SOAP" => 2, "PART" => 2, "COST" => 2,}, letters2words)) #=> ["AUTO", ["AUTO"], ["U", "T", "O", "A"], 1, 0, 1] |
|
# 8. next_word(poss_words({"DACE"=>1,"FORK" => 1,"PUCE"=>1, "FAST" => 2, "SOAP" => 2, "PART" => 2, "COST" => 2, "REST" => 1}, letters2words)) #=> ["AUTO", ["AUTO"], ["U", "T", "O", "A"], 1, 0, 1] |
|
# 9. auto |
|
|
|
# Suggested: |
|
# words |
|
# next_word(words) #=> ["MALE", ["ALEM", "ALME", "LAME", "LEAM", "MALE", "MALE", "MEAL", "MELA"], ["A", "E", "L", "M"], 2970, 2139, 5272] |
|
# 1. next_word(poss_words({"MALE" => 1}, letters2words)) #=> ["VAIR", ["RAVI", "RIVA", "VAIR", "VARI", "VIRA"], ["A", "R", "I", "V"], 1340, 501, 1841] |
|
# 2. next_word(poss_words({"MALE" => 1, "VAIR" => 1}, letters2words)) #=> ["NAUT", ["ANTU", "AUNT", "NAUT", "TAUN", "TUAN", "TUNA"], ["A", "N", "U", "T"], 559, 112, 671] |
|
# 3. next_word(poss_words({"MALE" => 1, "VAIR" => 1, "AUNT" => 3}, letters2words)) #=> ["NAWT", ["NAWT", "TAWN", "WANT"], ["A", "N", "T", "W"], 7, 0, 7] |
|
# poss_words({"MALE" => 1, "VIRA" => 1, "AUNT" => 3},letters2words) #=> ["BANT", "CANT", "FANT", "GANT", "GNAT", "HANT", "NAST", "NAWT", "PANT", "SANT", "STAN", "TANG", "TANH", "TANK", "TANO", "TAWN", "THAN", "WANT", "ZANT", "ANSU", "ANUS", "BAUN", "BUNA", "CUNA", "FAUN", "GAUN", "GUAN", "GUNA", "JAUN", "JUAN", "KUAN", "NABU", "NAPU", "NUBA", "NUDA", "PUAN", "PUNA", "QUAN", "UANG", "UNCA", "UZAN", "YUAN", "ABUT", "ACTU", "AUTO", "DAUT", "GAUT", "PATU", "PAUT", "QUAT", "SAUT", "TABU", "TAKU", "TAPU", "TASU", "TUBA", "TUFA", "TUZA", "UTAH", "UTAS"] |
|
# next_word(["GNAT", "PANT", "TANG", "TANK", "THAN", "WANT", "ANUS", "AUTO", "TUBA"]) #=> ["TANG", ["GNAT", "TANG"], ["A", "T", "N", "G"], 9, 0, 9] |
|
# poss_words({"MALE" => 1, "VIRA" => 1, "AUNT" => 3, "TANG" => 2},letters2words) #=> ["ANSU", "ANUS", "BAUN", "BUNA", "CUNA", "FAUN", "JAUN", "JUAN", "KUAN", "NABU", "NAPU", "NUBA", "NUDA", "PUAN", "PUNA", "QUAN", "UNCA", "UZAN", "YUAN", "ABUT", "ACTU", "AUTO", "DAUT", "PATU", "PAUT", "QUAT", "SAUT", "TABU", "TAKU", "TAPU", "TASU", "TUBA", "TUFA", "TUZA", "UTAH", "UTAS"] |
|
# 4. next_word(poss_words({"MALE" => 1, "VIRA" => 1, "AUNT" => 3, "TANG" => 2},letters2words)) #=> ["BUNA", ["BAUN", "BUNA", "NABU", "NUBA"], ["U", "A", "N", "B"], 36, 0, 36] |
|
# poss_words({"MALE" => 1, "VIRA" => 1, "AUNT" => 3, "TANG" => 2, "BUNA" => 2},letters2words) => ["ACTU", "AUTO", "DAUT", "PATU", "PAUT", "QUAT", "SAUT", "TAKU", "TAPU", "TASU", "TUFA", "TUZA", "UTAH", "UTAS"] |
|
# 5. => auto |
|
# |
|
# words_h |
|
# irb(main):607:0> next_word(words_h) #=> ["LENA", ["LANE", "LEAN", "LENA", "NEAL", "ELAN", "LANE", "LEAN"], ["A", "E", "L", "N"], 1828, 1151, 3198] |
|
# irb(main):610:0> next_word(poss_words({"LENA" => 1}, letters2words(words_h))) #=> ["RAPT", ["PART", "PRAT", "RAPT", "TARP", "TRAP"], ["A", "R", "T", "P"], 703, 331, 1034] |
|
# irb(main):611:0> next_word(poss_words({"LENA" => 1, "RAPT" => 2}, letters2words(words_h))) #=> ["ARMY", ["MARY", "MYRA", "ARMY"], ["A", "R", "M", "Y"], 211, 10, 221] |
|
# irb(main):612:0> next_word(poss_words({"LENA" => 1, "RAPT" => 2, "ARMY" => 1}, letters2words(words_h))) #=> ["TWAS", ["SWAT", "WATS", "TWAS", "WAST"], ["T", "A", "S", "W"], 93, 12, 105] |
|
# irb(main):613:0> next_word(poss_words({"LENA" => 1, "RAPT" => 2, "ARMY" => 1, "TWAS" => 2}, letters2words(words_h))) #=> ["COAT", ["CATO", "COAT", "TACO"], ["A", "T", "O", "C"], 41, 0, 41] |
|
# irb(main):614:0> next_word(poss_words({"LENA" => 1, "RAPT" => 2, "ARMY" => 1, "TWAS" => 2, "COAT" => 3}, letters2words(words_h))) #=> ["GOAT", ["GOAT", "TOGA"], ["T", "A", "O", "G"], 13, 0, 13] |
|
# irb(main):615:0> next_word(poss_words({"LENA" => 1, "RAPT" => 2, "ARMY" => 1, "TWAS" => 2, "COAT" => 3, "GOAT" => 3}, letters2words(words_h))) #=> ["AUTO", ["AUTO"], ["T", "O", "A", "U"], 6, 0, 6] |