Created
May 3, 2022 17:47
-
-
Save makimoto/b81a8fa769dc49e829811bd4b7c33c40 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# extract_5chars.rb
# Extract effective 5-character words to use as the initial guess in Wordle.
#
# Written by: Shimpei Makimoto
# License: MIT License https://makimoto.mit-license.org/
#
# Usage: ruby extract_5chars.rb <input_file>
# The input_file is a list of words separated by newlines,
# such as: https://github.com/tabatkins/wordle-list/blob/main/words
#          https://dumps.wikimedia.org/enwiktionary/
# Number of letters a line must have to be accepted as a word; also the number
# of letter positions that are analysed.
WORD_LENGTH = 5
# How many of the most frequent letters are kept for each position.
FREQ_CHARACTERS_THRESHOLD = 5
# Maximum number of candidate words printed in the final list.
LISTED_CANDIDATES_THRESHOLD = 50
# Collect every input line that consists of exactly WORD_LENGTH lowercase
# ASCII letters. Input comes from ARGF, i.e. the files named on the command
# line, or standard input when none are given.
#
# The pattern is built once up front: a regexp literal containing
# interpolation would otherwise be re-evaluated for every input line.
word_pattern = /\A[a-z]{#{WORD_LENGTH}}\z/

words = []
ARGF.each_line do |line|
  line.chomp!
  # match? is enough here; no capture data is needed.
  words << line if word_pattern.match?(line)
end
puts "words: #{words.size}"
# char_counts[i] maps each letter to the number of words that have that letter
# at position i. Letters never seen at a position read as 0 (hash default).
#
# The tally is built one position at a time; walking the words in their
# original order keeps each hash's key insertion order unchanged.
char_counts = Array.new(WORD_LENGTH) do |position|
  words.each_with_object(Hash.new(0)) do |word, tally|
    tally[word[position]] += 1
  end
end
# chars_order_by_freq[i] lists the FREQ_CHARACTERS_THRESHOLD most frequent
# letters at position i, most frequent first. The selected letters and their
# counts are printed per position along the way.
chars_order_by_freq = char_counts.each_with_index.map do |tally, position|
  puts "\nchar_count[#{position}]:"
  # Descending by count.
  top_entries = tally.sort { |(_, left), (_, right)| right <=> left }
                     .first(FREQ_CHARACTERS_THRESHOLD)
  top_entries.each { |letter, count| puts "#{letter}: #{count}" }
  top_entries.map(&:first)
end
# Search for real words whose letter at every position is one of that
# position's most frequent letters, and in which no letter is repeated.
#
# Each entry of found_words is [index, word]:
#   word  - array of the letters, one per position
#   index - array of the same length; index[i] is the frequency rank
#           (0 = most frequent) of word[i] within chars_order_by_freq[i]

# Hash lookup, so each candidate is checked without scanning the whole list.
known_words = words.each_with_object({}) { |w, seen| seen[w] = true }

# One list per position, each element being a [letter, rank] pair.
ranked_letters = chars_order_by_freq.map { |letters| letters.each_with_index.to_a }
first_position, *other_positions = ranked_letters

found_words = []
# Array#product visits the combinations in the same order as one nested loop
# per position would (last position varies fastest), but works for any
# WORD_LENGTH instead of being hard-wired to five positions.
first_position.product(*other_positions) do |combination|
  word = combination.map(&:first)
  index = combination.map(&:last)
  next unless word.uniq.size == WORD_LENGTH && known_words.key?(word.join)

  found_words << [index, word]
end
# Print the candidates, lowest rank total first (a lower total means the
# letters are more frequent at their positions), limited to
# LISTED_CANDIDATES_THRESHOLD entries. Each line shows the concatenated
# per-position ranks followed by the word.
puts "\nfound_words:"
best_candidates = found_words
                  .sort_by { |ranks, _letters| ranks.inject(:+) }
                  .first(LISTED_CANDIDATES_THRESHOLD)
best_candidates.each do |ranks, letters|
  puts "#{ranks.join}: #{letters.join}"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment