Skip to content

Instantly share code, notes, and snippets.

@makimoto
Created May 3, 2022 17:47
Show Gist options
  • Save makimoto/b81a8fa769dc49e829811bd4b7c33c40 to your computer and use it in GitHub Desktop.
Save makimoto/b81a8fa769dc49e829811bd4b7c33c40 to your computer and use it in GitHub Desktop.
# extract_5chars.rb
# Extract 5-letter words that make effective opening guesses for Wordle.
#
# Written by: Shimpei Makimoto
# License: MIT License https://makimoto.mit-license.org/
#
# usage: ruby extract_5chars.rb <input_file>
# The input_file is a newline-separated list of words, such as:
#   https://github.com/tabatkins/wordle-list/blob/main/words
#   https://dumps.wikimedia.org/enwiktionary/
require 'set'

# Number of letters a word must have to be considered.
WORD_LENGTH = 5
# How many of the most frequent letters to keep for each position.
FREQ_CHARACTERS_THRESHOLD = 5
# Maximum number of candidate words printed at the end.
LISTED_CANDIDATES_THRESHOLD = 50
# Matches a line made of exactly WORD_LENGTH lowercase ASCII letters.
# Built once here instead of being re-interpolated for every input line.
WORD_PATTERN = /\A[a-z]{#{WORD_LENGTH}}\z/.freeze

# Collects the usable words from an enumerable of lines.
#
# @param lines [#each] yields one String per line (trailing newline allowed)
# @return [Array<String>] lines, chomped, that match WORD_PATTERN, in input order
def read_words(lines)
  lines.each_with_object([]) do |line, kept|
    word = line.chomp
    kept << word if WORD_PATTERN.match?(word)
  end
end

# Tallies how often each letter occurs at each position.
#
# @param words [Array<String>] words of WORD_LENGTH letters
# @return [Array<Hash{String=>Integer}>] one letter => count hash per position
def count_chars_by_position(words)
  counts = Array.new(WORD_LENGTH) { Hash.new(0) }
  words.each do |word|
    word.each_char.with_index { |char, pos| counts[pos][char] += 1 }
  end
  counts
end

# Picks the most frequent letters from one position's tally.
#
# @param char_count [Hash{String=>Integer}] letter => count
# @param limit [Integer] how many entries to return
# @return [Array<Array(String, Integer)>] [letter, count] pairs, highest count first
def most_frequent(char_count, limit = FREQ_CHARACTERS_THRESHOLD)
  char_count.sort { |(_, a), (_, b)| b <=> a }.first(limit)
end

# Finds every real word that can be spelled by taking one of the frequent
# letters for each position, using no letter twice.
#
# @param words [Array<String>] the known words
# @param chars_by_position [Array<Array<String>>] frequent letters per position,
#   most frequent first
# @return [Array<Array(Array<Integer>, Array<String>)>] [ranks, letters] pairs,
#   where ranks[i] is the index of letters[i] within chars_by_position[i]
def find_candidates(words, chars_by_position)
  known = words.to_set # constant-time membership instead of scanning the array
  head, *tail = chars_by_position.map { |chars| chars.each_with_index.to_a }
  found = []
  # product walks the combinations with the last position varying fastest,
  # i.e. the same order as one nested loop per position, for any word length.
  head.product(*tail) do |combo|
    chars = combo.map(&:first)
    next unless chars.uniq.size == chars.size # every letter must be distinct
    next unless known.include?(chars.join)

    found << [combo.map(&:last), chars]
  end
  found
end

# Orders candidates by the sum of their per-position ranks (lower is better).
#
# @param found [Array<Array(Array<Integer>, Array<String>)>] output of find_candidates
# @param limit [Integer] how many candidates to keep
# @return [Array<Array(Array<Integer>, Array<String>)>] the best candidates
def rank_candidates(found, limit = LISTED_CANDIDATES_THRESHOLD)
  found.sort_by { |ranks, _| ranks.sum }.first(limit)
end

words = read_words(ARGF)
puts "words: #{words.size}"

chars_order_by_freq = count_chars_by_position(words).each_with_index.map do |char_count, i|
  puts "\nchar_count[#{i}]:"
  most_frequent(char_count).map do |char, count|
    puts "#{char}: #{count}"
    char
  end
end

found_words = find_candidates(words, chars_order_by_freq)
puts "\nfound_words:"
rank_candidates(found_words).each do |ranks, chars|
  puts "#{ranks.join}: #{chars.join}"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment