Last active
August 29, 2015 13:57
-
-
Save xbeta/9557090 to your computer and use it in GitHub Desktop.
Find all words from a dictionary that are 1 edited distance away.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
# | |
# Find all words from a dictionary that are 1 edited distance away. | |
# | |
# dictionary = ['bat', 'batt', 'cat', 'beetle'] | |
# similar(q) => all words in dictionary that are edit distance <= 1 from q | |
# edits: adding a letter, changing, or deleting | |
# | |
# similar('bat') => ['bat', 'batt', 'cat'] | |
# similar('beatle') => ['beetle'] | |
# similar('beetles') => ['beetle'] | |
# | |
# 1. gen every possible word edit distance <=1 from q | |
# - gen every addition | |
# - gen every deletion | |
# - gen every change | |
# 2. check for membership from that list in the dictionary (set) | |
@LETTERS = ("a".."z").to_a.join | |
def load_dict() | |
dictionary = [] | |
# Assume you are on UNIX-like system | |
File.open("/usr/share/dict/words") do |file| | |
file.each do |line| | |
dictionary << line.strip | |
end | |
end | |
dictionary | |
end | |
# Converting an array of dictionary to hashmap to use it later for fast lookup | |
def to_hashmap(dict_array) | |
map = Hash.new(1) | |
dict_array.each do |word| | |
map[word] += 1 | |
end | |
map | |
end | |
def perm_transposition(word) | |
n = word.length | |
(0...n-1).collect do |i| | |
word[0...i] + word[i+1,1] + word[i,1] + word[i+2..-1] | |
end | |
end | |
# Shifting characters in different position | |
def perm_alteration(word) | |
n = word.length | |
alteration = [] | |
n.times do |i| | |
# each_byte is used for ASCII representation of each char | |
# using English characters 'a-z' to generate it | |
@LETTERS.each_byte do |ascii_char| | |
alteration << word[0...i] + ascii_char.chr + word[i+1..-1] | |
end | |
end | |
alteration | |
end | |
def perm_addition(word) | |
n = word.length | |
addition = [] | |
(n+1).times do |i| | |
# each_byte is used for ASCII representation of each char | |
# using English characters 'a-z' to generate it | |
@LETTERS.each_byte do |ascii_char| | |
addition << word[0...i] + ascii_char.chr + word[i..-1] | |
end | |
end | |
addition | |
end | |
def perm_deletion(word) | |
n = word.length | |
(0...n).collect do |i| | |
word[0...i] + word[i+1..-1] | |
end | |
end | |
def permutation(word) | |
# adding all various permutation together | |
perms = perm_deletion(word) + perm_transposition(word) + perm_alteration(word) + perm_addition(word) | |
perms.uniq! # remove any duplicates | |
perms.empty? ? nil : perms | |
end | |
def find(word) | |
dict = load_dict | |
map = to_hashmap(dict) | |
result = [] | |
permutation(word).each do|perm_word| | |
result << perm_word if map.has_key?(perm_word) | |
end | |
result.empty? ? "Not found: #{word}" : result | |
end | |
start = Time.now | |
puts find("bat") # tries to find it | |
finish = Time.now | |
puts "time duration: #{finish - start} s" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment