Created
November 25, 2011 02:30
-
-
Save evandrix/1392682 to your computer and use it in GitHub Desktop.
Anagram finder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/****************************************************************************** | |
* This function is a code snippet. It can be freely used and distributed. | |
* Author: irfan[dot]hamid[at]gmail[at]com | |
* | |
* This function takes two ASCII strings in C syntax (null-terminated | |
* character arrays) and determines whether they are anagrams or not. | |
* | |
* Implementation notes: | |
* The function contains a histogram that stores the frequency of each | |
* character it encounters in the first string. For each character of the | |
* second string, it decrements the corresponding entry in the histogram. If | |
* before every decrement, the value of the histogram bucket reaches zero, | |
* then the two strings are not anagrams, as there is some character that has | |
* more occurrences in the second string than in the first string. | |
* | |
* If either of the two strings contains a non-alphabetic character, the | |
* anagram property is considered to be violated. | |
* | |
* Remarks: | |
* It is an efficient implementation because: | |
* o It is in-place, the original strings are not copied, no alloc or copy | |
* o Does not clobber the original strings | |
* o It is a linear time implementation | |
* | |
* Returns: | |
* True if the strings are anagrams, false otherwise. | |
******************************************************************************/ | |
bool is_anagram (char w1[], char w2[]) | |
{ | |
unsigned int i, sz; | |
/* The histogram */ | |
int freqtbl[26]; | |
/* Sanity check */ | |
if ((sz = strlen(w1)) != strlen(w2)) | |
return false; | |
/* Initialize the histogram */ | |
bzero(freqtbl, 26*sizeof(int)); | |
/* Read the first string, incrementing the corresponding histogram entry */ | |
for (i = 0; i < sz; i++) { | |
if (w1[i] >= 'A' && w1[i] <= 'Z') | |
freqtbl[w1[i]-'A']++; | |
else if (w1[i] >= 'a' && w1[i] <= 'z') | |
freqtbl[w1[i]-'a']++; | |
else | |
return false; | |
} | |
/* Read the second string, decrementing the corresponding histogram entry */ | |
for (i = 0; i < sz; i++) { | |
if (w2[i] >= 'A' && w2[i] <= 'Z') { | |
if (freqtbl[w2[i]-'A'] == 0) | |
return false; | |
freqtbl[w2[i]-'A']--; | |
} else if (w2[i] >= 'a' && w2[i] <= 'z') { | |
if (freqtbl[w2[i]-'a'] == 0) | |
return false; | |
freqtbl[w2[i]-'a']--; | |
} else { | |
return false; | |
} | |
} | |
return true; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# first create a word list file at /Users/Shared/words: | |
mkdir -p /Users/Shared | |
sudo chmod 1777 /Users/Shared | |
sudo cp /usr/share/dict/words /Users/Shared/words | |
sudo chown $(logname):$(logname) /Users/Shared/words | |
sudo chmod 666 /Users/Shared/words | |
# or | |
cd /Users/Shared | |
curl -L -o words http://pragdave.pragprog.com/data/wordlist.txt | |
#--------------------------------------------- | |
# cat anagram.rb | |
#!/usr/local/bin/ruby -w | |
# Based on: | |
# Solving Anagrams in Ruby, | |
# http://lojic.com/blog/2007/10/22/solving-anagrams-in-ruby/ | |
# Author: Brian Adkins | |
word_list_file = "/Users/Shared/words" | |
word_hash_file = "/Users/Shared/word_hash" | |
exit 1 unless File.exist?(word_list_file) | |
1.times do | |
if not File.exist?(File.expand_path(word_hash_file)) | |
words = Hash.new([]) | |
File.open(word_list_file).each_line do |w| | |
w = w.chomp | |
# create a hash with sorted letters as keys and words as values | |
# example: "alst" => ["last", "salt", "slat"] | |
word = w.downcase | |
sorted_word = word.split('').sort!.join('').delete(" ") | |
words[sorted_word] |= [w] # |= makes sure array elements in [w] are unique | |
end | |
#words.delete_if {|key, value| value.size < 2 } | |
#words.sort_by {|k, v| k.size }.last(10).each { |item| p item } # print 10 largest anagrams | |
#words.each_pair { |k,v| puts "#{k.inspect} => #{v.inspect}" if v.size > 2 } | |
#words.each_value { |v| p v if v.size > 2 } | |
#p words.sort_by {|k, v| v.size }.last # largest anagram set | |
File.open(File.expand_path(word_hash_file), "w") do |file| | |
Marshal.dump(words, file) | |
end | |
redo | |
else | |
words = nil | |
File.open(File.expand_path(word_hash_file), "r") do |file| | |
words = Marshal.load(file) | |
end | |
while true | |
print "Enter word: " | |
anagram = gets.chomp | |
exit 0 if anagram == "exit" | |
anagram = anagram.downcase | |
sorted_anagram = anagram.split('').sort!.join('').delete(" ") | |
p words[sorted_anagram] | |
end | |
end | |
end | |
#--------------------------- | |
# adding new words to the word_hash file from the command line: | |
rm -f /Users/Shared/word_hash | |
function add_words() { printf "%s\n" "$@" >> /Users/Shared/words; } | |
add_words London "New York" | |
# ... and then run anagram.rb again |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment