Created
December 22, 2011 01:56
-
-
Save chriskillpack/1508555 to your computer and use it in GitHub Desktop.
Word trie
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A vanilla Trie class. | |
# Only interesting thing is gather_words_along_path. | |
class Trie | |
def initialize(filename = nil) | |
@trie = [{}, false] | |
@word_count = 0 | |
end | |
def to_s | |
"words: #{@word_count}" | |
end | |
def add_word(word) | |
cursor = @trie | |
word.split('').each do |chr| | |
if not cursor[0].has_key?(chr) | |
cursor[0][chr] = [{}, false] | |
end | |
cursor = cursor[0][chr] | |
end | |
cursor[1] = true | |
@word_count += 1 | |
end | |
def word_count | |
@word_count | |
end | |
def has_word?(word) | |
cursor = @trie | |
word.split('').each do |letter| | |
return false unless cursor[0].has_key?(letter) | |
cursor = cursor[0][letter] | |
end | |
cursor[1] | |
end | |
def gather_words_along_path(path) | |
words = [] | |
cursor = @trie | |
path.split('').each_with_index do |letter, index| | |
return words unless cursor[0].has_key?(letter) | |
words << path[0...index] if cursor[1] | |
cursor = cursor[0][letter] | |
end | |
words << path if cursor[1] | |
words | |
end | |
def populate_from_file(filename) | |
counter = 0 | |
File.open(filename) do |file| | |
file.each_line do |line| | |
p counter if (((counter += 1) % 10000) == 0) | |
line.chomp! | |
# Ignore short words and proper nouns. | |
add_word(line.downcase) unless line.length < 3 || line.match(/^[A-Z]/) | |
end | |
end | |
self | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment