Skip to content

Instantly share code, notes, and snippets.

@jmhodges
Created November 17, 2009 23:21
Show Gist options
  • Save jmhodges/237370 to your computer and use it in GitHub Desktop.
Save jmhodges/237370 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# Count tables keyed by arrays of words. Hash.new(0) makes an unseen key read
# as 0, so entries can be incremented with += without being initialized first.
two_words = Hash.new(0)
three_words = Hash.new(0)
# Adds +count+ to +word_hash+ for every run of +window+ consecutive words.
#
# words     - Array of words, in order.
# word_hash - Hash keyed by word arrays; it must return a number for unseen
#             keys (for example Hash.new(0)), because entries are updated
#             with +=.
# count     - Amount added for each window found (default 1).
# window    - Number of consecutive words per key (default 2).
#
# Nothing is added when +words+ has fewer than +window+ elements.
# Returns nil.
def windowed_words_to_hash(words, word_hash, count = 1, window = 2)
  words.each_cons(window) { |group| word_hash[group] += count }
  # each_cons' own return value differs between Ruby versions; pin it to nil.
  nil
end
# Adds +num+ to the count of every leading prefix of +query+ in +hsh+.
#
# For query ['Best', 'Buy'] the keys ['Best'] and ['Best', 'Buy'] are each
# increased by +num+.
#
# query - Array of words; it is not modified.
# num   - Amount to add to each prefix's count.
# hsh   - Hash keyed by word arrays; it must return a number for unseen keys
#         (for example Hash.new(0)), because entries are updated with +=.
#
# Returns a new Array containing the words of +query+.
def add_count(query, num, hsh)
  query.each_with_object([]) do |word, prefix|
    prefix << word
    # Key on a copy: the running prefix keeps growing, and mutating an array
    # that is already stored as a hash key corrupts the hash's lookup.
    hsh[prefix.dup] += num
  end
end
# Appends the contents of +hsh+ to the file at +filename+, one entry per line.
#
# Each line is the key's words joined by single spaces, then a tab, then the
# value. The file is opened in append mode, so existing content is kept.
# All lines are built before the file is opened.
#
# Returns the Array of lines that were written (without trailing newlines).
def dump_word_hash(hsh, filename)
  rows = hsh.map do |words, count|
    phrase = words.join(' ')
    [phrase, count].join("\t")
  end

  File.open(filename, 'a') do |out|
    rows.each { |row| out.puts(row) }
  end
end
# Build the two- and three-word count tables from searched_terms.txt.
# Each line is expected to look like "<search phrase><TAB><count>".
File.foreach('searched_terms.txt') do |line|
  # The separator must be double-quoted: '\t' in single quotes is a literal
  # backslash followed by "t" and never matches a real tab.
  phrase, raw_count = line.chomp.split("\t")
  next unless phrase # blank line

  # A line without a count column is counted once.
  weight = raw_count ? raw_count.to_i : 1

  # scan, unlike split(/\W+/), yields no empty leading token when the phrase
  # starts with punctuation or whitespace.
  words = phrase.scan(/\w+/)

  # each_cons yields nothing when there are fewer words than the window size,
  # so no explicit length checks are needed.
  words.each_cons(2) { |pair| two_words[pair] += weight }
  words.each_cons(3) { |triple| three_words[triple] += weight }
end

# Add a hand-picked query on top of the counts read from the file, then append
# the two-word table to the 'two_words' file.
# NOTE(review): three_words is populated above but is not written out here.
my_query = ['Best', 'Buy']
add_count(my_query, 3, two_words)
dump_word_hash(two_words, 'two_words')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment