Created
September 5, 2011 13:36
-
-
Save jubstuff/1195007 to your computer and use it in GitHub Desktop.
Simple script that counts the number of occurrences of each word in a string (from Programming Ruby 1.9)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# word_frequency.rb
#
# Simple script that counts the number of occurrences of each word in a string.
#
#
# Splits a string into a list of lowercase words.
#
# A "word" is any run of word characters (letters, digits, underscore) and
# apostrophes, so contractions such as "don't" stay in one piece.
#
# @param string [String] the text to split
# @return [Array<String>] the words in order of appearance, lowercased;
#   empty when the text contains no matching characters
def word_from_string(string)
  # scan returns an array of every substring matching the pattern
  string.downcase.scan(/[\w']+/)
end
# Counts how many times each word occurs in a list.
#
# @param word_list [Enumerable<String>] the words to count
# @return [Hash{String => Integer}] word => number of occurrences; the hash
#   has a default of 0, so looking up a word that never occurred yields 0
def count_frequency(word_list)
  # Hash.new(0) lets every word start from zero without an existence check.
  word_list.each_with_object(Hash.new(0)) do |word, counts|
    counts[word] += 1
  end
end
# Read the sample text, count how often each word occurs, and keep the five
# most frequent words. sort_by orders ascending by count, so the most frequent
# word is the LAST entry of top_5 and is printed last.
raw_text = File.read("para.txt")
word_list = word_from_string(raw_text)
counts = count_frequency(word_list)
sorted = counts.sort_by { |_word, count| count }
top_5 = sorted.last(5)

puts "\nTop 5"
# ugly - too intimate with the array: it indexes by hand and knows that each
# entry is a [word, count] pair. The range is bounded by the actual size so a
# text with fewer than five distinct words does not index past the end (which
# would return nil and raise NoMethodError).
(0...top_5.size).each do |i|
  word = top_5[i][0]
  count = top_5[i][1]
  puts "#{word}: #{count}"
end

puts "\nTop 5"
# better - uses an iterator and a block; the pair is destructured for us
top_5.each do |word, count|
  puts "#{word}: #{count}"
end

puts "\nTop 5"
# map transforms each pair into its output line; printing is kept as a
# separate step so map is used for its return value, not for side effects
top_5.map { |word, count| "#{word}: #{count}" }.each { |line| puts line }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment