Created
December 14, 2013 22:17
-
-
Save sjwats/7965664 to your computer and use it in GitHub Desktop.
TDD Word Analytics - Given a long string or set of paragraphs, perform some analysis that provides the following:
Number of each word
Number of each letter
Number of each symbol (any non-letter and non-digit character, excluding white space)
Top three most common words
Top three most common letters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Frequency analysis over a piece of text: per-word, per-letter and per-symbol
# counts, plus the most common words and letters.
#
# Every public method derives its result from the text given to the
# constructor, so the methods can be called in any order.
class WordAnalytics
  # Anything that is not a letter, mark, digit or underscore (Unicode-aware \W).
  NON_WORD_CHARACTER = /[^[:word:]]/.freeze
  # A single letter in any script; digits and underscores are not letters.
  LETTER = /[[:alpha:]]/.freeze
  # Any character that is neither a letter, a digit, nor whitespace.
  SYMBOL = /[^[:alnum:][:space:]]/.freeze

  # @param stringed_words [String, nil] the text to analyse; nil is treated as
  #   an empty text.
  def initialize(stringed_words)
    @stringed_words = stringed_words.to_s
  end

  # Splits the text on whitespace, lower-cases each token and strips its
  # non-word characters.
  #
  # @return [Array<String>] the words in order of appearance. Tokens made only
  #   of punctuation (e.g. "--") are dropped rather than returned as "".
  def separate_words
    @stringed_words.split.map { |token| token.downcase.gsub(NON_WORD_CHARACTER, '') }.reject(&:empty?)
  end

  # @return [Hash{String => Integer}] occurrences of each word, keyed in order
  #   of first appearance.
  def count_words
    tally(separate_words)
  end

  # @return [Hash{String => Integer}] occurrences of each letter
  #   (case-insensitive), keyed in order of first appearance.
  def count_letters
    tally(@stringed_words.downcase.scan(LETTER))
  end

  # @return [Hash{String => Integer}] occurrences of each symbol, i.e. every
  #   character that is not a letter, a digit or whitespace (spaces, tabs and
  #   newlines are all excluded).
  def count_symbols
    tally(@stringed_words.scan(SYMBOL))
  end

  # @param limit [Integer] how many entries to return.
  # @return [Hash{String => Integer}] the most frequent words, highest count
  #   first; ties keep their order of first appearance in the text.
  def common_words(limit = 3)
    top_entries(count_words, limit)
  end

  # @param limit [Integer] how many entries to return.
  # @return [Hash{String => Integer}] the most frequent letters, highest count
  #   first; ties keep their order of first appearance in the text.
  def common_letters(limit = 3)
    top_entries(count_letters, limit)
  end

  # Prints every statistic to standard output, one line per statistic.
  #
  # @return [nil]
  def display_results
    puts "The number of each word is #{count_words}"
    puts "The number of each letter is #{count_letters}"
    puts "The number of each symbol is #{count_symbols}"
    puts "The top three most common words are #{common_words}"
    puts "The top three most common letters are #{common_letters}"
  end

  private

  # Counts occurrences of each item in a single pass. The result is a plain
  # Hash (no default value), so looking up an unseen key still returns nil.
  def tally(items)
    items.each_with_object({}) do |item, counts|
      counts[item] = counts.fetch(item, 0) + 1
    end
  end

  # Picks the +limit+ highest-count entries. Enumerable#sort_by is not a stable
  # sort, so the insertion index is part of the sort key to make the outcome
  # for tied counts deterministic.
  def top_entries(counts, limit)
    ranked = counts.each_with_index.sort_by { |(_key, count), position| [-count, position] }
    ranked.first(limit).map(&:first).to_h
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'rspec' | |
| require_relative 'word_analytics' | |
# Behavioural spec for WordAnalytics, driven by one sample sentence.
#
# Hash expectations use `eql`, which compares Hash contents without regard to
# key order, so the "most common" examples pin which entries are returned but
# not the order in which they are returned.
describe WordAnalytics do
  # A fresh analyser is built for every example.
  let(:word_analytics) { WordAnalytics.new('How, how, now brown cow! cow cow') }

  it 'should separate words from symbols' do
    expect(word_analytics.separate_words).to eql(%w[how how now brown cow cow cow])
  end

  it 'should count words' do
    word_analytics.separate_words
    expect(word_analytics.count_words).to eql({ 'how' => 2, 'now' => 1, 'brown' => 1, 'cow' => 3 })
  end

  it 'should count letters' do
    expect(word_analytics.count_letters).to eql(
      { 'h' => 2, 'o' => 7, 'w' => 7, 'n' => 2, 'b' => 1, 'r' => 1, 'c' => 3 }
    )
  end

  # The previous description claimed the opposite of what is asserted: symbols
  # ARE the non-letter, non-digit characters; only white space is left out.
  it 'should count symbols, excluding letters, digits and white space' do
    expect(word_analytics.count_symbols).to eql({ ',' => 2, '!' => 1 })
  end

  it 'should provide the three most common words' do
    word_analytics.separate_words
    word_analytics.count_words
    expect(word_analytics.common_words).to eql({ 'cow' => 3, 'how' => 2, 'now' => 1 })
  end

  it 'should provide the three most common letters' do
    word_analytics.count_letters
    expect(word_analytics.common_letters).to eql({ 'w' => 7, 'o' => 7, 'c' => 3 })
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment