Created
May 29, 2011 11:29
-
-
Save johncrisostomo/997670 to your computer and use it in GitHub Desktop.
Text Analyzer with error handling and logging
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'logger'

# Shared logger used by the methods below. Messages go to analyzer.log,
# and Logger is asked to rotate that file monthly.
$LOG = Logger.new('analyzer.log', 'monthly')
# Reads the file at +filename+ and returns its entire contents as a String.
#
# The requested name is logged at debug level through $LOG. If reading fails,
# the error is logged at error level, printed to standard output, and the
# process exits with status 1 so that callers of the script can detect the
# failure.
#
# @param filename [String] path of the file to read
# @return [String] the full contents of the file
def document_to_string(filename)
  $LOG.debug("File name : #{filename}")
  begin
    File.read(filename)
  rescue StandardError => e
    # StandardError rather than Exception: Interrupt (Ctrl-C), SystemExit and
    # similar must not be reported as a file problem.
    $LOG.error "File not found! #{e}"
    puts e.to_s
    exit 1
  end
end
# Returns the total number of characters in +text+, counting spaces and
# newlines.
#
# @param text [String]
# @return [Integer]
def character_count(text)
  text.length
end
# Returns the number of characters in +text+ that are not the space
# character (' '). Other whitespace such as tabs and newlines is still
# counted.
#
# @param text [String]
# @return [Integer]
def character_count_without_spaces(text)
  text.delete(' ').length
end
# Returns the number of lines in +text+.
#
# Every newline-terminated line counts, including blank ones at the end of
# the text; a final line without a trailing newline counts as well.
#
# @param text [String]
# @return [Integer]
def line_count(text)
  # String#lines keeps trailing empty lines, which split("\n") silently drops.
  text.lines.size
end
# Returns the number of whitespace-separated words in +text+.
#
# @param text [String]
# @return [Integer]
def word_count(text)
  # split with no argument separates on runs of any whitespace and ignores
  # leading/trailing whitespace.
  text.split.size
end
# Returns the number of sentences in +text+.
#
# A sentence is a run of text delimited by one or more of the terminators
# '.', '?', '!' or ';'. Runs containing only whitespace are not counted.
# Newlines and tabs inside a sentence do not split it, so callers do not
# need to strip them beforehand.
#
# @param text [String]
# @return [Integer]
def sentence_count(text)
  text.split(/[.?!;]+/).count { |piece| !piece.strip.empty? }
end
# Returns the number of paragraphs in +text+.
#
# Paragraphs are separated by one or more blank lines (lines that are empty
# or contain only whitespace). Empty pieces caused by leading blank lines or
# by several consecutive blank lines are not counted.
#
# @param text [String]
# @return [Integer]
def paragraph_count(text)
  # \s* between the two newlines absorbs extra blank lines, whitespace-only
  # lines and the "\r" of CRLF line endings.
  text.split(/\n\s*\n/).count { |paragraph| !paragraph.strip.empty? }
end
# Returns the average number of words per sentence as a Float.
#
# @param words [Numeric] total word count
# @param sentences [Numeric] total sentence count
# @return [Float] words divided by sentences, or 0.0 when there are no
#   sentences (avoids NaN/Infinity from dividing by zero)
def calculate_average_number_of_words_per_sentence(words, sentences)
  return 0.0 if sentences.zero?

  words / sentences.to_f
end
# Returns the average number of sentences per paragraph as a Float.
#
# @param sentences [Numeric] total sentence count
# @param paragraphs [Numeric] total paragraph count
# @return [Float] sentences divided by paragraphs, or 0.0 when there are no
#   paragraphs (avoids NaN/Infinity from dividing by zero)
def calculate_average_number_of_sentences_per_paragraph(sentences, paragraphs)
  return 0.0 if paragraphs.zero?

  sentences / paragraphs.to_f
end
# Script entry point: ask for a file name, read the file, and print its
# statistics.
print "Enter filename : "
# gets returns nil at end of input, and chomp! returns nil when there is no
# trailing newline to remove; to_s.chomp always yields a String.
fname = gets.to_s.chomp
sample = document_to_string(fname)

# The sentence count and the no-space character count are taken on the text
# with newlines removed.
flattened = sample.delete("\n")

# Compute each statistic once and reuse it for both the report lines and the
# averages.
characters = character_count(sample)
characters_without_spaces = character_count_without_spaces(flattened)
lines = line_count(sample)
words = word_count(sample)
sentences = sentence_count(flattened)
paragraphs = paragraph_count(sample)

average_words = calculate_average_number_of_words_per_sentence(words, sentences)
average_sentences = calculate_average_number_of_sentences_per_paragraph(sentences, paragraphs)

puts "Analyzing #{fname} . . .\n"
puts "The number of characters is #{characters}"
puts "The number of characters (without spaces) is #{characters_without_spaces}"
puts "The number of lines is #{lines}"
puts "The number of words is #{words}"
puts "The number of sentences is #{sentences}"
puts "The number of paragraphs is #{paragraphs}"
puts "The average number of words in a sentence is %.2f" % average_words
puts "The average number of sentences in a paragraph is %.2f" % average_sentences
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment