rickhull · September 19, 2011 20:29
diff --git a/gistfile1.txt b/gistfile1.txt
 #!/usr/bin/ruby
 # Text Analyzer
 # Author: Zach
 # Purpose: Utilize Ruby to analyze Text files
 # and generate statistical information therein.

 require 'cgi'

 # Words excluded from keyword statistics, one per line in conf/stop_words.txt.
 # File.readlines is used because String is not Enumerable on Ruby 1.9+, so
 # calling #map on the result of File.read raises NoMethodError there.
 STOPWORDS = File.readlines('conf/stop_words.txt').map { |line| line.chomp }

 # Returns the number of characters in +text+, whitespace included.
 def total_chars(text)
   text.size
 end

 # Returns the number of characters in +text+ once every run of whitespace
 # has been removed. The counting itself is delegated to total_chars.
 def total_chars_minus_spaces(text)
   without_whitespace = text.gsub(/\s+/, '')
   total_chars(without_whitespace)
 end

 # Splits +text+ into words, sentences, paragraphs and lines.
 #
 # Returns a hash with:
 #   :word_count - number of whitespace-separated words
 #   :sent_count - number of pieces obtained by splitting on '.', '?' or '!'
 #   :para_count - number of pieces obtained by splitting on "\n\n"
 #   :line_count - number of lines
 #   :keywords   - the words that are not listed in STOPWORDS (exact match)
 def count_chunks(text)
   # One word list feeds both the count and the keywords. Splitting on /\s+/
   # instead would add an empty "word" when the text starts with whitespace.
   words = text.split
   {
     :word_count => words.length,
     :sent_count => text.split(/[.?!]/).length,
     :para_count => text.split(/\n\n/).length,
     # String#size is the character count; count the actual lines instead.
     :line_count => text.lines.count,
     :keywords   => words.reject { |word| STOPWORDS.include?(word) },
   }
 end

 # Summarises keyword usage.
 #
 # word_count - total number of words in the text
 # keywords   - the words of the text that are not stop words
 #
 # Returns a hash with:
 #   :pgw               - keywords as a whole-number percentage of word_count
 #                        (0 when word_count is zero)
 #   :most_common_words - up to ten most frequent keywords joined with '-- '
 def useful_words(word_count, keywords)
   total = word_count.to_f
   # Dividing by 0.0 gives NaN or Infinity, and calling #to_i on either raises
   # FloatDomainError, so a text without words is special-cased.
   percentage = total.zero? ? 0 : ((keywords.length.to_f / total) * 100).to_i
   by_frequency = (keywords - STOPWORDS).group_by { |word| word }.sort_by { |_word, hits| -hits.length }
   {
     :pgw => percentage,
     :most_common_words => by_frequency.first(10).map(&:first).join('-- '),
   }
 end

 # Selects the "ideal" sentences. The sentences are ordered by length, the
 # run of (n / 7) + 1 entries starting at index n / 7 is taken, and of those
 # only the ones containing whitespace + "is" or "are" + a non-word character
 # are kept.
 #
 # Returns a hash with the single key :ideal_sentences.
 def ideal_sentences(sentences)
   by_length = sentences.sort_by(&:length)
   offset = by_length.length / 7
   candidates = by_length[offset, offset + 1]
   matching = candidates.select { |candidate| candidate =~ /\sis\W|\sare\W/ }
   { :ideal_sentences => matching }
 end
 # Returns a hash whose :names entry lists the :text value of every element
 # of +stats+, in order.
 def file_names(stats)
   names = stats.map { |stat| stat[:text] }
   { :names => names }
 end

 # Renders one plain-text report per entry of +stats+.
 #
 # stats - array of hashes; each must provide :text, :chars (:tot_chars,
 #         :tot_chars_no_space), :chunks (:word_count, :sent_count,
 #         :para_count), :words (:pgw, :most_common_words) and
 #         :sent (:ideal_sentences).
 #
 # Returns an array of report strings, in the same order as +stats+.
 def build_reports(stats)
   stats.map do |stat|
     chunks = stat[:chunks]
     # Averages use integer division. A zero divisor (e.g. an empty text)
     # yields 0 instead of raising ZeroDivisionError.
     sents_per_para = chunks[:para_count].zero? ? 0 : chunks[:sent_count] / chunks[:para_count]
     words_per_sent = chunks[:sent_count].zero? ? 0 : chunks[:word_count] / chunks[:sent_count]
     <<-REPORT
     #{stat[:text]} Inaugural Speech - Analysis Results
     Total number of characters is: #{stat[:chars][:tot_chars]}.
     Total number of characters less whitespace is: #{stat[:chars][:tot_chars_no_space]}
     Total number of words is: #{chunks[:word_count]}.
     Total number of sentences is: #{chunks[:sent_count]}
     Total number of paragraphs is: #{chunks[:para_count]}
     The average sentences per paragraph is: #{sents_per_para}
     The average words per sentence is:  #{words_per_sent}
     #{stat[:words][:pgw]} % of all words in the text are non-fluff words.
     The ideal sentences are: #{stat[:sent][:ideal_sentences].join("-- ")}
     The top 10 most common words are: #{stat[:words][:most_common_words]}
     
     REPORT
   end
 end

 # Analyses every text file matching files/*.txt.
 #
 # Returns an array with one hash per file, ordered by path, holding:
 #   :chars  - { :tot_chars, :tot_chars_no_space } character counts
 #   :chunks - the result of count_chunks
 #   :words  - the result of useful_words
 #   :sent   - the result of ideal_sentences
 #   :text   - the file name without its directory and ".txt" extension
 def collect_stats
   # Older Rubies do not guarantee the order of Dir.glob; sort for stable output.
   Dir.glob("files/*.txt").sort.map do |text_file|
     # The contents are analysed untouched. Stripping the directory and the
     # extension applies to the file name, not to the text that was read.
     text = File.read(text_file)
     sentences = text.gsub(/\s+/, ' ').strip.split(/[.?!]/)

     # There is no count_chars method; assemble the character stats from the
     # two counting helpers under the keys the report reads.
     char_stats = {
       :tot_chars          => total_chars(text),
       :tot_chars_no_space => total_chars_minus_spaces(text),
     }
     chunk_stats = count_chunks(text)
     word_stats = useful_words(chunk_stats[:word_count], chunk_stats[:keywords])
     best_sentences = ideal_sentences(sentences)

     {
       :chars  => char_stats,
       :chunks => chunk_stats,
       :words  => word_stats,
       :sent   => best_sentences,
       :text   => File.basename(text_file, '.txt'),
     }
   end
 end
 # Gather the statistics for every text file, then render one report per file.
 stats = collect_stats
 reports = build_reports(stats)

 # <<- allows the terminators to be indented.
 header = <<-HTML
 <html>
 <body>
 <pre>
 CS 132A Lab3
 Inaugural Speech Analysis
 HTML

 # Closing tags mirror the order in which the header opened them.
 footer = <<-HTML
 </pre>
 </body>
 </html>
 HTML

 # The reports are joined explicitly, because interpolating the array itself
 # prints its inspect form on Ruby 1.9+. They are also HTML-escaped, since
 # the analysed text may contain '<' or '&'.
 output = <<-OUT
 #{header}
 #{CGI.escapeHTML(reports.join)}
 #{footer}
 OUT

 cgi = CGI.new
 cgi.out do
   output
 end
	#!/usr/bin/ruby
	# Text Analyzer
	# Author: Zach
	# Purpose: Utilize Ruby to analyze Text files
	# and generate statistical information therein.

	require 'cgi'

	# Words excluded from keyword statistics, one per line in conf/stop_words.txt.
	# File.readlines is used because String is not Enumerable on Ruby 1.9+, so
	# calling #map on the result of File.read raises NoMethodError there.
	STOPWORDS = File.readlines('conf/stop_words.txt').map { |line| line.chomp }

	# Returns the number of characters in +text+, whitespace included.
	def total_chars(text)
	  text.size
	end

	# Returns the number of characters in +text+ once every run of whitespace
	# has been removed. The counting itself is delegated to total_chars.
	def total_chars_minus_spaces(text)
	  without_whitespace = text.gsub(/\s+/, '')
	  total_chars(without_whitespace)
	end

	# Splits +text+ into words, sentences, paragraphs and lines.
	#
	# Returns a hash with:
	#   :word_count - number of whitespace-separated words
	#   :sent_count - number of pieces obtained by splitting on '.', '?' or '!'
	#   :para_count - number of pieces obtained by splitting on "\n\n"
	#   :line_count - number of lines
	#   :keywords   - the words that are not listed in STOPWORDS (exact match)
	def count_chunks(text)
	  # One word list feeds both the count and the keywords. Splitting on /\s+/
	  # instead would add an empty "word" when the text starts with whitespace.
	  words = text.split
	  {
	    :word_count => words.length,
	    # A character class avoids the alternation bars, which had been
	    # backslash-escaped into literal '|' characters.
	    :sent_count => text.split(/[.?!]/).length,
	    :para_count => text.split(/\n\n/).length,
	    # String#size is the character count; count the actual lines instead.
	    :line_count => text.lines.count,
	    :keywords   => words.reject { |word| STOPWORDS.include?(word) },
	  }
	end

	# Summarises keyword usage.
	#
	# word_count - total number of words in the text
	# keywords   - the words of the text that are not stop words
	#
	# Returns a hash with:
	#   :pgw               - keywords as a whole-number percentage of word_count
	#                        (0 when word_count is zero)
	#   :most_common_words - up to ten most frequent keywords joined with '-- '
	def useful_words(word_count, keywords)
	  total = word_count.to_f
	  # Dividing by 0.0 gives NaN or Infinity, and calling #to_i on either raises
	  # FloatDomainError, so a text without words is special-cased.
	  percentage = total.zero? ? 0 : ((keywords.length.to_f / total) * 100).to_i
	  by_frequency = (keywords - STOPWORDS).group_by { |word| word }.sort_by { |_word, hits| -hits.length }
	  {
	    :pgw => percentage,
	    :most_common_words => by_frequency.first(10).map(&:first).join('-- '),
	  }
	end

	# Selects the "ideal" sentences. The sentences are ordered by length, the
	# run of (n / 7) + 1 entries starting at index n / 7 is taken, and of those
	# only the ones containing whitespace + "is" or "are" + a non-word character
	# are kept.
	#
	# Returns a hash with the single key :ideal_sentences.
	def ideal_sentences(sentences)
	  by_length = sentences.sort_by { |sentence| sentence.length }
	  offset = by_length.length / 7
	  candidates = by_length.slice(offset, offset + 1)
	  # The bar must be an unescaped alternation: "\|" would match a literal '|'.
	  { :ideal_sentences => candidates.select { |sentence| sentence =~ /\sis\W|\sare\W/ } }
	end
	# Returns a hash whose :names entry lists the :text value of every element
	# of +stats+, in order.
	def file_names(stats)
	  { :names => stats.map { |stat| stat[:text] } }
	end

	# Renders one plain-text report per entry of +stats+.
	#
	# stats - array of hashes; each must provide :text, :chars (:tot_chars,
	#         :tot_chars_no_space), :chunks (:word_count, :sent_count,
	#         :para_count), :words (:pgw, :most_common_words) and
	#         :sent (:ideal_sentences).
	#
	# Returns an array of report strings, in the same order as +stats+.
	def build_reports(stats)
	  stats.map do |stat|
	    chunks = stat[:chunks]
	    # Averages use integer division. A zero divisor (e.g. an empty text)
	    # yields 0 instead of raising ZeroDivisionError.
	    sents_per_para = chunks[:para_count].zero? ? 0 : chunks[:sent_count] / chunks[:para_count]
	    words_per_sent = chunks[:sent_count].zero? ? 0 : chunks[:word_count] / chunks[:sent_count]
	    <<-REPORT
	#{stat[:text]} Inaugural Speech - Analysis Results
	Total number of characters is: #{stat[:chars][:tot_chars]}.
	Total number of characters less whitespace is: #{stat[:chars][:tot_chars_no_space]}
	Total number of words is: #{chunks[:word_count]}.
	Total number of sentences is: #{chunks[:sent_count]}
	Total number of paragraphs is: #{chunks[:para_count]}
	The average sentences per paragraph is: #{sents_per_para}
	The average words per sentence is: #{words_per_sent}
	#{stat[:words][:pgw]} % of all words in the text are non-fluff words.
	The ideal sentences are: #{stat[:sent][:ideal_sentences].join("-- ")}
	The top 10 most common words are: #{stat[:words][:most_common_words]}

	REPORT
	  end
	end

	# Analyses every text file matching files/*.txt.
	#
	# Returns an array with one hash per file, ordered by path, holding:
	#   :chars  - { :tot_chars, :tot_chars_no_space } character counts
	#   :chunks - the result of count_chunks
	#   :words  - the result of useful_words
	#   :sent   - the result of ideal_sentences
	#   :text   - the file name without its directory and ".txt" extension
	def collect_stats
	  # Older Rubies do not guarantee the order of Dir.glob; sort for stable output.
	  Dir.glob("files/*.txt").sort.map do |text_file|
	    # The contents are analysed untouched. Stripping the directory and the
	    # extension applies to the file name, not to the text that was read.
	    text = File.read(text_file)
	    sentences = text.gsub(/\s+/, ' ').strip.split(/[.?!]/)

	    # There is no count_chars method; assemble the character stats from the
	    # two counting helpers under the keys the report reads.
	    char_stats = {
	      :tot_chars          => total_chars(text),
	      :tot_chars_no_space => total_chars_minus_spaces(text),
	    }
	    chunk_stats = count_chunks(text)
	    word_stats = useful_words(chunk_stats[:word_count], chunk_stats[:keywords])
	    best_sentences = ideal_sentences(sentences)

	    {
	      :chars  => char_stats,
	      :chunks => chunk_stats,
	      :words  => word_stats,
	      :sent   => best_sentences,
	      :text   => File.basename(text_file, '.txt'),
	    }
	  end
	end
	# Gather the statistics for every text file, then render one report per file.
	stats = collect_stats
	reports = build_reports(stats)

	# <<- allows the terminators to be indented.
	header = <<-HTML
	<html>
	<body>
	<pre>
	CS 132A Lab3
	Inaugural Speech Analysis
	HTML

	# Closing tags mirror the order in which the header opened them.
	footer = <<-HTML
	</pre>
	</body>
	</html>
	HTML

	# The reports are joined explicitly, because interpolating the array itself
	# prints its inspect form on Ruby 1.9+. They are also HTML-escaped, since
	# the analysed text may contain '<' or '&'.
	output = <<-OUT
	#{header}
	#{CGI.escapeHTML(reports.join)}
	#{footer}
	OUT

	cgi = CGI.new
	cgi.out do
	  output
	end