splittingred · December 14, 2015 05:29
diff --git a/normalized_word_frequency.rb b/normalized_word_frequency.rb
 ##
 # Finds a normalized ranking of word frequency given an article of text
 #
 class Hash
   ##
   # Normalize the values of this hash into scores, returned as an array of
   # [key, score] pairs in the hash's iteration order. For non-negative
   # values the scores fall between 0 and 1.
   #
   # By default every value is divided by the largest value, so the largest
   # value scores 1. With weight_lower_value the smallest value is divided by
   # each value instead, so the smallest value scores 1 and larger values
   # score progressively less.
   #
   # @param [Boolean] weight_lower_value Set to true to give more weight to lower values
   # @return [Array<Array>] one [key, Float] pair per entry; empty for an empty hash
   #
   def normalize(weight_lower_value = false)
     # Stand-in for zero so a divisor is never 0.
     lower_bound = 0.00000001
     if weight_lower_value
       min_val = values.min
       map do |k, v|
         # Divide by this entry's own value (not a constant) so that each key
         # gets its own score; clamp both sides to keep the ratio finite.
         [k, [min_val, lower_bound].max.to_f / [v, lower_bound].max]
       end
     else
       max_val = values.max
       max_val = lower_bound if max_val == 0
       map { |k, v| [k, v.to_f / max_val] }
     end
   end
 end
 class Array
   ##
   # Count how often each word occurs in this array.
   #
   # A word is skipped when its integer conversion is positive (e.g. "2015"),
   # when it is shorter than three characters, or when it is listed in
   # exclude_words. The comparison is case-sensitive.
   #
   # @param [Array] exclude_words An array of words to exclude
   # @return [Hash] word (as a String) => number of occurrences
   #
   def word_frequency(exclude_words = %w(the for and has))
     each_with_object({}) do |word, counts|
       skipped = word.to_i > 0 || word.length < 3 || exclude_words.include?(word)
       next if skipped

       key = word.to_s
       counts[key] = counts.key?(key) ? counts[key] + 1 : 1
     end
   end
 end

 # Article to analyse: the first command-line argument, or "article.txt"
 # when no argument is given.
 file = (ARGV.first || "article.txt").to_s
 # Split the article text into runs of word characters.
 words = File.read(file).scan(/\w+/)
 # Print the [word, score] pairs ordered from lowest to highest score.
 ranking = words.word_frequency.normalize.sort_by { |_word, score| score }
 puts ranking.inspect
	##
	# Finds a normalized ranking of word frequency given an article of text
	#
	class Hash
	  ##
	  # Normalize the values of this hash into scores, returned as an array of
	  # [key, score] pairs in the hash's iteration order. For non-negative
	  # values the scores fall between 0 and 1.
	  #
	  # By default every value is divided by the largest value, so the largest
	  # value scores 1. With weight_lower_value the smallest value is divided by
	  # each value instead, so the smallest value scores 1 and larger values
	  # score progressively less.
	  #
	  # @param [Boolean] weight_lower_value Set to true to give more weight to lower values
	  # @return [Array<Array>] one [key, Float] pair per entry; empty for an empty hash
	  #
	  def normalize(weight_lower_value = false)
	    # Stand-in for zero so a divisor is never 0.
	    lower_bound = 0.00000001
	    if weight_lower_value
	      min_val = values.min
	      map do |k, v|
	        # Divide by this entry's own value (not a constant) so that each key
	        # gets its own score; clamp both sides to keep the ratio finite.
	        [k, [min_val, lower_bound].max.to_f / [v, lower_bound].max]
	      end
	    else
	      max_val = values.max
	      max_val = lower_bound if max_val == 0
	      map { |k, v| [k, v.to_f / max_val] }
	    end
	  end
	end
	class Array
	  ##
	  # Find a frequency hash of the words in this array.
	  #
	  # A word is skipped when its integer conversion is positive (e.g. "2015"),
	  # when it is shorter than three characters, or when it is listed in
	  # exclude_words. The comparison is case-sensitive.
	  #
	  # @param [Array] exclude_words An array of words to exclude
	  # @return [Hash] word (as a String) => number of occurrences
	  #
	  def word_frequency(exclude_words = %w(the for and has))
	    f = {}
	    each do |w|
	      next if w.to_i > 0 || w.length < 3 || exclude_words.include?(w)

	      key = w.to_s
	      f[key] = f[key] ? f[key] + 1 : 1
	    end
	    f
	  end
	end

	# Article to analyse: the first command-line argument, or "article.txt"
	# when no argument is given.
	file = ARGV[0] ? ARGV[0].to_s : "article.txt"
	# Split the article text into runs of word characters.
	words = File.read(file).scan(/\w+/)
	# Print the [word, score] pairs ordered from lowest to highest score.
	puts words.word_frequency.normalize.sort_by { |_k, v| v }.inspect