mschuetz · October 25, 2011 21:29
diff --git a/spam.rb b/spam.rb
 # Adds up the elements of +arr+, starting from 0.
 #
 # arr - Array of numbers.
 #
 # Returns the total of all elements (0 when +arr+ is empty).
 def sum(arr)
   arr.inject(0) { |total, value| total + value }
 end

 # Multiplies the elements of +arr+ together, starting from 1.
 #
 # arr - Array of numbers.
 #
 # Returns the product of all elements (1 when +arr+ is empty).
 def mult(arr)
   arr.inject(1) { |product, factor| product * factor }
 end

 # Labelled training messages, keyed by class.
 $training = {
   :spam => ["offer is secret", "click secret link", "secret sports link"],
   :ham => ["play sports today", "went play sports", "secret sports event", "sports is today", "sports costs money"]
 }

 # Additive smoothing constant k, added to the message counts below.
 $LSk = 1.0

 $dict = {}                    # class => { word => occurrences within that class }
 $global_dict = Hash.new(0.0)  # word => occurrences across every class
 $priors = {}                  # class => smoothed share of the training messages
 $total_words = Hash.new(0)    # class => number of word occurrences in that class
 $total_words_all = 0          # word occurrences summed over all classes

 total_messages = $training.values.inject(0) { |count, messages| count + messages.size }
 # Denominator of every prior: message total plus k for each class.
 prior_denominator = total_messages.to_f + $LSk * $training.size

 $training.each_pair do |klass, messages|
   # Unseen words read as 0.0 without being stored in the hash.
   counts = Hash.new(0.0)
   $dict[klass] = counts

   # Messages are tokenized on whitespace; every occurrence is counted.
   messages.flat_map { |msg| msg.split }.each do |word|
     counts[word] += 1
     $global_dict[word] += 1
     $total_words[klass] += 1
   end

   $total_words_all += $total_words[klass]
   $priors[klass] = (messages.size.to_f + $LSk) / prior_denominator
 end

 # Dump the per-class word counts and the priors.
 p $dict
 p $priors

 # Smoothed relative frequency of +word+ within class +klass+.
 #
 # Reads the globals $dict, $total_words, $global_dict and $LSk.
 #
 # word  - String token to look up.
 # klass - class key present in $dict and $total_words.
 #
 # Returns ($dict[klass][word] + k) divided by
 # ($total_words[klass] + k * number of entries in $global_dict), with k = $LSk.
 def prob(word, klass)
   smoothed_count = $dict[klass][word] + $LSk
   smoothed_total = $total_words[klass] + $LSk * $global_dict.size
   smoothed_count / smoothed_total
 end

 # Computes a normalized score per class for +message+.
 #
 # For every class key in $training, the raw score is $priors[class] times the
 # product of prob(word, class) over the whitespace-separated words of the
 # message. Each raw score is then divided by the sum of all raw scores.
 #
 # message - String to classify.
 #
 # Returns a Hash mapping each class key of $training to its normalized score.
 def classify(message)
   # Neither the token list nor the raw scores depend on which class is being
   # normalized, so compute them once instead of once per class.
   words = message.split
   scores = {}
   $training.each_key { |klass|
     scores[klass] = mult(words.map { |w| prob(w, klass) }) * $priors[klass]
   }

   # Sum in class order starting from 0.0 so the float result is unchanged.
   denom = scores.values.inject(0.0) { |acc, score| acc + score }

   res = {}
   scores.each_pair { |klass, score| res[klass] = score / denom }
   res
 end

 # Print the smoothed frequency of "today" for each class, spam first.
 [:spam, :ham].each { |klass| puts prob("today", klass) }

 # Print the normalized class scores for a sample message.
 p classify("today is secret")
	# Adds up the elements of +arr+, starting from 0.
	#
	# arr - Array of numbers.
	#
	# Returns the total of all elements (0 when +arr+ is empty).
	def sum(arr)
	  res = 0
	  arr.each { |e| res += e }
	  res
	end

	# Multiplies the elements of +arr+ together, starting from 1.
	#
	# arr - Array of numbers.
	#
	# Returns the product of all elements (1 when +arr+ is empty).
	def mult(arr)
	  res = 1
	  arr.each { |e| res *= e }
	  res
	end

	# Labelled training messages, keyed by class.
	$training = {
	  :spam => ["offer is secret", "click secret link", "secret sports link"],
	  :ham => ["play sports today", "went play sports", "secret sports event", "sports is today", "sports costs money"]
	}

	# Additive smoothing constant k, added to the message counts below.
	$LSk = 1.0
	# class => { word => occurrences within that class }
	$dict = {}
	# word => occurrences across every class; unseen words read as 0.0
	$global_dict = {}
	$global_dict.default = 0.0
	# class => smoothed share of the training messages
	$priors = {}
	# class => number of word occurrences in that class
	$total_words = {}
	$total_words.default = 0
	# word occurrences summed over all classes
	$total_words_all = 0
	total_messages = 0
	$training.each_value { |messages| total_messages += messages.size }

	$training.each_pair { |klass, messages|
	  $dict[klass] = {}
	  $dict[klass].default = 0.0
	  # Messages are tokenized on whitespace; every occurrence is counted.
	  messages.each { |msg|
	    msg.split.each { |word|
	      $dict[klass][word] += 1
	      $global_dict[word] += 1
	      $total_words[klass] += 1
	    }
	  }
	  $total_words_all += $total_words[klass]
	  # Smoothed prior: (messages in class + k) / (all messages + k * number of classes).
	  $priors[klass] = (messages.size.to_f + $LSk) / (total_messages.to_f + $LSk * $training.keys.size)
	}

	# Dump the per-class word counts and the priors.
	p $dict
	p $priors

	# Smoothed relative frequency of +word+ within class +klass+.
	#
	# Reads the globals $dict, $total_words, $global_dict and $LSk.
	#
	# word  - String token to look up.
	# klass - class key present in $dict and $total_words.
	#
	# Returns ($dict[klass][word] + k) divided by
	# ($total_words[klass] + k * number of entries in $global_dict), with k = $LSk.
	def prob(word, klass)
	  numerator = $dict[klass][word] + $LSk
	  denominator = $total_words[klass] + $LSk * $global_dict.size
	  numerator / denominator
	end

	# Computes a normalized score per class for +message+.
	#
	# For every class key in $training, the raw score is $priors[class] times the
	# product of prob(word, class) over the whitespace-separated words of the
	# message. Each raw score is then divided by the sum of all raw scores.
	#
	# message - String to classify.
	#
	# Returns a Hash mapping each class key of $training to its normalized score.
	def classify(message)
	  # The token list and raw scores are the same for every class being
	  # normalized, so they are computed once up front.
	  words = message.split
	  raw = {}
	  $training.each_key { |klass|
	    raw[klass] = mult(words.map { |w| prob(w, klass) }) * $priors[klass]
	  }

	  # Sum in class order starting from 0.0 to keep the float result stable.
	  denom = 0.0
	  raw.each_value { |score| denom += score }

	  res = {}
	  raw.each_pair { |klass, score| res[klass] = score / denom }
	  res
	end

	# Print the smoothed frequency of "today" for the spam class, then for ham.
	spam_today = prob("today", :spam)
	puts spam_today
	ham_today = prob("today", :ham)
	puts ham_today

	# Print the normalized class scores for a sample message.
	p classify("today is secret")