pachacamac · May 22, 2012 19:11
diff --git a/clusterize.rb b/clusterize.rb
 # A
 #   B
 #
 #               D
 #
 #         C
 #             E
 #
 # F
 #
 # # # # # # # # # # #
 # 1 2 3 4 5 6 7 8 9 10

 #http://www.psychstat.missouristate.edu/multibook/mlt04.htm

 # Sample points: each [x, y] coordinate maps to the list of labels located there.
 data = {
   [1,1] => [:A],
   [2,2] => [:B],
   [5,6] => [:C],
   [8,4] => [:D],
   [7,7] => [:E],
   [1,9] => [:F]
 }

 # Straight-line (Euclidean) distance between two n-dimensional points.
 # Coordinates of a and b are paired positionally and the squared
 # differences are accumulated as a Float before taking the square root.
 def distance(a,b)
   squared_total = a.zip(b).inject(0.0) { |total, (x, y)| total + (x - y)**2 }
   Math.sqrt(squared_total)
 end

 # Midpoint of two n-dimensional points, computed coordinate by coordinate.
 # Every averaged coordinate is rounded to the nearest Integer.
 def centroid(a,b)
   coordinate_pairs = a.zip(b)
   coordinate_pairs.map { |x, y| ((x + y) / 2.0).round }
 end

 # Performs one merge step on n-dimensional data: the two closest points are
 # removed from h and replaced by a single entry keyed by their centroid that
 # holds the members of both.
 #
 # h maps coordinate arrays to arrays of member labels and is modified in
 # place. Returns the member list stored under the centroid key, or nil
 # (leaving h untouched) when h holds fewer than two points.
 def clusterize!(h)
   # Nothing to merge: without this guard min_by yields nil and the lookup
   # of h[nil] + h[nil] raises NoMethodError.
   return if h.size < 2

   a, b = h.keys.combination(2).min_by { |pair| distance(*pair) }
   members = h.delete(a) + h.delete(b)
   center = centroid(a, b)
   # The rounded centroid may coincide with a third key that is still in h;
   # keep that cluster's members instead of overwriting them.
   h[center] = h.fetch(center, []) + members
 end

 # Print the six single-point clusters before any merging.
 p data
 # {[1, 1]=>[:A], [2, 2]=>[:B], [5, 6]=>[:C], [8, 4]=>[:D], [7, 7]=>[:E], [1, 9]=>[:F]}
 # Keep merging the closest pair until only 3 clusters are left ...
 clusterize!(data) until data.size <= 3
 p data
 # {[1, 9]=>[:F], [2, 2]=>[:A, :B], [7, 6]=>[:D, :C, :E]}
	# A
	#   B
	#
	#               D
	#
	#         C
	#             E
	#
	# F
	#
	# # # # # # # # # # #
	# 1 2 3 4 5 6 7 8 9 10

	#http://www.psychstat.missouristate.edu/multibook/mlt04.htm

	# Sample points: each [x, y] coordinate maps to the list of labels located there.
	data = {
	  [1,1] => [:A],
	  [2,2] => [:B],
	  [5,6] => [:C],
	  [8,4] => [:D],
	  [7,7] => [:E],
	  [1,9] => [:F]
	}

	# Euclidean distance between two n-dimensional points.
	# Coordinates of a and b are paired positionally; the squared differences
	# are summed as a Float and the square root of that sum is returned.
	def distance(a,b)
	  Math.sqrt(a.zip(b).reduce(0.0) { |sum, (x, y)| sum + (x - y)**2 })
	end

	# Center point between two n-dimensional points.
	# Each coordinate is the average of the matching coordinates of a and b,
	# rounded to the nearest Integer.
	def centroid(a,b)
	  a.zip(b).map { |x, y| ((x + y) / 2.0).round }
	end

	# Performs one merge step on n-dimensional data: the two closest points are
	# removed from h and replaced by a single entry keyed by their centroid that
	# holds the members of both.
	#
	# h maps coordinate arrays to arrays of member labels and is modified in
	# place. Returns the member list stored under the centroid key, or nil
	# (leaving h untouched) when h holds fewer than two points.
	def clusterize!(h)
	  # Nothing to merge: without this guard min_by yields nil and the lookup
	  # of h[nil] + h[nil] raises NoMethodError.
	  return if h.size < 2

	  a, b = h.keys.combination(2).min_by { |pair| distance(*pair) }
	  members = h.delete(a) + h.delete(b)
	  center = centroid(a, b)
	  # The rounded centroid may coincide with a third key that is still in h;
	  # keep that cluster's members instead of overwriting them.
	  h[center] = h.fetch(center, []) + members
	end

	# Print the six single-point clusters before any merging.
	p data
	# {[1, 1]=>[:A], [2, 2]=>[:B], [5, 6]=>[:C], [8, 4]=>[:D], [7, 7]=>[:E], [1, 9]=>[:F]}
	# Keep merging the closest pair until only 3 clusters are left ...
	clusterize!(data) until data.size <= 3
	p data
	# {[1, 9]=>[:F], [2, 2]=>[:A, :B], [7, 6]=>[:D, :C, :E]}