Skip to content

Instantly share code, notes, and snippets.

@lygaret
Created July 13, 2017 15:59
Show Gist options
  • Save lygaret/1fd9e5a17db3e124cb4a93784d78185a to your computer and use it in GitHub Desktop.
Save lygaret/1fd9e5a17db3e124cb4a93784d78185a to your computer and use it in GitHub Desktop.
ruby module for kmeans clustering (1d)
# Methods for clustering data
module Clustering
  # k-means clustering for 1d data.
  #
  # Pass a block to project an element onto the number it is clustered by;
  # without a block the elements themselves are used as the numbers.
  #
  # Initial centers are spaced evenly from the smallest to the largest
  # projected value, so the outcome is deterministic and does not depend on
  # how far the data sits from zero or on its scale.
  #
  # @param data [Enumerable] elements to cluster
  # @param k [Integer] number of clusters to return; must be positive
  # @param iterations [Integer] upper bound on refinement passes; must be
  #   positive. Refinement stops early once the centers no longer move.
  # @yieldparam element [Object] an element of +data+
  # @yieldreturn [Numeric] the value that element is clustered by
  # @return [Array] +k+ clusters. When +k+ is 1, or +data+ holds fewer than
  #   two distinct projected values, +data+ itself is returned untouched as
  #   the first cluster, followed by empty arrays. Otherwise every cluster is
  #   sorted in descending order, clusters are ordered by their largest
  #   member (descending), and empty clusters come last.
  # @raise [ArgumentError] if +k+ or +iterations+ is not a positive Integer
  def self.kmeans_1d(data, k: 2, iterations: 15, &projection)
    raise ArgumentError, 'k must be a positive integer' unless k.is_a?(Integer) && k.positive?
    unless iterations.is_a?(Integer) && iterations.positive?
      raise ArgumentError, 'iterations must be a positive integer'
    end
    return [data] if k == 1

    projection ||= ->(d) { d }
    return [data] + Array.new(k - 1) { [] } if data.to_a.uniq(&projection).length < 2

    # Project every element exactly once and carry [value, element] pairs
    # around, instead of re-running the projection in every pass.
    points = data.map { |element| [projection.call(element), element] }

    min, max = points.map(&:first).minmax
    step = (max - min) / (k - 1).to_f
    centers = Array.new(k) { |i| min + (i * step) }

    clusters = nil
    iterations.times do
      clusters = Array.new(k) { [] }
      points.each do |point|
        value = point.first
        # On a tie the lowest-indexed center wins.
        nearest = centers.each_index.min_by { |i| (centers[i] - value).abs }
        clusters[nearest] << point
      end

      # An empty cluster keeps its previous center. Dividing by its zero
      # length would produce NaN, and a NaN center can never win a point.
      updated = clusters.each_with_index.map do |members, i|
        members.empty? ? centers[i] : members.sum(&:first) / members.length.to_f
      end
      break if updated == centers # converged; more passes change nothing

      centers = updated
    end

    clusters
      .map { |members| members.sort_by(&:first).reverse }
      .sort_by { |members| members.empty? ? Float::INFINITY : -members.first.first }
      .map { |members| members.map(&:last) }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment