Created
July 13, 2017 15:59
-
-
Save lygaret/1fd9e5a17db3e124cb4a93784d78185a to your computer and use it in GitHub Desktop.
Ruby module for k-means clustering (1-D)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Methods for clustering data
module Clustering
  # k-means clustering for 1d data.
  #
  # Pass a block to project from an element into a number; without a block
  # the elements themselves are used as the numeric values.
  #
  # Initial centers are spaced evenly from the smallest to the largest
  # projected value, so seeding does not depend on the offset or scale of
  # the data. A cluster that ends up empty keeps its previous center rather
  # than turning into NaN (0 / 0.0), which would make it unreachable for
  # every later iteration.
  #
  # @param data [Enumerable] elements to cluster
  # @param k [Integer] number of clusters to produce (must be >= 1)
  # @param iterations [Integer] maximum number of refinement passes (must be >= 1)
  # @yieldparam element [Object] an element of +data+
  # @yieldreturn [Numeric] the value the element is clustered by
  # @return [Array<Array>] +k+ clusters; each cluster is sorted by descending
  #   projected value and clusters are ordered by descending maximum, with
  #   empty clusters last. When +k+ is 1, or +data+ has fewer than two
  #   distinct projected values, +data+ itself is returned (unsorted) as the
  #   first cluster, followed by +k - 1+ empty arrays.
  # @raise [ArgumentError] if +k+ or +iterations+ is not a positive Integer
  def self.kmeans_1d(data, k: 2, iterations: 15, &projection)
    raise ArgumentError, 'k must be a positive integer' unless k.is_a?(Integer) && k.positive?
    unless iterations.is_a?(Integer) && iterations.positive?
      raise ArgumentError, 'iterations must be a positive integer'
    end

    return [data] if k == 1

    projection ||= ->(d) { d }
    items = data.to_a
    # Project every element exactly once; everything below works on indexes.
    values = items.map(&projection)

    # Nothing to separate: hand everything back in the first cluster.
    return [data].concat(Array.new(k - 1) { [] }) if values.uniq.length < 2

    min, max = values.minmax
    step = (max - min) / (k - 1).to_f
    # Exactly k centers, evenly spaced over [min, max].
    centers = Array.new(k) { |i| min + (i * step) }

    groups = nil
    iterations.times do
      groups = Array.new(k) { [] }
      values.each_with_index do |value, idx|
        # min_by keeps the first minimum, so ties go to the lowest center index.
        nearest = centers.each_index.min_by { |c| (centers[c] - value).abs }
        groups[nearest] << idx
      end

      updated = groups.each_with_index.map do |members, c|
        if members.empty?
          centers[c] # keep the old center so the cluster can still attract points
        else
          members.sum { |idx| values[idx] } / members.length.to_f
        end
      end

      # Unchanged centers mean further passes would yield the same assignment.
      break if updated == centers

      centers = updated
    end

    groups
      .map { |members| members.sort_by { |idx| values[idx] }.reverse }
      .sort_by { |members| members.empty? ? Float::INFINITY : -values[members.first] }
      .map { |members| members.map { |idx| items[idx] } }
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment