Last active
March 24, 2021 22:50
-
-
Save marcosgz/6da411c78b026f31ab61577784811212 to your computer and use it in GitHub Desktop.
RGB clustering using k-Means algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# It's ODM (Object Document Mapper) to the RGB data | |
class RGB | |
COLORS = { | |
r: ->(val) { "\e[31m#{val}\e[0m" }, | |
g: ->(val) { "\e[32m#{val}\e[0m" }, | |
b: ->(val) { "\e[34m#{val}\e[0m" }, | |
}.freeze | |
# @overload initialize(r, g, b) | |
# @param r [Integer] Value for R | |
# @param g [Integer] Value for G | |
# @param b [Integer] Value for B | |
def initialize(*args) | |
@r, @g, @b = args | |
end | |
# @param value [Array<R Integer, G Integer, B Integer>, RGB] An array with R, G, B values | |
# @raise <ArgumentError> when value cannot be converted to RGB | |
def self.coerce(value) | |
case value | |
when Array | |
RGB.new(*value) | |
when RGB | |
value | |
else | |
raise ArgumentError, format('can not convert %<value>p into a RGB') | |
end | |
end | |
# Calculate euclidian distance between two instances of RGB | |
# @param a [RBG] An instance of RGB | |
# @param b [RBG] An instance of RGB | |
# @return [Float] | |
def self.distance(a, b) | |
power = %i[r g b].map { |attribute| (a[attribute] - b[attribute]) ** 2 }.reduce(&:+) | |
Math.sqrt(power) | |
end | |
# @param variable_name [Symbol, String] The name of the instance variable | |
# @raise [ArgumentError] when instance variable is not defined | |
def [](variable_name) | |
var_name = :"@#{variable_name}" | |
raise ArgumentError unless instance_variable_defined?(var_name) | |
instance_variable_get(var_name) | |
end | |
# Calculate euclidian distance to the given RGB object | |
# @param rgb[RGB] An instance of RGB | |
# @raise [ArgumentError] When the given rgb is not an instance of RGB | |
def distance(rgb) | |
raise ArgumentError unless rgb.is_a?(RGB) | |
RGB.distance(self, rgb) | |
end | |
# @return [Array<Integer, Integer, Integer>] Returns an array with [R, G, B] values | |
def to_a | |
[@r, @g, @b] | |
end | |
# @option color [Boolean] colored class name | |
def inspect(color: true) | |
if color | |
attrs = COLORS.map { |k, v| v.(self[k]) }.compact.join(' ') | |
"#<#{COLORS.map { |k, v| format('%s:%s', v.(k.to_s.upcase), v.(self[k])) }.compact.join(' ')}>" | |
else | |
attrs = COLORS.keys.map { |k| self[k].to_s.rjust(3) }.compact.join(' ') | |
"#<RGB #{attrs}>" | |
end | |
end | |
alias to_s inspect | |
end | |
# Used to group similar RBG objects using K-Means algoritm. | |
class Clusterer | |
MIN_VALUE = 0 | |
MAX_VALUE = 255 | |
attr_reader :dataset, :interactions | |
# Initialize a new Clusterer | |
# @param dataset [Array<RBG>] A collection of RGB instances | |
def initialize(dataset) | |
@dataset = dataset | |
end | |
# The results differ for each run | |
# | |
# @param opts [Hash] Options to create K-Means clusters | |
# @option opts [Integer] :centroids (2) The number of centroids to group dataset | |
# @option opts [Integer] :max_interactions (200) The maximum number of times the algorith should execute | |
# @option opts [Boolean] :debug (true) Enable/Disable verbose logging | |
# @option opts [Integer] :min_movement (0.05) The minimum moviment that the centroid should move between interactions | |
# @return [Hash<RGB, Array>] Return group of RGB | |
def cluster(centroids: 3, max_interactions: 200, debug: false, min_movement: 0.05) | |
centroids = centroids.times.map { RGB.coerce(generate_random_rgb_values) } | |
reload! | |
cluster = nil | |
until (movement = centroids_movement(centroids)) < min_movement.abs || (max_interactions < @interactions) | |
cluster = centroids.each_with_object({}) { |v, mem| mem[v] = [] } | |
dataset.each do |rgb| | |
centroid = centroids.map { |centroid| [centroid, centroid.distance(rgb)] }.sort_by {|_cid, dist| dist }.dig(0, 0) | |
cluster[centroid] << rgb | |
end | |
@interactions += 1 | |
if debug | |
puts format('Interaction #%<loop>d | Movement of: %<movement>.2f', | |
loop: @interactions, | |
movement: movement, | |
) | |
cluster_tree(cluster) | |
end | |
centroids = cluster.map do |centroid, values| | |
if values.any? | |
RGB.new(*values.map(&:to_a).transpose.map {|v| v.reduce(&:+) / v.size }) | |
else | |
centroid | |
end | |
end | |
end | |
cluster | |
end | |
def cluster_tree(cluster) | |
cluster.each do |key, values| | |
puts("─> #{key}") | |
values.each do |value| | |
puts(" #{value == values.last ? '└' : '├' }──>#{value.inspect}") | |
end | |
end | |
end | |
private | |
def centroids_movement(centroids) | |
unless @prev_centroids # First execution this variable will be nil | |
@prev_centroids = centroids | |
return MAX_VALUE + 1 | |
end | |
result = [centroids, @prev_centroids].transpose.map { |a, b| a.distance(b) }.sum | |
@prev_centroids = centroids | |
result.abs | |
end | |
def reload! | |
@prev_centroids = nil | |
@interactions = 0 | |
end | |
# Generate random values within the permitted range | |
def generate_random_rgb_values | |
3.times.map { (MIN_VALUE..MAX_VALUE).to_a.sample } | |
end | |
end | |
# Initialize a collection of RGBs | |
samples = [ | |
[1, 10, 200], | |
[2, 20, 230], | |
[6, 25, 150], | |
[7, 45, 100], | |
[10, 50, 125], | |
[3, 24, 111], | |
[100, 4, 10], | |
[250, 7, 50], | |
[243, 5, 68], | |
[210, 2, 90], | |
[200, 1, 95], | |
[215, 0, 68], | |
[56, 200, 1], | |
[79, 234, 3], | |
[80, 210, 8], | |
[95, 200, 10], | |
[80, 210, 4], | |
[49, 207, 1], | |
].map do |sample| | |
RGB.new(*sample) | |
end | |
clusterer = Clusterer.new(samples) | |
cluster = clusterer.cluster(debug: false) | |
puts "Result after #{clusterer.interactions} interactions:" | |
clusterer.cluster_tree(cluster) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ruby kmeans.rb [ruby-2.6.3p62] | |
Result after 4 interactions: | |
─> #<R:73 G:210 B:4> | |
├──>#<R:56 G:200 B:1> | |
├──>#<R:79 G:234 B:3> | |
├──>#<R:80 G:210 B:8> | |
├──>#<R:95 G:200 B:10> | |
├──>#<R:80 G:210 B:4> | |
└──>#<R:49 G:207 B:1> | |
─> #<R:4 G:29 B:152> | |
├──>#<R:1 G:10 B:200> | |
├──>#<R:2 G:20 B:230> | |
├──>#<R:6 G:25 B:150> | |
├──>#<R:7 G:45 B:100> | |
├──>#<R:10 G:50 B:125> | |
└──>#<R:3 G:24 B:111> | |
─> #<R:203 G:3 B:63> | |
├──>#<R:100 G:4 B:10> | |
├──>#<R:250 G:7 B:50> | |
├──>#<R:243 G:5 B:68> | |
├──>#<R:210 G:2 B:90> | |
├──>#<R:200 G:1 B:95> | |
└──>#<R:215 G:0 B:68> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment