Skip to content

Instantly share code, notes, and snippets.

@captain-yossarian
Created September 24, 2019 07:29
Show Gist options
  • Select an option

  • Save captain-yossarian/66effb18b029951a7e4736deec16607d to your computer and use it in GitHub Desktop.

Select an option

Save captain-yossarian/66effb18b029951a7e4736deec16607d to your computer and use it in GitHub Desktop.
kmeans algorithm
use rand::{thread_rng, Rng};
pub fn mean(numbers: Vec<f32>) -> f32 {
numbers.iter().fold(0.0, |acc, value| acc + value) / numbers.len() as f32
}
pub fn distance(a: [f32; 2], b: [f32; 2]) -> f64 {
(((b[0] - a[0]).powf(2.0) + (b[1] - a[1]).powf(2.0)) as f64).sqrt()
}
pub fn random_number<T>(bottom: T, up: T) -> T
where
T: rand::distributions::uniform::SampleUniform,
{
let mut random = thread_rng();
random.gen_range(bottom, up)
}
pub type Point = [f32; 2];
#[derive(Debug)]
pub struct Range {
min: f32,
max: f32,
}
const DIMENSIONALITY: usize = 2;
#[derive(Debug)]
pub struct KlusterMeans {
klusters: i8,
points: Vec<Point>,
iterations: i32,
centroid_assignments: Vec<i32>,
pub centroids: Vec<Point>,
}
impl KlusterMeans {
pub fn new(klusters: i8, points: Vec<Point>) -> KlusterMeans {
let mut centroid_assignments: Vec<i32> = vec![];
points.iter().for_each(|_| centroid_assignments.push(0));
KlusterMeans {
klusters,
points,
iterations: 0,
centroid_assignments,
centroids: vec![],
}
}
pub fn get_dimensionality(&self) -> usize {
self.points[0].len()
}
pub fn get_range_for_dimension(&self, dimension: usize) -> Range {
let values = self
.points
.iter()
.fold(Range { min: 0.0, max: 0.0 }, |acc, elem| {
let Range { min, max } = acc;
let value = elem[dimension];
Range {
min: min.min(value),
max: max.max(value),
}
});
values
}
pub fn get_all_dimension_ranges(&self) -> Vec<Range> {
let mut ranges: Vec<Range> = Vec::with_capacity(DIMENSIONALITY);
for i in 0..DIMENSIONALITY {
ranges.push(self.get_range_for_dimension(i))
}
ranges
}
pub fn init_random_centroids(&mut self) -> Vec<Point> {
let dimension_ranges = self.get_all_dimension_ranges();
let mut centroids: Vec<Point> = Vec::with_capacity(self.klusters as usize);
for _ in 0..self.klusters {
let mut point: Point = [0.0, 0.0];
for dimension in 0..DIMENSIONALITY {
let Range { min, max } = dimension_ranges[dimension];
point[dimension] = random_number::<f32>(min, max);
}
centroids.push(point)
}
self.centroids = centroids.clone();
centroids
}
pub fn centroid_exist(&self, index: usize) -> Option<i32> {
match self.centroid_assignments.get(index) {
Some(value) => Some(*value),
None => None,
}
}
///
/// ERROR is in this method, last_assigned always None
pub fn assign_point_to_centroid(&mut self, point_index: usize) -> bool {
let mut min_distance = 0.0;
let mut assigned_centroid: Option<i32> = None;
let mut result = false;
for index in 0..self.centroids.len() {
let centroid = self.centroids[index];
let distance_to_centroid = distance(self.points[point_index], centroid);
if min_distance == 0.0 || distance_to_centroid < min_distance {
min_distance = distance_to_centroid;
assigned_centroid = Some(index as i32);
}
}
if let Some(centroid) = assigned_centroid {
self.centroid_assignments[point_index] = centroid;
let last_assigned = self.centroid_exist(point_index);
if let Some(last) = last_assigned {
result = last!=centroid;
}
}
result
}
pub fn assign_points_to_centroids(&mut self) -> bool {
let mut was_any_reassigned = false;
for index in 0..self.points.len() {
let was_reassigned = self.assign_point_to_centroid(index);
if was_reassigned == true {
return true;
}
}
false
}
pub fn get_points_for_centroid(&self, centroid_index: usize) -> Vec<Point> {
let mut points: Vec<Point> = vec![];
for index in 0..self.points.len() {
let assignment = self.centroid_exist(index);
if let Some(value) = assignment {
if value == centroid_index as i32 {
points.push(self.points[index])
}
}
}
points
}
pub fn update_centroid_location(&mut self, index: usize) -> Point {
let centroid_points = self.get_points_for_centroid(index);
let mut centroid: Point = [0.0, 0.0];
for dimension in 0..DIMENSIONALITY {
let points = centroid_points
.iter()
.map(|elem| elem[dimension])
.collect::<Vec<f32>>();
let debug = points.clone();
centroid[dimension] = mean(points);
}
self.centroids[index] = centroid;
centroid
}
pub fn update_centroid_locations(&mut self) {
for index in 0..self.centroids.len() {
self.update_centroid_location(index);
}
}
pub fn run(&mut self) {
let mut iteration = 0;
loop {
let did_assignments_change = self.assign_points_to_centroids();
self.update_centroid_locations();
iteration += 1;
if iteration == 1000{
break;
}
}
}
pub fn reset(&mut self) {
self.centroid_assignments = vec![];
}
}
fn main() {
let points = vec![
[1.0, 2.0],
[2.0, 3.0],
[2.0, 5.0],
[1.0, 6.0],
[4.0, 6.0],
[3.0, 5.0],
[2.0, 4.0],
[4.0, 3.0],
[5.0, 2.0],
[6.0, 9.0],
[4.0, 4.0],
[3.0, 3.0],
[8.0, 6.0],
[7.0, 5.0],
[9.0, 6.0],
[9.0, 7.0],
[8.0, 8.0],
[7.0, 9.0],
[11.0, 3.0],
[11.0, 2.0],
[9.0, 9.0],
[7.0, 8.0],
[6.0, 8.0],
[12.0, 2.0],
[14.0, 3.0],
[15.0, 1.0],
[15.0, 4.0],
[14.0, 2.0],
[13.0, 1.0],
[16.0, 4.0],
];
// draw(vec![(0.0, 0.0), (2.0, 2.0), (4.0, 7.0)]);
let mut kmeans = KlusterMeans::new(3, points);
kmeans.init_random_centroids();
kmeans.run();
println!("Mean {:?}", kmeans);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment