Created
September 24, 2019 07:29
-
-
Save captain-yossarian/66effb18b029951a7e4736deec16607d to your computer and use it in GitHub Desktop.
kmeans algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use rand::{thread_rng, Rng}; | |
| pub fn mean(numbers: Vec<f32>) -> f32 { | |
| numbers.iter().fold(0.0, |acc, value| acc + value) / numbers.len() as f32 | |
| } | |
| pub fn distance(a: [f32; 2], b: [f32; 2]) -> f64 { | |
| (((b[0] - a[0]).powf(2.0) + (b[1] - a[1]).powf(2.0)) as f64).sqrt() | |
| } | |
| pub fn random_number<T>(bottom: T, up: T) -> T | |
| where | |
| T: rand::distributions::uniform::SampleUniform, | |
| { | |
| let mut random = thread_rng(); | |
| random.gen_range(bottom, up) | |
| } | |
| pub type Point = [f32; 2]; | |
| #[derive(Debug)] | |
| pub struct Range { | |
| min: f32, | |
| max: f32, | |
| } | |
| const DIMENSIONALITY: usize = 2; | |
| #[derive(Debug)] | |
| pub struct KlusterMeans { | |
| klusters: i8, | |
| points: Vec<Point>, | |
| iterations: i32, | |
| centroid_assignments: Vec<i32>, | |
| pub centroids: Vec<Point>, | |
| } | |
| impl KlusterMeans { | |
| pub fn new(klusters: i8, points: Vec<Point>) -> KlusterMeans { | |
| let mut centroid_assignments: Vec<i32> = vec![]; | |
| points.iter().for_each(|_| centroid_assignments.push(0)); | |
| KlusterMeans { | |
| klusters, | |
| points, | |
| iterations: 0, | |
| centroid_assignments, | |
| centroids: vec![], | |
| } | |
| } | |
| pub fn get_dimensionality(&self) -> usize { | |
| self.points[0].len() | |
| } | |
| pub fn get_range_for_dimension(&self, dimension: usize) -> Range { | |
| let values = self | |
| .points | |
| .iter() | |
| .fold(Range { min: 0.0, max: 0.0 }, |acc, elem| { | |
| let Range { min, max } = acc; | |
| let value = elem[dimension]; | |
| Range { | |
| min: min.min(value), | |
| max: max.max(value), | |
| } | |
| }); | |
| values | |
| } | |
| pub fn get_all_dimension_ranges(&self) -> Vec<Range> { | |
| let mut ranges: Vec<Range> = Vec::with_capacity(DIMENSIONALITY); | |
| for i in 0..DIMENSIONALITY { | |
| ranges.push(self.get_range_for_dimension(i)) | |
| } | |
| ranges | |
| } | |
| pub fn init_random_centroids(&mut self) -> Vec<Point> { | |
| let dimension_ranges = self.get_all_dimension_ranges(); | |
| let mut centroids: Vec<Point> = Vec::with_capacity(self.klusters as usize); | |
| for _ in 0..self.klusters { | |
| let mut point: Point = [0.0, 0.0]; | |
| for dimension in 0..DIMENSIONALITY { | |
| let Range { min, max } = dimension_ranges[dimension]; | |
| point[dimension] = random_number::<f32>(min, max); | |
| } | |
| centroids.push(point) | |
| } | |
| self.centroids = centroids.clone(); | |
| centroids | |
| } | |
| pub fn centroid_exist(&self, index: usize) -> Option<i32> { | |
| match self.centroid_assignments.get(index) { | |
| Some(value) => Some(*value), | |
| None => None, | |
| } | |
| } | |
| /// | |
| /// ERROR is in this method, last_assigned always None | |
| pub fn assign_point_to_centroid(&mut self, point_index: usize) -> bool { | |
| let mut min_distance = 0.0; | |
| let mut assigned_centroid: Option<i32> = None; | |
| let mut result = false; | |
| for index in 0..self.centroids.len() { | |
| let centroid = self.centroids[index]; | |
| let distance_to_centroid = distance(self.points[point_index], centroid); | |
| if min_distance == 0.0 || distance_to_centroid < min_distance { | |
| min_distance = distance_to_centroid; | |
| assigned_centroid = Some(index as i32); | |
| } | |
| } | |
| if let Some(centroid) = assigned_centroid { | |
| self.centroid_assignments[point_index] = centroid; | |
| let last_assigned = self.centroid_exist(point_index); | |
| if let Some(last) = last_assigned { | |
| result = last!=centroid; | |
| } | |
| } | |
| result | |
| } | |
| pub fn assign_points_to_centroids(&mut self) -> bool { | |
| let mut was_any_reassigned = false; | |
| for index in 0..self.points.len() { | |
| let was_reassigned = self.assign_point_to_centroid(index); | |
| if was_reassigned == true { | |
| return true; | |
| } | |
| } | |
| false | |
| } | |
| pub fn get_points_for_centroid(&self, centroid_index: usize) -> Vec<Point> { | |
| let mut points: Vec<Point> = vec![]; | |
| for index in 0..self.points.len() { | |
| let assignment = self.centroid_exist(index); | |
| if let Some(value) = assignment { | |
| if value == centroid_index as i32 { | |
| points.push(self.points[index]) | |
| } | |
| } | |
| } | |
| points | |
| } | |
| pub fn update_centroid_location(&mut self, index: usize) -> Point { | |
| let centroid_points = self.get_points_for_centroid(index); | |
| let mut centroid: Point = [0.0, 0.0]; | |
| for dimension in 0..DIMENSIONALITY { | |
| let points = centroid_points | |
| .iter() | |
| .map(|elem| elem[dimension]) | |
| .collect::<Vec<f32>>(); | |
| let debug = points.clone(); | |
| centroid[dimension] = mean(points); | |
| } | |
| self.centroids[index] = centroid; | |
| centroid | |
| } | |
| pub fn update_centroid_locations(&mut self) { | |
| for index in 0..self.centroids.len() { | |
| self.update_centroid_location(index); | |
| } | |
| } | |
| pub fn run(&mut self) { | |
| let mut iteration = 0; | |
| loop { | |
| let did_assignments_change = self.assign_points_to_centroids(); | |
| self.update_centroid_locations(); | |
| iteration += 1; | |
| if iteration == 1000{ | |
| break; | |
| } | |
| } | |
| } | |
| pub fn reset(&mut self) { | |
| self.centroid_assignments = vec![]; | |
| } | |
| } | |
| fn main() { | |
| let points = vec![ | |
| [1.0, 2.0], | |
| [2.0, 3.0], | |
| [2.0, 5.0], | |
| [1.0, 6.0], | |
| [4.0, 6.0], | |
| [3.0, 5.0], | |
| [2.0, 4.0], | |
| [4.0, 3.0], | |
| [5.0, 2.0], | |
| [6.0, 9.0], | |
| [4.0, 4.0], | |
| [3.0, 3.0], | |
| [8.0, 6.0], | |
| [7.0, 5.0], | |
| [9.0, 6.0], | |
| [9.0, 7.0], | |
| [8.0, 8.0], | |
| [7.0, 9.0], | |
| [11.0, 3.0], | |
| [11.0, 2.0], | |
| [9.0, 9.0], | |
| [7.0, 8.0], | |
| [6.0, 8.0], | |
| [12.0, 2.0], | |
| [14.0, 3.0], | |
| [15.0, 1.0], | |
| [15.0, 4.0], | |
| [14.0, 2.0], | |
| [13.0, 1.0], | |
| [16.0, 4.0], | |
| ]; | |
| // draw(vec![(0.0, 0.0), (2.0, 2.0), (4.0, 7.0)]); | |
| let mut kmeans = KlusterMeans::new(3, points); | |
| kmeans.init_random_centroids(); | |
| kmeans.run(); | |
| println!("Mean {:?}", kmeans); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment