Created
August 9, 2015 16:29
-
-
Save e-monson/1087a84c1d305cae0300 to your computer and use it in GitHub Desktop.
Implementing k-means in Elixir (unfinished)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Kmeans do
  @moduledoc """
  Building blocks for k-means clustering over three-dimensional points.

  Points are `{x, y, z}` tuples of numbers. The module can generate test
  data, seed random centroids inside the bounding box of a data set, average
  a list of points, and pick the centroid nearest to a point. It does not
  provide the clustering loop itself.
  """

  @typedoc "A point in three-dimensional space."
  @type point :: {number(), number(), number()}

  @typedoc "The `{min, max}` bounds of a single dimension."
  @type bounds :: {number(), number()}

  @doc """
  Returns 100 random points for testing purposes.

  Every coordinate is an integer drawn from `1..100`.
  """
  @spec test_points() :: [point()]
  def test_points do
    # `:rand` replaces the deprecated `:random` module, and
    # `Stream.chunk_every/2` replaces the deprecated `Stream.chunk/2`.
    Stream.repeatedly(fn -> :rand.uniform(100) end)
    |> Stream.chunk_every(3)
    |> Enum.take(100)
    |> Enum.map(&List.to_tuple/1)
  end

  @doc """
  Returns the `{min, max}` bounds of each dimension of `list`.

  Expects a list of `{x, y, z}` tuples and returns a list of three
  `{min, max}` tuples, in x, y, z order.

  Raises `Enum.EmptyError` when `list` is empty.
  """
  @spec point_range([point()]) :: [bounds()]
  def point_range(list) when is_list(list) do
    {xs, ys, zs} = :lists.unzip3(list)

    # One pass per dimension yields both extremes at once.
    Enum.map([xs, ys, zs], &Enum.min_max/1)
  end

  @doc """
  Returns a random point within the supplied `ranges`.

  `ranges` is a list of `{min, max}` tuples such as the one returned by
  `point_range/1`; the result has one float coordinate per range. A range
  whose `min` equals its `max` yields that value.
  """
  @spec rand_point([bounds()]) :: tuple()
  def rand_point(ranges) when is_list(ranges) do
    ranges
    |> Enum.map(&rand_in_range/1)
    |> List.to_tuple()
  end

  @doc """
  Returns `count` random centroids inside the bounding box of `points`.

  Finds the minimum and maximum value of each dimension of `points` and
  generates `count` random points within those bounds.

  `count` must be a non-negative integer. Raises `Enum.EmptyError` when
  `points` is empty.
  """
  @spec init_centroids([point()], non_neg_integer()) :: [tuple()]
  def init_centroids(points, count)
      when is_list(points) and is_integer(count) and count >= 0 do
    ranges = point_range(points)

    # The stream is infinite, so `count` has to be a non-negative integer:
    # a negative amount would make `Enum.take/2` try to exhaust the stream.
    Stream.repeatedly(fn -> rand_point(ranges) end)
    |> Enum.take(count)
  end

  @doc """
  Returns the mean point of the supplied list of `{x, y, z}` points.

  Each coordinate of the result is a float. Raises `ArithmeticError` when
  `list` is empty, because the mean of no values is undefined.
  """
  @spec mean_point([point()]) :: {float(), float(), float()}
  def mean_point(list) when is_list(list) do
    {xs, ys, zs} = :lists.unzip3(list)

    [xs, ys, zs]
    |> Enum.map(&mean/1)
    |> List.to_tuple()
  end

  @doc """
  Returns the Euclidean distance between two points.

  The points are tuples of numbers. Coordinates are paired positionally; if
  the tuples differ in size, the extra coordinates of the longer one are
  ignored.
  """
  @spec distance(tuple(), tuple()) :: float()
  def distance(pt1, pt2) do
    pt1
    |> Tuple.to_list()
    |> Enum.zip(Tuple.to_list(pt2))
    |> Enum.map(fn {a, b} -> :math.pow(a - b, 2) end)
    |> Enum.sum()
    |> :math.sqrt()
  end

  @doc """
  Returns the centroid in `centroids` that is nearest to `point`.

  When several centroids are equally near, the first one in the list is
  returned. Raises `Enum.EmptyError` when `centroids` is empty.
  """
  @spec closest_centroid(tuple(), [tuple()]) :: tuple()
  def closest_centroid(point, centroids) when is_tuple(point) and is_list(centroids) do
    Enum.min_by(centroids, &distance(point, &1))
  end

  # Returns a random float between `min` and `max`. `min == max` is accepted
  # so that a dimension in which every point has the same value still works.
  defp rand_in_range({min, max}) when is_number(min) and is_number(max) and min <= max do
    :rand.uniform() * (max - min) + min
  end

  # Arithmetic mean of a list of numbers; always a float.
  defp mean(list) when is_list(list) do
    Enum.sum(list) / length(list)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment