Skip to content

Instantly share code, notes, and snippets.

@e-monson
Created August 9, 2015 16:29
Show Gist options
  • Save e-monson/1087a84c1d305cae0300 to your computer and use it in GitHub Desktop.
Save e-monson/1087a84c1d305cae0300 to your computer and use it in GitHub Desktop.
Implementing k-means in Elixir (unfinished)
defmodule Kmeans do
  @moduledoc """
  Building blocks for k-means clustering over 3-dimensional points (unfinished).

  Points are `{x, y, z}` tuples of numbers. The module provides random test
  data, centroid initialisation inside the bounding box of a data set, and the
  distance / nearest-centroid / mean helpers needed for the assignment and
  update steps. The iteration loop itself is not implemented here.
  """

  @type point :: {number(), number(), number()}
  @type range :: {min :: number(), max :: number()}

  @doc """
  Returns `count` random points (default 100) for testing purposes.

  Each coordinate is a random integer between 1 and 100 inclusive.
  """
  @spec test_points(non_neg_integer()) :: [point()]
  def test_points(count \\ 100) when is_integer(count) and count >= 0 do
    # `:rand` seeds itself on first use; the older `:random` module is deprecated.
    Stream.repeatedly(fn -> :rand.uniform(100) end)
    |> Stream.chunk_every(3)
    |> Stream.map(&List.to_tuple/1)
    |> Enum.take(count)
  end

  @doc """
  Computes the per-axis extent of a list of `{x, y, z}` points.

  Returns a list of three `{min, max}` tuples, one for each of the x, y and z
  axes, in that order. Raises `Enum.EmptyError` when `list` is empty.
  """
  @spec point_range([point()]) :: [range()]
  def point_range(list) when is_list(list) do
    {xs, ys, zs} = :lists.unzip3(list)
    # One pass per axis yields both bounds at once.
    Enum.map([xs, ys, zs], &Enum.min_max/1)
  end

  # Returns a random float in the half-open interval [min, max).
  # A degenerate range (min == max) is allowed and yields `min` as a float;
  # this happens whenever all points share a coordinate on some axis.
  defp rand_in_range({min, max}) when is_number(min) and is_number(max) and min <= max do
    :rand.uniform() * (max - min) + min
  end

  @doc """
  Returns a random point whose coordinates fall within `ranges`.

  `ranges` is a list of `{min, max}` tuples as produced by `point_range/1`;
  the resulting tuple has one float coordinate per range.
  """
  @spec rand_point([range()]) :: tuple()
  def rand_point(ranges) when is_list(ranges) do
    ranges
    |> Enum.map(&rand_in_range/1)
    |> List.to_tuple()
  end

  @doc """
  Generates `count` random initial centroids for `points`.

  The centroids are drawn uniformly from the bounding box of `points` (see
  `point_range/1`); they are not sampled from the points themselves.
  `count` must be a non-negative integer.
  """
  @spec init_centroids([point()], non_neg_integer()) :: [tuple()]
  def init_centroids(points, count)
      when is_list(points) and is_integer(count) and count >= 0 do
    ranges = point_range(points)

    # The stream is infinite, so `count` must be non-negative: a negative count
    # would ask `Enum.take/2` for the *last* elements and never return.
    Stream.repeatedly(fn -> rand_point(ranges) end)
    |> Enum.take(count)
  end

  # Arithmetic mean of a list of numbers. Raises ArithmeticError on an empty list.
  defp mean(list) when is_list(list) do
    Enum.sum(list) / length(list)
  end

  @doc """
  Calculates the mean point (centroid) of a list of `{x, y, z}` points.

  Returns a tuple of floats. Raises `ArithmeticError` when `list` is empty.
  """
  @spec mean_point([point()]) :: {float(), float(), float()}
  def mean_point(list) when is_list(list) do
    {xs, ys, zs} = :lists.unzip3(list)

    [xs, ys, zs]
    |> Enum.map(&mean/1)
    |> List.to_tuple()
  end

  @doc """
  Calculates the Euclidean distance between two points.

  Both points must be tuples of the same size; a size mismatch raises
  `FunctionClauseError` instead of silently ignoring the extra coordinates.
  """
  @spec distance(tuple(), tuple()) :: float()
  def distance(pt1, pt2)
      when is_tuple(pt1) and is_tuple(pt2) and tuple_size(pt1) == tuple_size(pt2) do
    pt1
    |> Tuple.to_list()
    |> Enum.zip(Tuple.to_list(pt2))
    |> Enum.reduce(0, fn {a, b}, acc ->
      diff = a - b
      acc + diff * diff
    end)
    |> :math.sqrt()
  end

  @doc """
  Returns the centroid in `centroids` that is nearest to `point`.

  When several centroids are equally near, the first one in the list wins.
  Raises `Enum.EmptyError` when `centroids` is empty.
  """
  @spec closest_centroid(tuple(), [tuple()]) :: tuple()
  def closest_centroid(point, centroids) when is_tuple(point) and is_list(centroids) do
    Enum.min_by(centroids, &distance(point, &1))
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment