Created
February 14, 2013 19:17
-
-
Save mathias-brandewinder/4955476 to your computer and use it in GitHub Desktop.
Run k-means clustering on a list of cities and plot the results as a labeled scatterplot in Excel.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// A named point to be clustered and plotted.
type City =
    { /// Label of the city.
      Name: string
      /// First coordinate (presumably latitude -- confirm units with the data source).
      Lat: float
      /// Second coordinate (presumably longitude -- confirm units with the data source).
      Lng: float }
/// Input data: one (city, state, country) triple per entry.
/// NOTE(review): Boston and Nashville each appear twice; the repeats are kept
/// because they presumably reflect separate entries in the source data -- confirm.
let cities =
    [ ("San Francisco", "CA", "United States")
      ("New York", "NY", "United States")
      ("Hoboken", "NJ", "United States")
      ("Redmond", "WA", "United States")
      ("Seattle", "WA", "United States")
      ("Boston", "MA", "United States")
      ("Boston", "MA", "United States")
      ("Romeoville", "IL", "United States")
      ("Nashville", "TN", "United States")
      ("Nashville", "TN", "United States")
      ("Detroit", "MI", "United States")
      // Spelling corrected from "Ann Harbor".
      ("Ann Arbor", "MI", "United States")
      ("Houston", "TX", "United States") ]
// Single shared random source for the whole script (fake coordinates and centroid seeding).
let rng = System.Random()
/// Builds a City from a (city, state, country) triple without any real lookup:
/// the name is copied from the triple and both coordinates are drawn from `rng`,
/// i.e. uniformly from [0.0, 1.0).
let fakeService (data: (string * string * string)) =
    // Only the city name is used; state and country are deliberately ignored.
    let city, _, _ = data
    { Name = city; Lat = rng.NextDouble(); Lng = rng.NextDouble() }
// Extraction of cities: turn every input triple into a City record
// by running it through fakeService, preserving the input order.
let positions = List.map fakeService cities
// k-means clustering

/// Every city as a two-element [| Lat; Lng |] vector, in the same order as `positions`.
let pos = positions |> List.map (fun c -> [| c.Lat; c.Lng |])

/// Initial-centroid factory handed to `kmeans`: delegates to `randomCentroids`
/// (defined elsewhere) using the shared `rng`. It draws from the sample `ps`
/// it is given, rather than from the global `pos`, so it stays correct for
/// whatever dataset the caller passes in.
let factory (ps: (float []) seq) (k: int) = randomCentroids rng ps k

/// `kmeans` partially applied to the distance, centroid factory, aggregator and
/// the city vectors; it still expects the number of clusters.
/// Reuses `pos` instead of rebuilding the same list of vectors a second time.
let analysis = kmeans euclidean factory avgCentroid pos
// plot clusters

/// Runs the clustering for `k` clusters and sends the result to `labeledplot`.
/// Each city becomes (Lat, Lng, cluster index, Name); each centroid is appended
/// as (x, y, -1, "CENTROID") so it can be told apart from the data points.
let plotClusters k =
    let centers, classifier = analysis k
    let datapoints =
        positions
        |> List.map (fun city ->
            // Classify once per city; the result does not depend on which
            // centroid it is being compared against.
            let assigned = classifier [| city.Lat; city.Lng |]
            // Cluster index = position of the assigned centroid within `centers`
            // (List.findIndex raises if no centroid equals the classifier's result).
            let clusterIndex =
                centers |> List.findIndex (fun center -> center = assigned)
            city.Lat, city.Lng, clusterIndex, city.Name)
    let centroids = centers |> List.map (fun c -> c.[0], c.[1], -1, "CENTROID")
    List.append datapoints centroids |> labeledplot
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment