Skip to content

Instantly share code, notes, and snippets.

@andrewxhill
Created November 30, 2012 23:53
Show Gist options
  • Save andrewxhill/4179609 to your computer and use it in GitHub Desktop.
Save andrewxhill/4179609 to your computer and use it in GitHub Desktop.
kmeans function and examples
--DROP FUNCTION axh_kmeans_geom_sort(geometry[], integer, integer);
CREATE OR REPLACE FUNCTION axh_kmeans_geom_sort(the_geom geometry[], clusters integer DEFAULT NULL, iterations integer DEFAULT NULL)
RETURNS SETOF GEOMETRY
AS $$
DECLARE
h GEOMETRY; -- rectangle cell
c INTEGER; --
i INTEGER; --loop iterator
g INTEGER; --loop iterator
t INTEGER; --len of geoms
bx GEOMETRY[];
kmeans GEOMETRY[];
BEGIN
IF clusters IS NULL THEN
clusters := 5;
END IF;
IF iterations IS NULL THEN
iterations := 8;
END IF;
t := Array_Length(the_geom, 1);
c := 0;
FOR i IN 1..clusters LOOP --create starting seed locations for kmeans
bx = array_append(bx, the_geom[(i*(t/clusters))]);
c = c + i;
END LOOP;
FOR g IN 1..iterations LOOP --fine tune locations
SELECT ST_Accum(pp) INTO bx FROM
(SELECT ST_Centroid(ST_Collect(geom)) pp FROM
(SELECT geom, r FROM
(SELECT UNNEST(the_geom) AS geom) s,
(SELECT UNNEST(bx) AS k, Generate_Series(1, Array_Length(bx, 1)) AS r) AS m
WHERE ST_Distance(geom,k) = (SELECT MIN(ST_Distance(gg, geom)) FROM UNNEST(bx) as gg)
) foo
GROUP BY r
) bar;
END LOOP;
FOR i IN 1..Array_Length(bx,1) LOOP
RETURN NEXT bx[i];
END LOOP;
END
$$ LANGUAGE 'plpgsql' IMMUTABLE;
--Example Query working on Viz2
SELECT axh_kmeans_geom_sort(ST_Accum(the_geom), 10, 7) geom, GENERATE_SERIES(1,10) id FROM c22p;
--Example re-combination query
WITH kmeans AS (SELECT axh_kmeans_geom_sort(ST_Accum(the_geom), 10, 7) geom, GENERATE_SERIES(1,10) id FROM c22p)
SELECT k.the_geom_webmercator, (SELECT m.id FROM kmeans m ORDER BY m.geom <-> k.the_geom LIMIT 1 ) as kmean_id FROM c22p k
--Example re-combination query without duplicate votes
WITH kmeans AS (SELECT axh_kmeans_geom_sort(ST_Accum(DISTINCT the_geom), 10, 7) geom, GENERATE_SERIES(1,10) id FROM c22p)
SELECT k.the_geom_webmercator, (SELECT m.id FROM kmeans m ORDER BY m.geom <-> k.the_geom LIMIT 1 ) as kmean_id FROM c22p k
--Example re-combination query with simplification
WITH kmeans AS (SELECT axh_kmeans_geom_sort(ST_Accum(DISTINCT ST_SnapToGrid(the_geom,0.001)), 10, 7) geom, GENERATE_SERIES(1,10) id FROM c22p)
SELECT k.the_geom_webmercator, (SELECT m.id FROM kmeans m ORDER BY m.geom <-> k.the_geom LIMIT 1 ) as kmean_id FROM c22p k
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment