Skip to content

Instantly share code, notes, and snippets.

@afeld
Created September 4, 2012 13:07
Show Gist options
  • Save afeld/3620993 to your computer and use it in GitHub Desktop.
Save afeld/3620993 to your computer and use it in GitHub Desktop.
test for finding cluster in SQL graph
DROP TABLE IF EXISTS profile_matches;
DROP TABLE IF EXISTS profiles;
-- the "nodes"
CREATE TABLE profiles (
id SERIAL PRIMARY KEY,
service text,
username text
);
-- the "edges"
CREATE TABLE profile_matches (
id SERIAL PRIMARY KEY,
profile1_id int REFERENCES profiles(id),
profile2_id int REFERENCES profiles(id)
-- etc.
);
INSERT INTO profiles (id, service, username) VALUES
(1, 'twitter', 'aidanfeldman'),
(2, 'facebook', 'aidan.feldman'),
(3, 'github', 'afeld'),
(4, 'jux', 'aidan'),
(5, 'twitter', 'bipsterite'),
(6, 'github', 'jsteinbe');
INSERT INTO profile_matches (profile1_id, profile2_id) VALUES
(1, 2), -- AF twitter to facebook
(2, 3), -- AF facebook to github
(3, 4), -- AF github to jux
(5, 6); -- JS twitter to github
WITH RECURSIVE search_graph(path, last_profile1, last_profile2) AS (
SELECT ARRAY[id], id, id
FROM profiles WHERE id = 2 -- start where we want to start!
UNION ALL
SELECT sg.path || m.profile2_id || m.profile1_id, m.profile1_id, m.profile2_id
FROM search_graph sg
JOIN profile_matches m
ON (m.profile1_id = sg.last_profile2 AND NOT sg.path @> ARRAY[m.profile1_id])
OR (m.profile2_id = sg.last_profile1 AND NOT sg.path @> ARRAY[m.profile2_id])
)
-- Moved where clause to start of graph search
SELECT DISTINCT unnest(path) FROM search_graph; -- duplicates possible
-- returns:
--
-- unnest
-- --------
-- 2
-- *should* return:
--
-- unnest
-- --------
-- 1
-- 2
-- 3
-- 4
DROP TABLE profile_matches;
DROP TABLE profiles;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment