Skip to content

Instantly share code, notes, and snippets.

@OrenBochman
Created July 22, 2013 09:07
Show Gist options
  • Save OrenBochman/6052452 to your computer and use it in GitHub Desktop.
Save OrenBochman/6052452 to your computer and use it in GitHub Desktop.
Finding closely affiliated friends. These queiries are built to operate on a small datasets at http://sqlfiddle.com/#!2/907b6/1. For work on a real wiki numbers in these queries will require fine tuning.
SELECT rev_page
FROM revision_userindex
INNER JOIN page ON page.page_id = rev_page
WHERE rev_user_text = "Jimbo Wales"
AND page_namespace IN (0,1)
GROUP BY rev_page
HAVING count(rev_page) > 20
ORDER BY COUNT(*) DESC
Select REV_USER_TEXT FROM
(
Select rev_user_text, page_title,COUNT(rev_page)
From revision_userindex
INNER JOIN page ON page_id = rev_page
Where rev_page in
(SELECT rev_page
FROM revision_userindex
INNER JOIN page ON page.page_id = rev_page
WHERE rev_user_text = 'Adam'
AND page_namespace IN (0,1)
GROUP BY rev_page
HAVING count(rev_page) > 2
ORDER BY COUNT(*) DESC
)
AND rev_user != 1
GROUP BY rev_user,page_title
HAVING count(rev_page) > 2
ORDER BY COUNT(*) DESC
) PALS
GROUP BY rev_user_text
HAVING COUNT(rev_user_text)>1
LIMIT 4 ;
CREATE TABLE namespaces( `i` int(11) unsigned, `v` blob);
INSERT INTO namespaces VALUES (0,''),(1,'Talk'),(2,'User'),(2,'UserTalk');
CREATE TABLE page
(
`page_id` int(8) unsigned,
`page_namespace` int(11),
`page_title` varchar(255),
-- `page_restrictions` tinyblob,
-- `page_counter` bigint(20) unsigned,
-- `page_is_redirect` tinyint(1) unsigned,
-- `page_is_new` tinyint(1) unsigned,
-- `page_random` double unsigned,
-- `page_touched` varbinary(14),
-- `page_latest` int(8) unsigned,
`page_len` int(8) unsigned
);
INSERT INTO page VALUES
(0,0,'Apple',10),(1,1,'Apple',10),
(2,0,'Brick',10),(3,1,'Brick',10),
(4,0,'Car',10),(5,1,'Car',10),
(6,0,'Door',10),(7,1,'Door',10),
(8,0,'Echo',10),(9,1,'Echo',10),
(10,0,'Frame',10),(11,1,'Frame',10);
CREATE TABLE revision_userindex
(`rev_id` int(8) unsigned,
`rev_page` int(8) unsigned,
`rev_text_id` int,
`rev_comment` varchar(44),
`rev_user` int,
`rev_user_text` varchar(255),
`rev_timestamp` varchar(14),
`rev_minor_edit` int,
`rev_deleted` int,
`rev_len` int,
`rev_parent_id` int,
`rev_sha1` varchar(31)
);
INSERT INTO revision_userindex VALUES
(10, 0, 0, 'ce', 1, 'Adam', 20130611000000, 0, 0, 78, 0, '6ow'),
(11, 0, 0, 'ce', 1, 'Adam', 20130612000000, 0, 0, 78, 0, '6ow'),
(12, 0, 0, 'ce', 1, 'Adam', 20130613000000, 0, 0, 78, 0, '6ow'),
(13, 2, 0, 'ce', 1, 'Adam', 20130613003600, 0, 0, 78, 0, '6ow'),
(14, 2, 0, 'ce', 1, 'Adam', 20130613007200, 0, 0, 78, 0, '6ow'),
(15, 2, 0, 'ce', 1, 'Adam', 20130613010800, 0, 0, 78, 0, '6ow'),
(16, 2, 0, 'ce', 1, 'Adam', 20130614030000, 0, 0, 78, 0, '6ow'),
(17, 4, 0, 'ce', 1, 'Adam', 20130614033001, 0, 0, 78, 0, '6ow'),
(18, 4, 0, 'ce', 1, 'Adam', 20130614036001, 0, 0, 78, 0, '6ow'),
(19, 4, 0, 'ce', 1, 'Adam', 20130614039001, 0, 0, 78, 0, '6ow'),
(20, 6, 0, 'ce', 1, 'Adam', 20130614040000, 0, 0, 78, 0, '6ow'),
(21, 6, 0, 'ce', 1, 'Adam', 20130616060001, 0, 0, 78, 0, '6ow'),
(22, 6, 0, 'ce', 1, 'Adam', 20130616061000, 0, 0, 78, 0, '6ow'),
(23, 8, 0, 'ce', 1, 'Adam', 20130616062000, 0, 0, 78, 0, '6ow'),
(22, 0, 0, 'ce', 2, 'Beth', 20130713000000, 0, 0, 78, 0, '6ow'),
(23, 0, 0, 'ce', 2, 'Beth', 20130713003600, 0, 0, 78, 0, '6ow'),
(24, 0, 0, 'ce', 2, 'Beth', 20130713007200, 0, 0, 78, 0, '6ow'),
(25, 0, 0, 'ce', 2, 'Beth', 20130713010800, 0, 0, 78, 0, '6ow'),
(26, 2, 0, 'ce', 2, 'Beth', 20130714030000, 0, 0, 78, 0, '6ow'),
(27, 2, 0, 'ce', 2, 'Beth', 20130714033001, 0, 0, 78, 0, '6ow'),
(28, 2, 0, 'ce', 2, 'Beth', 20130714036001, 0, 0, 78, 0, '6ow'),
(29, 2, 0, 'ce', 2, 'Beth', 20130714039001, 0, 0, 78, 0, '6ow'),
(30, 4, 0, 'ce', 3, 'Cory', 20130814040000, 0, 0, 78, 0, '6ow'),
(31, 4, 0, 'ce', 3, 'Cory', 20130816060001, 0, 0, 78, 0, '6ow'),
(32, 4, 0, 'ce', 3, 'Cory', 20130816061000, 0, 0, 78, 0, '6ow'),
(33, 4, 0, 'ce', 3, 'Cory', 20130816062000, 0, 0, 78, 0, '6ow'),
(34, 6, 0, 'ce', 3, 'Cory', 20130813007200, 0, 0, 78, 0, '6ow'),
(35, 6, 0, 'ce', 3, 'Cory', 20130813010800, 0, 0, 78, 0, '6ow'),
(36, 6, 0, 'ce', 3, 'Cory', 20130814030000, 0, 0, 78, 0, '6ow'),
(37, 6, 0, 'ce', 3, 'Cory', 20130814033001, 0, 0, 78, 0, '6ow'),
(38, 6, 0, 'ce', 3, 'Cory', 20130814036001, 0, 0, 78, 0, '6ow'),
(39, 6, 0, 'ce', 4, 'Dirk', 20130814039001, 0, 0, 78, 0, '6ow'),
(40, 6, 0, 'ce', 4, 'Dirk', 20130814040000, 0, 0, 78, 0, '6ow'),
(41, 6, 0, 'ce', 4, 'Dirk', 20130816060001, 0, 0, 78, 0, '6ow'),
(42, 6, 0, 'ce', 4, 'Dirk', 20130816061000, 0, 0, 78, 0, '6ow'),
(43, 6, 0, 'ce', 4, 'Dirk', 20130816062000, 0, 0, 78, 0, '6ow'),
(44, 6, 0, 'ce', 4, 'Dirk', 20130814063000, 0, 0, 78, 0, '6ow'),
(45, 6, 0, 'ce', 4, 'Dirk', 20130816064001, 0, 0, 78, 0, '6ow'),
(46, 6, 0, 'ce', 4, 'Dirk', 20130816065005, 0, 0, 78, 0, '6ow'),
(47, 6, 0, 'ce', 4, 'Dirk', 20130816066010, 0, 0, 78, 0, '6ow'),
(48, 6, 0, 'ce', 4, 'Dirk', 20130816066015, 0, 0, 78, 0, '6ow'),
(52, 2, 0, 'ce', 2, 'Beth', 20130713000000, 0, 0, 78, 0, '6ow'),
(53, 2, 0, 'ce', 2, 'Beth', 20130713003600, 0, 0, 78, 0, '6ow'),
(54, 2, 0, 'ce', 2, 'Beth', 20130713007200, 0, 0, 78, 0, '6ow'),
(55, 6, 0, 'ce', 2, 'Beth', 20130713010800, 0, 0, 78, 0, '6ow'),
(56, 6, 0, 'ce', 2, 'Beth', 20130714030000, 0, 0, 78, 0, '6ow'),
(57, 6, 0, 'ce', 2, 'Beth', 20130714033001, 0, 0, 78, 0, '6ow'),
(58, 6, 0, 'ce', 2, 'Beth', 20130714036001, 0, 0, 78, 0, '6ow'),
(59, 6, 0, 'ce', 2, 'Beth', 20130714039001, 0, 0, 78, 0, '6ow'),
(60, 6, 0, 'ce', 2, 'Beth', 20130714039001, 0, 0, 78, 0, '6ow');
Finding closely affiliated friends.
These queiries are built to operate on a small datasets at http://sqlfiddle.com/#!2/907b6/1.
For work on a real wiki numbers in these queries will require fine tuning.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment