Last active
July 30, 2018 04:15
-
-
Save jesusgoku/150d5521e50439f260b3e08de17734f2 to your computer and use it in GitHub Desktop.
MySQL - Query for selected related entries by tags scoring - Inspired on: https://stackoverflow.com/a/2153211/1236791 - http://en.wikipedia.org/wiki/Jaccard_index
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE IF NOT EXISTS layers ( | |
id INT unsigned NOT NULL auto_increment, | |
name VARCHAR(255) NOT NULL, | |
PRIMARY KEY(id) | |
); | |
CREATE TABLE IF NOT EXISTS tags ( | |
id INT unsigned NOT NULL auto_increment, | |
name VARCHAR(255) NOT NULL, | |
PRIMARY KEY(id) | |
); | |
CREATE TABLE IF NOT EXISTS layers_tags ( | |
layer_id INT unsigned NOT NULL, | |
tag_id INT unsigned NOT NULL, | |
CONSTRAINT fk_layer FOREIGN KEY(layer_id) REFERENCES layers(id), | |
CONSTRAINT fk_tag FOREIGN KEY(tag_id) REFERENCES tags(id) | |
); | |
INSERT INTO layers (name) VALUES | |
('Paseo por el campo') | |
, ('Fotografía de campo') | |
, ('Computación cuantica') | |
, ('Computación fotografica') | |
; | |
INSERT INTO tags (name) VALUES | |
('Campo') | |
, ('Computación') | |
, ('Fotografía') | |
, ('Cuantica') | |
, ('Paseo') | |
; | |
INSERT INTO layers_tags () VALUES | |
(1, 1) | |
, (1, 5) | |
, (2, 1) | |
, (2, 3) | |
, (3, 4) | |
, (3, 2) | |
, (4, 2) | |
, (4, 3) | |
; | |
-- Layers | |
SELECT l.id, l.name FROM layers AS l; | |
-- Tags | |
SELECT t.id, t.name FROM tags AS t; | |
-- Layers - Tags | |
SELECT l.*, t.name FROM layers AS l JOIN layers_tags AS lt ON l.id = lt.layer_id JOIN tags AS t ON t.id = lt.tag_id; | |
-- Tags name for an layer | |
SELECT t.name FROM layers_tags AS lt JOIN tags AS t ON lt.tag_id = t.id WHERE lt.layer_id = 1; | |
-- Intersection | |
SELECT t.name FROM layers_tags AS lt JOIN tags AS t ON lt.tag_id = t.id WHERE lt.layer_id = 1 AND lt.tag_id IN ( | |
SELECT t.id FROM layers_tags AS lt JOIN tags AS t ON lt.tag_id = t.id WHERE lt.layer_id = 2 | |
); | |
-- Intersection IDs | |
SELECT lt.tag_id FROM layers_tags AS lt WHERE lt.layer_id = 1 AND lt.tag_id IN (SELECT lt.tag_id FROM layers_tags AS lt WHERE lt.layer_id = 2); | |
-- Intersection count | |
SELECT COUNT(t.id) FROM layers_tags AS lt JOIN tags AS t ON lt.tag_id = t.id WHERE lt.layer_id = 1 AND lt.tag_id IN (SELECT t.id FROM layers_tags AS lt JOIN tags AS t ON lt.tag_id = t.id WHERE lt.layer_id = 2); | |
-- Union | |
SELECT t.name FROM layers_tags AS lt JOIN tags AS t ON lt.tag_id = t.id WHERE lt.layer_id = 1 | |
UNION SELECT t.name FROM layers_tags AS lt JOIN tags AS t ON lt.tag_id = t.id WHERE lt.layer_id = 2; | |
-- Union count | |
SELECT COUNT(u.id) FROM ( | |
SELECT t.id FROM layers_tags AS lt JOIN tags AS t ON lt.tag_id = t.id WHERE lt.layer_id = 1 | |
UNION SELECT t.id FROM layers_tags AS lt JOIN tags AS t ON lt.tag_id = t.id WHERE lt.layer_id = 2 | |
) AS u; | |
-- Union IDs | |
SELECT lt.tag_id FROM layers_tags AS lt WHERE lt.layer_id = 1 | |
UNION SELECT lt.tag_id FROM layers_tags AS lt WHERE lt.layer_id = 2 | |
; | |
-- GROUP_CONCAT | |
SELECT l.*, GROUP_CONCAT(t.name) AS tags FROM layers AS l JOIN layers_tags AS lt ON l.id = lt.layer_id JOIN tags AS t ON t.id = lt.tag_id GROUP BY(l.id); | |
-- Tags id | |
SELECT | |
l.id | |
, l.name | |
, (SELECT GROUP_CONCAT(lt.tag_id) FROM layers_tags AS lt WHERE lt.layer_id = l.id GROUP BY lt.layer_id) AS tags_id | |
FROM layers AS l | |
; | |
-- Related tags order by score | |
SET @layer := 4; | |
SELECT | |
l.id | |
, l.name | |
-- , (SELECT COUNT(lt.tag_id) FROM layers_tags AS lt WHERE lt.layer_id = l.id) AS tags_count | |
, ( | |
SELECT COUNT(lt.tag_id) | |
FROM layers_tags AS lt | |
WHERE lt.layer_id = l.id | |
AND lt.tag_id IN (SELECT lt.tag_id FROM layers_tags AS lt WHERE lt.layer_id = @layer) | |
) / ( | |
SELECT COUNT(DISTINCT lt.tag_id) FROM layers_tags AS lt WHERE lt.layer_id = @layer OR lt.layer_id = l.id | |
) AS score | |
FROM layers AS l | |
WHERE l.id != @layer | |
ORDER BY score DESC | |
; | |
-- Layers order by scoring acording tag | |
SET @tags := "2,4"; | |
SET @tags_count := 2; | |
SELECT | |
l.id | |
, l.name | |
, ( | |
SELECT COUNT(lt.tag_id) | |
FROM layers_tags AS lt | |
WHERE lt.layer_id = l.id | |
-- AND lt.tag_id IN (2, 4) -- @tags | |
AND FIND_IN_SET(lt.tag_id, @tags) | |
) / @tags_count AS score | |
FROM layers AS l | |
ORDER BY score DESC | |
; | |
-- Example on: http://sqlfiddle.com/#!9/add109/2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment