Last active
October 30, 2017 22:57
-
-
Save sovetnik/18bbade85205667d6e8b881caa28df30 to your computer and use it in GitHub Desktop.
Data and query to select authors which not collaborate with others
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Create authors | |
CREATE TABLE authors(id serial PRIMARY KEY, name char(255)); | |
-- Single author of many books | |
INSERT INTO authors(name) | |
VALUES ('William Gibson'); | |
-- Collective of authors of one book | |
INSERT INTO authors(name) | |
VALUES ('Erich Gamma'), ('Richard Helm'), ('Ralph Johnson'), ('John Vlissides '); | |
-- Main author and collaborator | |
INSERT INTO authors(name) | |
VALUES ('Daniel Kahneman'), ('Amos Twersky'); | |
-- Create books | |
CREATE TABLE books(id serial PRIMARY KEY, title char(255)); | |
-- William Gibson books | |
INSERT INTO books(title) | |
VALUES('Neuromancer'), ('Count Zero'), ('Mona Lisa Overdrive'); | |
-- GoF book | |
INSERT INTO books(title) | |
VALUES('Design Patterns: Elements of Reusable Object-Oriented Software'); | |
-- Only Kahneman | |
INSERT INTO books(title) | |
VALUES('Thinking, Fast and Slow'); | |
-- Kahneman and Tversky as coauthor | |
INSERT INTO books(title) | |
VALUES('Choices, Values and Frames'); | |
-- Add authors and books relations | |
CREATE TABLE book_authors( | |
author_id int REFERENCES authors(id), | |
book_id int REFERENCES books(id), | |
main_author boolean | |
); | |
-- with constraints | |
-- book can be written only once | |
CREATE UNIQUE INDEX authorship_idx | |
ON book_authors(author_id, book_id); | |
-- book can have only one main author | |
CREATE UNIQUE INDEX main_author_idx | |
ON book_authors(book_id, main_author) | |
WHERE main_author = TRUE; | |
--and data | |
INSERT INTO book_authors(author_id, book_id, main_author) | |
VALUES (1,1, FALSE), (1,2, FALSE), (1,3, FALSE), | |
(2,4, FALSE), (3,4, FALSE), (4,4, FALSE), (5,4, FALSE), | |
(6,5, FALSE), (6,6, TRUE), (7,6, FALSE); | |
-- Query to get all authors not seen in collaboration | |
EXPLAIN WITH | |
books_aggregate(book_id, authors_count) as( | |
SELECT book_id, count(author_id) as authors_count | |
FROM book_authors | |
GROUP BY book_id | |
ORDER BY book_id | |
), | |
collective_books(id) as ( | |
SELECT book_id | |
FROM books_aggregate | |
WHERE authors_count > 1 | |
), | |
collective_authors(id) as ( | |
select ba.author_id | |
FROM book_authors ba | |
JOIN collective_books cb | |
ON ba.book_id = cb.id | |
) | |
SELECT * | |
FROM authors a | |
WHERE a.id | |
NOT IN (SELECT id FROM collective_authors); | |
/* +----------------------------------------------------------------------------------------+ */ | |
/* | QUERY PLAN | */ | |
/* |----------------------------------------------------------------------------------------| */ | |
/* | Seq Scan on authors a (cost=126.40..137.27 rows=35 width=1028) | */ | |
/* | Filter: (NOT (hashed SubPlan 4)) | */ | |
/* | CTE books_aggregate | */ | |
/* | -> Sort (cost=53.54..54.04 rows=200 width=12) | */ | |
/* | Sort Key: book_authors.book_id | */ | |
/* | -> HashAggregate (cost=43.90..45.90 rows=200 width=12) | */ | |
/* | Group Key: book_authors.book_id | */ | |
/* | -> Seq Scan on book_authors (cost=0.00..32.60 rows=2260 width=8) | */ | |
/* | CTE collective_books | */ | |
/* | -> CTE Scan on books_aggregate (cost=0.00..4.50 rows=67 width=4) | */ | |
/* | Filter: (authors_count > 1) | */ | |
/* | CTE collective_authors | */ | |
/* | -> Hash Join (cost=2.18..50.82 rows=757 width=4) | */ | |
/* | Hash Cond: (ba.book_id = cb.id) | */ | |
/* | -> Seq Scan on book_authors ba (cost=0.00..32.60 rows=2260 width=8) | */ | |
/* | -> Hash (cost=1.34..1.34 rows=67 width=4) | */ | |
/* | -> CTE Scan on collective_books cb (cost=0.00..1.34 rows=67 width=4) | */ | |
/* | SubPlan 4 | */ | |
/* | -> CTE Scan on collective_authors (cost=0.00..15.14 rows=757 width=4) | */ | |
/* +----------------------------------------------------------------------------------------+ */ | |
CREATE INDEX book_idx ON book_authors(book_id); | |
-- Same query after ad index | |
/* +---------------------------------------------------------------------------------------+ */ | |
/* | QUERY PLAN | */ | |
/* |---------------------------------------------------------------------------------------| */ | |
/* | Seq Scan on authors a (cost=3.00..13.87 rows=35 width=1028) | */ | |
/* | Filter: (NOT (hashed SubPlan 4)) | */ | |
/* | CTE books_aggregate | */ | |
/* | -> GroupAggregate (cost=1.27..1.44 rows=10 width=12) | */ | |
/* | Group Key: book_authors.book_id | */ | |
/* | -> Sort (cost=1.27..1.29 rows=10 width=8) | */ | |
/* | Sort Key: book_authors.book_id | */ | |
/* | -> Seq Scan on book_authors (cost=0.00..1.10 rows=10 width=8) | */ | |
/* | CTE collective_books | */ | |
/* | -> CTE Scan on books_aggregate (cost=0.00..0.22 rows=3 width=4) | */ | |
/* | Filter: (authors_count > 1) | */ | |
/* | CTE collective_authors | */ | |
/* | -> Hash Join (cost=0.10..1.26 rows=3 width=4) | */ | |
/* | Hash Cond: (ba.book_id = cb.id) | */ | |
/* | -> Seq Scan on book_authors ba (cost=0.00..1.10 rows=10 width=8) | */ | |
/* | -> Hash (cost=0.06..0.06 rows=3 width=4) | */ | |
/* | -> CTE Scan on collective_books cb (cost=0.00..0.06 rows=3 width=4) | */ | |
/* | SubPlan 4 | */ | |
/* | -> CTE Scan on collective_authors (cost=0.00..0.06 rows=3 width=4) | */ | |
/* +---------------------------------------------------------------------------------------+ */ | |
/* -- Export to csv */ | |
/* COPY( */ | |
/* WITH */ | |
/* books_aggregate(book_id, authors_count) as( */ | |
/* SELECT book_id, count(author_id) as authors_count */ | |
/* FROM book_authors */ | |
/* GROUP BY book_id */ | |
/* ORDER BY book_id */ | |
/* ), */ | |
/* collective_books(id) as ( */ | |
/* SELECT book_id */ | |
/* FROM books_aggregate */ | |
/* WHERE authors_count > 1 */ | |
/* ), */ | |
/* collective_authors(id) as ( */ | |
/* select ba.author_id */ | |
/* FROM book_authors ba */ | |
/* JOIN collective_books cb */ | |
/* ON ba.book_id = cb.id */ | |
/* ) */ | |
/* SELECT * */ | |
/* FROM authors a */ | |
/* WHERE a.id */ | |
/* NOT IN (SELECT id FROM collective_authors) */ | |
/* ) TO '/tmp/mustread.csv' WITH CSV HEADER; */ | |
/* --Cleanup */ | |
/* DROP TABLE book_authors; */ | |
/* DROP TABLE authors; */ | |
/* DROP TABLE books; */ | |
/* DROP INDEX main_author_idx; */ | |
/* DROP INDEX authorship_idx; */ | |
/* DROP INDEX book_idx; */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment