-
-
Save kiyoto/3085703 to your computer and use it in GitHub Desktop.
Book-Crossing Dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 1.1.1 all users v. active users -- | |
td query -w -d book_crossing_dataset " | |
SELECT t1.cnt AS all_users, t2.cnt AS active_users, ROUND(t2.cnt/t1.cnt*100) AS active_rate | |
FROM | |
( | |
SELECT COUNT(distinct user_id) as cnt, 1 AS one | |
FROM users | |
) t1 | |
JOIN | |
( | |
SELECT COUNT(distinct user_id) as cnt, 1 AS one | |
FROM ratings | |
) t2 | |
ON | |
(t1.one=t2.one) | |
" | |
-- 1.1.2 mutually exclusive users -- | |
td query -w -d book_crossing_dataset " | |
SELECT t AS type, cnt | |
FROM | |
( | |
SELECT COUNT(*) AS cnt, 'only in users' AS t | |
FROM | |
( | |
SELECT user_id | |
FROM users | |
GROUP BY user_id | |
) t1 | |
LEFT OUTER JOIN | |
( | |
SELECT user_id | |
FROM ratings | |
GROUP BY user_id | |
) t2 | |
ON | |
(t1.user_id=t2.user_id) | |
WHERE t2.user_id IS NULL | |
UNION ALL | |
SELECT COUNT(*) AS cnt, 'only in ratings' AS t | |
FROM | |
( | |
SELECT user_id | |
FROM ratings | |
GROUP BY user_id | |
) t1 | |
LEFT OUTER JOIN | |
( | |
SELECT user_id | |
FROM users | |
GROUP BY user_id | |
) t2 | |
ON | |
(t1.user_id=t2.user_id) | |
WHERE t2.user_id IS NULL | |
UNION ALL | |
SELECT COUNT(*) AS cnt, 'both' AS t | |
FROM | |
( | |
SELECT user_id | |
FROM ratings | |
GROUP BY user_id | |
) t1 | |
JOIN | |
( | |
SELECT user_id | |
FROM users | |
GROUP BY user_id | |
) t2 | |
ON | |
(t1.user_id=t2.user_id) | |
) o1 | |
" | |
-- 1.2.1 all books v. "active" (rated) books -- | |
td query -w -d book_crossing_dataset " | |
SELECT t1.cnt AS all_books, t2.cnt AS active_books, ROUND(t2.cnt/t1.cnt*100) AS active_rate | |
FROM | |
( | |
SELECT COUNT(distinct isbn) as cnt, 1 AS one | |
FROM books | |
) t1 | |
JOIN | |
( | |
SELECT COUNT(distinct isbn) as cnt, 1 AS one | |
FROM ratings | |
) t2 | |
ON | |
(t1.one=t2.one) | |
" | |
-- 1.2.2 Mutually exclusive books -- | |
td query -w -d book_crossing_dataset " | |
SELECT t AS type, cnt | |
FROM | |
( | |
SELECT COUNT(*) AS cnt, 'only in books' AS t | |
FROM | |
( | |
SELECT isbn | |
FROM books | |
GROUP BY isbn | |
) t1 | |
LEFT OUTER JOIN | |
( | |
SELECT isbn | |
FROM ratings | |
GROUP BY isbn | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
WHERE t2.isbn IS NULL | |
UNION ALL | |
SELECT COUNT(*) AS cnt, 'only in ratings' AS t | |
FROM | |
( | |
SELECT isbn | |
FROM ratings | |
GROUP BY isbn | |
) t1 | |
LEFT OUTER JOIN | |
( | |
SELECT isbn | |
FROM books | |
GROUP BY isbn | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
WHERE t2.isbn IS NULL | |
UNION ALL | |
SELECT COUNT(*) AS cnt, 'both' AS t | |
FROM | |
( | |
SELECT isbn | |
FROM ratings | |
GROUP BY isbn | |
) t1 | |
JOIN | |
( | |
SELECT isbn | |
FROM books | |
GROUP BY isbn | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
) o1 | |
" | |
-- 1.3.1 The number of ratings -- | |
td query -w -d book_crossing_dataset " | |
SELECT COUNT(*) AS all_reviews | |
FROM ratings | |
" | |
-- 1.3.2 # of valid reviews and the average of all reviews -- | |
td query -w -d book_crossing_dataset " | |
SELECT COUNT(*) AS valid_reviews, ROUND(AVG(book_rating)*100)/100 AS avg_of_reviews | |
FROM ratings | |
WHERE 0 < book_rating | |
" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment