Created
June 28, 2012 01:25
-
-
Save doryokujin/3008099 to your computer and use it in GitHub Desktop.
Book-Crossing Dataset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 1.1.1 登録ユーザー数, アクティブユーザー数 -- | |
td query -w -d book_crossing_dataset " | |
SELECT t1.cnt AS all_users, t2.cnt AS active_users, ROUND(t2.cnt/t1.cnt*100) AS active_rate | |
FROM | |
( | |
SELECT COUNT(distinct user_id) as cnt, 1 AS one | |
FROM users | |
) t1 | |
JOIN | |
( | |
SELECT COUNT(distinct user_id) as cnt, 1 AS one | |
FROM ratings | |
) t2 | |
ON | |
(t1.one=t2.one) | |
" | |
-- 1.1.2 片方のテーブルにしか含まれないユーザー数 -- | |
td query -w -d book_crossing_dataset " | |
SELECT t AS type, cnt | |
FROM | |
( | |
SELECT COUNT(*) AS cnt, 'only in users' AS t | |
FROM | |
( | |
SELECT user_id | |
FROM users | |
GROUP BY user_id | |
) t1 | |
LEFT OUTER JOIN | |
( | |
SELECT user_id | |
FROM ratings | |
GROUP BY user_id | |
) t2 | |
ON | |
(t1.user_id=t2.user_id) | |
WHERE t2.user_id IS NULL | |
UNION ALL | |
SELECT COUNT(*) AS cnt, 'only in ratings' AS t | |
FROM | |
( | |
SELECT user_id | |
FROM ratings | |
GROUP BY user_id | |
) t1 | |
LEFT OUTER JOIN | |
( | |
SELECT user_id | |
FROM users | |
GROUP BY user_id | |
) t2 | |
ON | |
(t1.user_id=t2.user_id) | |
WHERE t2.user_id IS NULL | |
UNION ALL | |
SELECT COUNT(*) AS cnt, 'both' AS t | |
FROM | |
( | |
SELECT user_id | |
FROM ratings | |
GROUP BY user_id | |
) t1 | |
JOIN | |
( | |
SELECT user_id | |
FROM users | |
GROUP BY user_id | |
) t2 | |
ON | |
(t1.user_id=t2.user_id) | |
) o1 | |
" | |
-- 1.2.1 登録ブック数, アクティブブック数 -- | |
td query -w -d book_crossing_dataset " | |
SELECT t1.cnt AS all_books, t2.cnt AS active_books, ROUND(t2.cnt/t1.cnt*100) AS active_rate | |
FROM | |
( | |
SELECT COUNT(distinct isbn) as cnt, 1 AS one | |
FROM books | |
) t1 | |
JOIN | |
( | |
SELECT COUNT(distinct isbn) as cnt, 1 AS one | |
FROM ratings | |
) t2 | |
ON | |
(t1.one=t2.one) | |
" | |
-- 1.2.2 片方のテーブルにしか含まれないブック数 -- | |
td query -w -d book_crossing_dataset " | |
SELECT t AS type, cnt | |
FROM | |
( | |
SELECT COUNT(*) AS cnt, 'only in books' AS t | |
FROM | |
( | |
SELECT isbn | |
FROM books | |
GROUP BY isbn | |
) t1 | |
LEFT OUTER JOIN | |
( | |
SELECT isbn | |
FROM ratings | |
GROUP BY isbn | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
WHERE t2.isbn IS NULL | |
UNION ALL | |
SELECT COUNT(*) AS cnt, 'only in ratings' AS t | |
FROM | |
( | |
SELECT isbn | |
FROM ratings | |
GROUP BY isbn | |
) t1 | |
LEFT OUTER JOIN | |
( | |
SELECT isbn | |
FROM books | |
GROUP BY isbn | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
WHERE t2.isbn IS NULL | |
UNION ALL | |
SELECT COUNT(*) AS cnt, 'both' AS t | |
FROM | |
( | |
SELECT isbn | |
FROM ratings | |
GROUP BY isbn | |
) t1 | |
JOIN | |
( | |
SELECT isbn | |
FROM books | |
GROUP BY isbn | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
) o1 | |
" | |
-- 1.3.1 総評価数 -- | |
td query -w -d book_crossing_dataset " | |
SELECT COUNT(*) AS all_reviews | |
FROM ratings | |
" | |
-- 1.3.2 有効評価数,平均評価値 -- | |
td query -w -d book_crossing_dataset " | |
SELECT COUNT(*) AS valid_reviews, ROUND(AVG(book_rating)*100)/100 AS avg_of_reviews | |
FROM ratings | |
WHERE 0 < book_rating | |
" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment