Skip to content

Instantly share code, notes, and snippets.

@kiyoto
Forked from doryokujin/basic_information.sql
Created July 10, 2012 19:33
Show Gist options
  • Save kiyoto/3085703 to your computer and use it in GitHub Desktop.
Save kiyoto/3085703 to your computer and use it in GitHub Desktop.
Book-Crossing Dataset
-- 1.1.1 all users v. active users --
td query -w -d book_crossing_dataset "
SELECT t1.cnt AS all_users, t2.cnt AS active_users, ROUND(t2.cnt/t1.cnt*100) AS active_rate
FROM
(
SELECT COUNT(distinct user_id) as cnt, 1 AS one
FROM users
) t1
JOIN
(
SELECT COUNT(distinct user_id) as cnt, 1 AS one
FROM ratings
) t2
ON
(t1.one=t2.one)
"
-- 1.1.2 mutually exclusive users --
td query -w -d book_crossing_dataset "
SELECT t AS type, cnt
FROM
(
SELECT COUNT(*) AS cnt, 'only in users' AS t
FROM
(
SELECT user_id
FROM users
GROUP BY user_id
) t1
LEFT OUTER JOIN
(
SELECT user_id
FROM ratings
GROUP BY user_id
) t2
ON
(t1.user_id=t2.user_id)
WHERE t2.user_id IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'only in ratings' AS t
FROM
(
SELECT user_id
FROM ratings
GROUP BY user_id
) t1
LEFT OUTER JOIN
(
SELECT user_id
FROM users
GROUP BY user_id
) t2
ON
(t1.user_id=t2.user_id)
WHERE t2.user_id IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'both' AS t
FROM
(
SELECT user_id
FROM ratings
GROUP BY user_id
) t1
JOIN
(
SELECT user_id
FROM users
GROUP BY user_id
) t2
ON
(t1.user_id=t2.user_id)
) o1
"
-- 1.2.1 all books v. "active" (rated) books --
td query -w -d book_crossing_dataset "
SELECT t1.cnt AS all_books, t2.cnt AS active_books, ROUND(t2.cnt/t1.cnt*100) AS active_rate
FROM
(
SELECT COUNT(distinct isbn) as cnt, 1 AS one
FROM books
) t1
JOIN
(
SELECT COUNT(distinct isbn) as cnt, 1 AS one
FROM ratings
) t2
ON
(t1.one=t2.one)
"
-- 1.2.2 Mutually exclusive books --
td query -w -d book_crossing_dataset "
SELECT t AS type, cnt
FROM
(
SELECT COUNT(*) AS cnt, 'only in books' AS t
FROM
(
SELECT isbn
FROM books
GROUP BY isbn
) t1
LEFT OUTER JOIN
(
SELECT isbn
FROM ratings
GROUP BY isbn
) t2
ON
(t1.isbn=t2.isbn)
WHERE t2.isbn IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'only in ratings' AS t
FROM
(
SELECT isbn
FROM ratings
GROUP BY isbn
) t1
LEFT OUTER JOIN
(
SELECT isbn
FROM books
GROUP BY isbn
) t2
ON
(t1.isbn=t2.isbn)
WHERE t2.isbn IS NULL
UNION ALL
SELECT COUNT(*) AS cnt, 'both' AS t
FROM
(
SELECT isbn
FROM ratings
GROUP BY isbn
) t1
JOIN
(
SELECT isbn
FROM books
GROUP BY isbn
) t2
ON
(t1.isbn=t2.isbn)
) o1
"
-- 1.3.1 The number of ratings --
td query -w -d book_crossing_dataset "
SELECT COUNT(*) AS all_reviews
FROM ratings
"
-- 1.3.2 # of valid reviews and the average of all reviews --
td query -w -d book_crossing_dataset "
SELECT COUNT(*) AS valid_reviews, ROUND(AVG(book_rating)*100)/100 AS avg_of_reviews
FROM ratings
WHERE 0 < book_rating
"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment