Created
August 13, 2017 17:44
-
-
Save mac2000/9c3943e31453db60aaff7d94143e50be to your computer and use it in GitHub Desktop.
sql 2017 graph demo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=P@ssword" -e "MSSQL_PID=Developer" --cap-add SYS_PTRACE -p 1433:1433 --name sql -it --rm -v C:/Users/AlexandrM/Downloads/ml-100k:/ml-100k -v C:/Users/AlexandrM/Downloads/ml-20m:/ml-20m microsoft/mssql-server-linux | |
-- http://files.grouplens.org/datasets/movielens/ml-20m.zip | |
use master; | |
go | |
DROP DATABASE movies; | |
GO | |
-- Create a graph demo database | |
CREATE DATABASE movies; | |
go | |
USE movies; | |
go | |
-- u.user | |
-- user id | age | gender | occupation | zip code | |
-- 1|24|M|technician|85711 | |
DROP TABLE IF EXISTS users_temp; | |
CREATE TABLE users_temp (user_id INT PRIMARY KEY, age INT, gender VARCHAR(1), occupation VARCHAR(100), zip_code VARCHAR(100)); | |
BULK INSERT users_temp | |
FROM '/ml-100k/u.user' | |
WITH (DATAFILETYPE = 'char', FIRSTROW = 1, FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', TABLOCK) | |
GO | |
DROP TABLE IF EXISTS users; | |
CREATE TABLE users (user_id INT PRIMARY KEY, age INT, gender VARCHAR(1), occupation VARCHAR(100), zip_code VARCHAR(100)) AS NODE; | |
INSERT INTO users SELECT user_id, age, gender, occupation, zip_code FROM users_temp | |
DROP TABLE IF EXISTS users_temp; | |
-- u.genre | |
-- genre | id | |
-- Action|0 | |
DROP TABLE IF EXISTS genres_temp; | |
CREATE TABLE genres_temp (genre VARCHAR(50), genre_id INT PRIMARY KEY); | |
BULK INSERT genres_temp | |
FROM '/ml-100k/u.genre' | |
WITH (DATAFILETYPE = 'char', FIRSTROW = 1, FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', TABLOCK) | |
GO | |
DROP TABLE IF EXISTS genres; | |
CREATE TABLE genres (genre_id INT PRIMARY KEY, genre VARCHAR(50)) AS NODE; | |
INSERT INTO genres SELECT genre_id, genre FROM genres_temp | |
DROP TABLE IF EXISTS genres_temp; | |
-- u.item | |
-- movie id | movie title | release date | video release date | IMDb URL | unknown | Action | Adventure | Animation | Children's | Comedy | Crime | Documentary | Drama | Fantasy | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western | |
-- 1|Toy Story (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)|0|0|0|1|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0 | |
DROP TABLE IF EXISTS item_temp; | |
CREATE TABLE item_temp (movie_id INT PRIMARY KEY, movie_title VARCHAR(100), release_date SMALLDATETIME, video_release_date SMALLDATETIME, imdb_url VARCHAR(500), genre_0 INT, genre_1 INT, genre_2 INT, genre_3 INT, genre_4 INT, genre_5 INT, genre_6 INT, genre_7 INT, genre_8 INT, genre_9 INT, genre_10 INT, genre_11 INT, genre_12 INT, genre_13 INT, genre_14 INT, genre_15 INT, genre_16 INT, genre_17 INT, genre_18 INT); | |
BULK INSERT item_temp | |
FROM '/ml-100k/u.item' | |
WITH (DATAFILETYPE = 'char', FIRSTROW = 1, FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', TABLOCK) | |
GO | |
DROP TABLE IF EXISTS movies; | |
CREATE TABLE movies (movie_id INT PRIMARY KEY, movie_title VARCHAR(100), release_date SMALLDATETIME, video_release_date SMALLDATETIME, imdb_url VARCHAR(500)) AS NODE; | |
INSERT INTO movies SELECT movie_id, movie_title, release_date, video_release_date, imdb_url FROM item_temp | |
-- u.data | |
-- user id | item id | rating | timestamp | |
-- 196 242 3 881250949 | |
DROP TABLE IF EXISTS ratings_temp; | |
CREATE TABLE ratings_temp (user_id INT, movie_id INT, rating INT, timestamp BIGINT); | |
GO | |
BULK INSERT ratings_temp | |
FROM '/ml-100k/u.data' | |
WITH (DATAFILETYPE = 'char', FIRSTROW = 1, FIELDTERMINATOR = '\t', ROWTERMINATOR = '0x0a', TABLOCK) | |
GO | |
DROP TABLE IF EXISTS ratings; | |
CREATE TABLE ratings (rating INTEGER, date SMALLDATETIME) AS EDGE; | |
INSERT INTO ratings | |
select U.$node_id, M.$node_id, rating, DATEADD(second, ([timestamp] - DATEDIFF(second,GETDATE(),GETUTCDATE())), CAST('1970-01-01 00:00:00' AS datetime)) AS date from ratings_temp as T join movies M ON T.movie_id = M.movie_id join users U ON T.user_id = U.user_id | |
DROP TABLE IF EXISTS ratings_temp; | |
DROP TABLE IF EXISTS belongs; | |
CREATE TABLE belongs AS EDGE; | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 0 where genre_0 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 1 where genre_1 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 2 where genre_2 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 3 where genre_3 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 4 where genre_4 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 5 where genre_5 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 6 where genre_6 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 7 where genre_7 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 8 where genre_8 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 9 where genre_9 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 10 where genre_10 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 11 where genre_11 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 12 where genre_12 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 13 where genre_13 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 14 where genre_14 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 15 where genre_15 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 16 where genre_16 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 17 where genre_17 = 1 | |
insert into belongs select M.$node_id, G.$node_id from item_temp I join movies M ON I.movie_id = M.movie_id join genres G ON G.genre_id = 18 where genre_18 = 1 | |
DROP TABLE IF EXISTS item_temp; | |
-- Get movies belongs to Sci-Fi aka join | |
select m.movie_id, m.movie_title, g.genre from movies AS m, genres AS g, belongs AS b where match(m-(b)->g) and g.genre = 'Sci-Fi' | |
-- Kind of top rated movies | |
select m.movie_title, sum(r.rating) / count(r.rating) AS avg, sum(r.rating) as sum, count(r.rating) as count from movies as m, ratings as r, users as u where match(m<-(r)-u) group by m.movie_title order by avg desc, count desc | |
-- 257 men in black - other users who also like man in the black | |
select u.user_id, u.age, u.gender, u.occupation, r.rating, r.date from users u, movies m, ratings r where match(u-(r)->m) and m.movie_id = 257 | |
-- user 472 favorite genres (aka user profile) | |
select u.user_id, /*u.age, u.gender, u.occupation,*/ g.genre, count(r.rating) as four_plus_ratings_count from users u, ratings r, movies m, genres g, belongs b where match (u-(r)->m-(b)->g) and r.rating >= 4 | |
and u.user_id = 472 | |
group by u.user_id, /*u.age, u.gender, u.occupation,*/ g.genre | |
order by four_plus_ratings_count desc | |
-- coloborative recomendation | |
SELECT | |
TOP 10 | |
similar.movie_id, | |
similar.movie_title, | |
COUNT(*) as total_ratings | |
from | |
movies as my_movie, | |
users as U, | |
ratings as ratings_other, | |
ratings as ratings_this, | |
movies as similar | |
where my_movie.movie_id = 257 and match(similar<-(ratings_other)-U-(ratings_this)->my_movie) | |
GROUP BY similar.movie_id, similar.movie_title | |
ORDER BY COUNT(*) DESC | |
-- similar users | |
SELECT | |
me.user_id AS me, | |
others.user_id AS other_id, | |
others.age, | |
others.gender, | |
others.occupation, | |
my_movies.movie_id, | |
my_movies.movie_title, | |
my_ratings.rating as my_rating, | |
other_ratings.rating as others_rating, | |
ABS(my_ratings.rating-other_ratings.rating) as delta | |
from | |
users as me, | |
ratings as my_ratings, | |
movies as my_movies, | |
ratings as other_ratings, | |
users as others | |
where match(me-(my_ratings)->my_movies<-(other_ratings)-others) and me.user_id = 472 | |
order by delta | |
-- top 10 similar users, grouped | |
select top 10 me, other_id, sum(delta) as delta from ( | |
SELECT | |
me.user_id AS me, | |
others.user_id AS other_id, | |
ABS(my_ratings.rating-other_ratings.rating) as delta | |
from | |
users as me, | |
ratings as my_ratings, | |
movies as my_movies, | |
ratings as other_ratings, | |
users as others | |
where match(me-(my_ratings)->my_movies<-(other_ratings)-others) and me.user_id = 472 | |
) as q group by me, other_id | |
order by delta | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment