Created
October 8, 2022 13:18
-
-
Save vatsalsaglani/3c1e3490b0871528743f6ac31bec7176 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np\n", | |
| "import pandas as pd\n", | |
| "import torch\n", | |
| "import torch.nn as nn\n", | |
| "import torch.nn.functional as F\n", | |
| "import random\n", | |
| "import sys\n", | |
| "sys.path.append(\"../\")\n", | |
| "from constants import *" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "movies_df = pd.read_csv(\"../data/ml-25m/ml-25m/movies.csv\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "62423" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(movies_df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>movieId</th>\n", | |
| " <th>title</th>\n", | |
| " <th>genres</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>62413</th>\n", | |
| " <td>209145</td>\n", | |
| " <td>Liberté (2019)</td>\n", | |
| " <td>Drama</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62414</th>\n", | |
| " <td>209147</td>\n", | |
| " <td>The Carpet of Horror (1962)</td>\n", | |
| " <td>Crime|Horror</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62415</th>\n", | |
| " <td>209151</td>\n", | |
| " <td>Mao Zedong 1949 (2019)</td>\n", | |
| " <td>(no genres listed)</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62416</th>\n", | |
| " <td>209153</td>\n", | |
| " <td>Happy Flight (2008)</td>\n", | |
| " <td>Comedy|Drama</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62417</th>\n", | |
| " <td>209155</td>\n", | |
| " <td>Santosh Subramaniam (2008)</td>\n", | |
| " <td>Action|Comedy|Romance</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62418</th>\n", | |
| " <td>209157</td>\n", | |
| " <td>We (2018)</td>\n", | |
| " <td>Drama</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62419</th>\n", | |
| " <td>209159</td>\n", | |
| " <td>Window of the Soul (2001)</td>\n", | |
| " <td>Documentary</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62420</th>\n", | |
| " <td>209163</td>\n", | |
| " <td>Bad Poems (2018)</td>\n", | |
| " <td>Comedy|Drama</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62421</th>\n", | |
| " <td>209169</td>\n", | |
| " <td>A Girl Thing (2001)</td>\n", | |
| " <td>(no genres listed)</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62422</th>\n", | |
| " <td>209171</td>\n", | |
| " <td>Women of Devil's Island (1962)</td>\n", | |
| " <td>Action|Adventure|Drama</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " movieId title genres\n", | |
| "62413 209145 Liberté (2019) Drama\n", | |
| "62414 209147 The Carpet of Horror (1962) Crime|Horror\n", | |
| "62415 209151 Mao Zedong 1949 (2019) (no genres listed)\n", | |
| "62416 209153 Happy Flight (2008) Comedy|Drama\n", | |
| "62417 209155 Santosh Subramaniam (2008) Action|Comedy|Romance\n", | |
| "62418 209157 We (2018) Drama\n", | |
| "62419 209159 Window of the Soul (2001) Documentary\n", | |
| "62420 209163 Bad Poems (2018) Comedy|Drama\n", | |
| "62421 209169 A Girl Thing (2001) (no genres listed)\n", | |
| "62422 209171 Women of Devil's Island (1962) Action|Adventure|Drama" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "movies_df.tail(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "ratings_df = pd.read_csv(\"../data/ml-25m/ml-25m/ratings.csv\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "25000095" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(ratings_df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>userId</th>\n", | |
| " <th>movieId</th>\n", | |
| " <th>rating</th>\n", | |
| " <th>timestamp</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>296</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>1147880044</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>306</td>\n", | |
| " <td>3.5</td>\n", | |
| " <td>1147868817</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>307</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>1147868828</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>665</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>1147878820</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>1</td>\n", | |
| " <td>899</td>\n", | |
| " <td>3.5</td>\n", | |
| " <td>1147868510</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " userId movieId rating timestamp\n", | |
| "0 1 296 5.0 1147880044\n", | |
| "1 1 306 3.5 1147868817\n", | |
| "2 1 307 5.0 1147868828\n", | |
| "3 1 665 5.0 1147878820\n", | |
| "4 1 899 3.5 1147868510" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "ratings_df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Order the ratings chronologically. mergesort is a stable algorithm, so ratings that\n", | |
| "# share a timestamp keep their original relative order instead of an arbitrary one,\n", | |
| "# which keeps the per-user sequences reproducible across runs and library versions.\n", | |
| "ratings_df = ratings_df.sort_values(by=[\"timestamp\"], kind=\"mergesort\")\n", | |
| "# Collapse to one row per user: every remaining column becomes a Python list whose\n", | |
| "# elements follow the row order established by the sort above.\n", | |
| "grouped_ratings = ratings_df.groupby(by=\"userId\").agg(list)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>movieId</th>\n", | |
| " <th>rating</th>\n", | |
| " <th>timestamp</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>userId</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>[5952, 2012, 2011, 1653, 1250, 6539, 6377, 344...</td>\n", | |
| " <td>[4.0, 2.5, 2.5, 4.0, 4.0, 3.5, 4.0, 4.0, 4.0, ...</td>\n", | |
| " <td>[1147868053, 1147868068, 1147868079, 114786809...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>[2797, 5952, 1080, 553, 653, 497, 1374, 1653, ...</td>\n", | |
| " <td>[1.0, 5.0, 1.0, 2.0, 3.0, 4.0, 4.5, 4.5, 3.0, ...</td>\n", | |
| " <td>[1141415509, 1141415528, 1141415532, 114141553...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>[356, 593, 1270, 1, 480, 2571, 260, 318, 1196,...</td>\n", | |
| " <td>[4.0, 4.0, 3.5, 4.0, 2.0, 4.0, 4.0, 4.0, 4.0, ...</td>\n", | |
| " <td>[1439472199, 1439472203, 1439472211, 143947221...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>[97913, 93510, 91630, 93840, 195159, 122914, 1...</td>\n", | |
| " <td>[3.5, 4.0, 3.5, 4.5, 5.0, 3.0, 2.0, 3.5, 2.5, ...</td>\n", | |
| " <td>[1573937091, 1573937096, 1573937103, 157393711...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>[592, 590, 296, 150, 344, 153, 588, 595, 231, ...</td>\n", | |
| " <td>[3.0, 3.0, 4.0, 5.0, 4.0, 3.0, 4.0, 3.0, 4.0, ...</td>\n", | |
| " <td>[830786155, 830786155, 830786155, 830786155, 8...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>[2421, 1466, 161, 902, 858, 2815, 1183, 1704, ...</td>\n", | |
| " <td>[3.0, 3.0, 2.0, 4.0, 5.0, 3.0, 2.0, 5.0, 5.0, ...</td>\n", | |
| " <td>[945141530, 945141530, 945141530, 945141564, 9...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>[590, 296, 592, 150, 153, 165, 344, 588, 595, ...</td>\n", | |
| " <td>[3.0, 4.0, 3.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, ...</td>\n", | |
| " <td>[835444730, 835444730, 835444730, 835444730, 8...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>[1672, 1617, 1777, 1721, 1704, 551, 903, 110, ...</td>\n", | |
| " <td>[4.0, 5.0, 3.0, 4.0, 4.0, 2.0, 4.0, 5.0, 3.0, ...</td>\n", | |
| " <td>[890489203, 890489203, 890489236, 890489263, 8...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>[1073, 260, 1356, 805, 1210, 667, 1367, 61, 85...</td>\n", | |
| " <td>[5.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 3.0, ...</td>\n", | |
| " <td>[859381992, 859382015, 859382042, 859382042, 8...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>[1962, 2915, 2605, 4361, 193, 3361, 3863, 1347...</td>\n", | |
| " <td>[3.0, 3.0, 3.5, 3.0, 1.0, 3.0, 4.0, 3.0, 2.0, ...</td>\n", | |
| " <td>[1227570828, 1227570836, 1227570841, 122757085...</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " movieId \\\n", | |
| "userId \n", | |
| "1 [5952, 2012, 2011, 1653, 1250, 6539, 6377, 344... \n", | |
| "2 [2797, 5952, 1080, 553, 653, 497, 1374, 1653, ... \n", | |
| "3 [356, 593, 1270, 1, 480, 2571, 260, 318, 1196,... \n", | |
| "4 [97913, 93510, 91630, 93840, 195159, 122914, 1... \n", | |
| "5 [592, 590, 296, 150, 344, 153, 588, 595, 231, ... \n", | |
| "6 [2421, 1466, 161, 902, 858, 2815, 1183, 1704, ... \n", | |
| "7 [590, 296, 592, 150, 153, 165, 344, 588, 595, ... \n", | |
| "8 [1672, 1617, 1777, 1721, 1704, 551, 903, 110, ... \n", | |
| "9 [1073, 260, 1356, 805, 1210, 667, 1367, 61, 85... \n", | |
| "10 [1962, 2915, 2605, 4361, 193, 3361, 3863, 1347... \n", | |
| "\n", | |
| " rating \\\n", | |
| "userId \n", | |
| "1 [4.0, 2.5, 2.5, 4.0, 4.0, 3.5, 4.0, 4.0, 4.0, ... \n", | |
| "2 [1.0, 5.0, 1.0, 2.0, 3.0, 4.0, 4.5, 4.5, 3.0, ... \n", | |
| "3 [4.0, 4.0, 3.5, 4.0, 2.0, 4.0, 4.0, 4.0, 4.0, ... \n", | |
| "4 [3.5, 4.0, 3.5, 4.5, 5.0, 3.0, 2.0, 3.5, 2.5, ... \n", | |
| "5 [3.0, 3.0, 4.0, 5.0, 4.0, 3.0, 4.0, 3.0, 4.0, ... \n", | |
| "6 [3.0, 3.0, 2.0, 4.0, 5.0, 3.0, 2.0, 5.0, 5.0, ... \n", | |
| "7 [3.0, 4.0, 3.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, ... \n", | |
| "8 [4.0, 5.0, 3.0, 4.0, 4.0, 2.0, 4.0, 5.0, 3.0, ... \n", | |
| "9 [5.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 3.0, ... \n", | |
| "10 [3.0, 3.0, 3.5, 3.0, 1.0, 3.0, 4.0, 3.0, 2.0, ... \n", | |
| "\n", | |
| " timestamp \n", | |
| "userId \n", | |
| "1 [1147868053, 1147868068, 1147868079, 114786809... \n", | |
| "2 [1141415509, 1141415528, 1141415532, 114141553... \n", | |
| "3 [1439472199, 1439472203, 1439472211, 143947221... \n", | |
| "4 [1573937091, 1573937096, 1573937103, 157393711... \n", | |
| "5 [830786155, 830786155, 830786155, 830786155, 8... \n", | |
| "6 [945141530, 945141530, 945141530, 945141564, 9... \n", | |
| "7 [835444730, 835444730, 835444730, 835444730, 8... \n", | |
| "8 [890489203, 890489203, 890489236, 890489263, 8... \n", | |
| "9 [859381992, 859382015, 859382042, 859382042, 8... \n", | |
| "10 [1227570828, 1227570836, 1227570841, 122757085... " | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "grouped_ratings.head(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Re-index every rated movieId to a consecutive integer, in ascending movieId order.\n", | |
| "# Numbering starts at 2, so 0 and 1 are never assigned here (presumably reserved for\n", | |
| "# special tokens such as padding/mask -- TODO confirm against the model code).\n", | |
| "movieIdMapping = {\n", | |
| "    movie_id: index\n", | |
| "    for index, movie_id in enumerate(sorted(ratings_df[\"movieId\"].unique()), start=2)\n", | |
| "}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "59047" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(movieIdMapping)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "62423" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(movies_df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "209171" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "max(movieIdMapping.keys())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "1" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "min(movieIdMapping.keys())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "59048" | |
| ] | |
| }, | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "max(movieIdMapping.values())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "min(movieIdMapping.values())\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Add the consecutive index of each rated movie as a new column; every movieId in\n", | |
| "# ratings_df is a key of movieIdMapping because the mapping was built from this column.\n", | |
| "ratings_df[\"movieId_mapped\"] = ratings_df[\"movieId\"].map(movieIdMapping)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>userId</th>\n", | |
| " <th>movieId</th>\n", | |
| " <th>rating</th>\n", | |
| " <th>timestamp</th>\n", | |
| " <th>movieId_mapped</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>326761</th>\n", | |
| " <td>2262</td>\n", | |
| " <td>21</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>789652009</td>\n", | |
| " <td>22</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>326810</th>\n", | |
| " <td>2262</td>\n", | |
| " <td>1079</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>789652009</td>\n", | |
| " <td>1054</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>326767</th>\n", | |
| " <td>2262</td>\n", | |
| " <td>47</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>789652009</td>\n", | |
| " <td>48</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15845015</th>\n", | |
| " <td>102689</td>\n", | |
| " <td>1</td>\n", | |
| " <td>4.0</td>\n", | |
| " <td>822873600</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15845023</th>\n", | |
| " <td>102689</td>\n", | |
| " <td>39</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>822873600</td>\n", | |
| " <td>40</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " userId movieId rating timestamp movieId_mapped\n", | |
| "326761 2262 21 3.0 789652009 22\n", | |
| "326810 2262 1079 3.0 789652009 1054\n", | |
| "326767 2262 47 5.0 789652009 48\n", | |
| "15845015 102689 1 4.0 822873600 2\n", | |
| "15845023 102689 39 5.0 822873600 40" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "ratings_df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "59048" | |
| ] | |
| }, | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "ratings_df.movieId_mapped.max()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "ratings_df.movieId_mapped.min()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# movieIdMapping only covers movies that appear in ratings_df, so titles without any\n", | |
| "# rating map to a missing value. Cast to the nullable Int64 dtype so the mapped indices\n", | |
| "# stay integers; a plain .map() result with NaN would turn the whole column into floats\n", | |
| "# (e.g. 2.0) and be written that way to the CSV.\n", | |
| "movies_df[\"movieId_mapped\"] = movies_df[\"movieId\"].map(movieIdMapping).astype(\"Int64\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "movies_df.to_csv(\"../data/ml-25m/ml-25m/movies_mapped.csv\", index = False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "ratings_df.to_csv(\"../data/ml-25m/ml-25m/ratings_mapped.csv\", index = False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "interpreter": { | |
| "hash": "ff9c16f4f11009bb918bd4cbef0c02902e53456483176d7e27b50617b808988a" | |
| }, | |
| "kernelspec": { | |
| "display_name": "Python 3.7.10 ('clustering')", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.10" | |
| }, | |
| "orig_nbformat": 4 | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment