Created
October 8, 2022 13:18
-
-
Save vatsalsaglani/3c1e3490b0871528743f6ac31bec7176 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import torch\n", | |
"import torch.nn as nn\n", | |
"import torch.nn.functional as F\n", | |
"import random\n", | |
"import sys\n", | |
"sys.path.append(\"../\")\n", | |
"from constants import *" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"movies_df = pd.read_csv(\"../data/ml-25m/ml-25m/movies.csv\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"62423" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(movies_df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>movieId</th>\n", | |
" <th>title</th>\n", | |
" <th>genres</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>62413</th>\n", | |
" <td>209145</td>\n", | |
" <td>Liberté (2019)</td>\n", | |
" <td>Drama</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>62414</th>\n", | |
" <td>209147</td>\n", | |
" <td>The Carpet of Horror (1962)</td>\n", | |
" <td>Crime|Horror</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>62415</th>\n", | |
" <td>209151</td>\n", | |
" <td>Mao Zedong 1949 (2019)</td>\n", | |
" <td>(no genres listed)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>62416</th>\n", | |
" <td>209153</td>\n", | |
" <td>Happy Flight (2008)</td>\n", | |
" <td>Comedy|Drama</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>62417</th>\n", | |
" <td>209155</td>\n", | |
" <td>Santosh Subramaniam (2008)</td>\n", | |
" <td>Action|Comedy|Romance</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>62418</th>\n", | |
" <td>209157</td>\n", | |
" <td>We (2018)</td>\n", | |
" <td>Drama</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>62419</th>\n", | |
" <td>209159</td>\n", | |
" <td>Window of the Soul (2001)</td>\n", | |
" <td>Documentary</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>62420</th>\n", | |
" <td>209163</td>\n", | |
" <td>Bad Poems (2018)</td>\n", | |
" <td>Comedy|Drama</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>62421</th>\n", | |
" <td>209169</td>\n", | |
" <td>A Girl Thing (2001)</td>\n", | |
" <td>(no genres listed)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>62422</th>\n", | |
" <td>209171</td>\n", | |
" <td>Women of Devil's Island (1962)</td>\n", | |
" <td>Action|Adventure|Drama</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" movieId title genres\n", | |
"62413 209145 Liberté (2019) Drama\n", | |
"62414 209147 The Carpet of Horror (1962) Crime|Horror\n", | |
"62415 209151 Mao Zedong 1949 (2019) (no genres listed)\n", | |
"62416 209153 Happy Flight (2008) Comedy|Drama\n", | |
"62417 209155 Santosh Subramaniam (2008) Action|Comedy|Romance\n", | |
"62418 209157 We (2018) Drama\n", | |
"62419 209159 Window of the Soul (2001) Documentary\n", | |
"62420 209163 Bad Poems (2018) Comedy|Drama\n", | |
"62421 209169 A Girl Thing (2001) (no genres listed)\n", | |
"62422 209171 Women of Devil's Island (1962) Action|Adventure|Drama" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"movies_df.tail(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ratings_df = pd.read_csv(\"../data/ml-25m/ml-25m/ratings.csv\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"25000095" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(ratings_df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>userId</th>\n", | |
" <th>movieId</th>\n", | |
" <th>rating</th>\n", | |
" <th>timestamp</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>296</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1147880044</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>306</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1147868817</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>307</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1147868828</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>665</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1147878820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>899</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1147868510</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" userId movieId rating timestamp\n", | |
"0 1 296 5.0 1147880044\n", | |
"1 1 306 3.5 1147868817\n", | |
"2 1 307 5.0 1147868828\n", | |
"3 1 665 5.0 1147878820\n", | |
"4 1 899 3.5 1147868510" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ratings_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ratings_df.sort_values(by=[\"timestamp\"], inplace=True)\n", | |
"grouped_ratings = ratings_df.groupby(by=\"userId\").agg(list)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>movieId</th>\n", | |
" <th>rating</th>\n", | |
" <th>timestamp</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>userId</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>[5952, 2012, 2011, 1653, 1250, 6539, 6377, 344...</td>\n", | |
" <td>[4.0, 2.5, 2.5, 4.0, 4.0, 3.5, 4.0, 4.0, 4.0, ...</td>\n", | |
" <td>[1147868053, 1147868068, 1147868079, 114786809...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>[2797, 5952, 1080, 553, 653, 497, 1374, 1653, ...</td>\n", | |
" <td>[1.0, 5.0, 1.0, 2.0, 3.0, 4.0, 4.5, 4.5, 3.0, ...</td>\n", | |
" <td>[1141415509, 1141415528, 1141415532, 114141553...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>[356, 593, 1270, 1, 480, 2571, 260, 318, 1196,...</td>\n", | |
" <td>[4.0, 4.0, 3.5, 4.0, 2.0, 4.0, 4.0, 4.0, 4.0, ...</td>\n", | |
" <td>[1439472199, 1439472203, 1439472211, 143947221...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>[97913, 93510, 91630, 93840, 195159, 122914, 1...</td>\n", | |
" <td>[3.5, 4.0, 3.5, 4.5, 5.0, 3.0, 2.0, 3.5, 2.5, ...</td>\n", | |
" <td>[1573937091, 1573937096, 1573937103, 157393711...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>[592, 590, 296, 150, 344, 153, 588, 595, 231, ...</td>\n", | |
" <td>[3.0, 3.0, 4.0, 5.0, 4.0, 3.0, 4.0, 3.0, 4.0, ...</td>\n", | |
" <td>[830786155, 830786155, 830786155, 830786155, 8...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>[2421, 1466, 161, 902, 858, 2815, 1183, 1704, ...</td>\n", | |
" <td>[3.0, 3.0, 2.0, 4.0, 5.0, 3.0, 2.0, 5.0, 5.0, ...</td>\n", | |
" <td>[945141530, 945141530, 945141530, 945141564, 9...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>[590, 296, 592, 150, 153, 165, 344, 588, 595, ...</td>\n", | |
" <td>[3.0, 4.0, 3.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, ...</td>\n", | |
" <td>[835444730, 835444730, 835444730, 835444730, 8...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>[1672, 1617, 1777, 1721, 1704, 551, 903, 110, ...</td>\n", | |
" <td>[4.0, 5.0, 3.0, 4.0, 4.0, 2.0, 4.0, 5.0, 3.0, ...</td>\n", | |
" <td>[890489203, 890489203, 890489236, 890489263, 8...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>[1073, 260, 1356, 805, 1210, 667, 1367, 61, 85...</td>\n", | |
" <td>[5.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 3.0, ...</td>\n", | |
" <td>[859381992, 859382015, 859382042, 859382042, 8...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>[1962, 2915, 2605, 4361, 193, 3361, 3863, 1347...</td>\n", | |
" <td>[3.0, 3.0, 3.5, 3.0, 1.0, 3.0, 4.0, 3.0, 2.0, ...</td>\n", | |
" <td>[1227570828, 1227570836, 1227570841, 122757085...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" movieId \\\n", | |
"userId \n", | |
"1 [5952, 2012, 2011, 1653, 1250, 6539, 6377, 344... \n", | |
"2 [2797, 5952, 1080, 553, 653, 497, 1374, 1653, ... \n", | |
"3 [356, 593, 1270, 1, 480, 2571, 260, 318, 1196,... \n", | |
"4 [97913, 93510, 91630, 93840, 195159, 122914, 1... \n", | |
"5 [592, 590, 296, 150, 344, 153, 588, 595, 231, ... \n", | |
"6 [2421, 1466, 161, 902, 858, 2815, 1183, 1704, ... \n", | |
"7 [590, 296, 592, 150, 153, 165, 344, 588, 595, ... \n", | |
"8 [1672, 1617, 1777, 1721, 1704, 551, 903, 110, ... \n", | |
"9 [1073, 260, 1356, 805, 1210, 667, 1367, 61, 85... \n", | |
"10 [1962, 2915, 2605, 4361, 193, 3361, 3863, 1347... \n", | |
"\n", | |
" rating \\\n", | |
"userId \n", | |
"1 [4.0, 2.5, 2.5, 4.0, 4.0, 3.5, 4.0, 4.0, 4.0, ... \n", | |
"2 [1.0, 5.0, 1.0, 2.0, 3.0, 4.0, 4.5, 4.5, 3.0, ... \n", | |
"3 [4.0, 4.0, 3.5, 4.0, 2.0, 4.0, 4.0, 4.0, 4.0, ... \n", | |
"4 [3.5, 4.0, 3.5, 4.5, 5.0, 3.0, 2.0, 3.5, 2.5, ... \n", | |
"5 [3.0, 3.0, 4.0, 5.0, 4.0, 3.0, 4.0, 3.0, 4.0, ... \n", | |
"6 [3.0, 3.0, 2.0, 4.0, 5.0, 3.0, 2.0, 5.0, 5.0, ... \n", | |
"7 [3.0, 4.0, 3.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, ... \n", | |
"8 [4.0, 5.0, 3.0, 4.0, 4.0, 2.0, 4.0, 5.0, 3.0, ... \n", | |
"9 [5.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 3.0, ... \n", | |
"10 [3.0, 3.0, 3.5, 3.0, 1.0, 3.0, 4.0, 3.0, 2.0, ... \n", | |
"\n", | |
" timestamp \n", | |
"userId \n", | |
"1 [1147868053, 1147868068, 1147868079, 114786809... \n", | |
"2 [1141415509, 1141415528, 1141415532, 114141553... \n", | |
"3 [1439472199, 1439472203, 1439472211, 143947221... \n", | |
"4 [1573937091, 1573937096, 1573937103, 157393711... \n", | |
"5 [830786155, 830786155, 830786155, 830786155, 8... \n", | |
"6 [945141530, 945141530, 945141530, 945141564, 9... \n", | |
"7 [835444730, 835444730, 835444730, 835444730, 8... \n", | |
"8 [890489203, 890489203, 890489236, 890489263, 8... \n", | |
"9 [859381992, 859382015, 859382042, 859382042, 8... \n", | |
"10 [1227570828, 1227570836, 1227570841, 122757085... " | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"grouped_ratings.head(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"movieIdMapping = {k:i+2 for i, k in enumerate(sorted(list(ratings_df.movieId.unique())))}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"59047" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(movieIdMapping)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"62423" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(movies_df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"209171" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"max(movieIdMapping.keys())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"min(movieIdMapping.keys())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"59048" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"max(movieIdMapping.values())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"min(movieIdMapping.values())\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ratings_df[\"movieId_mapped\"] = ratings_df.movieId.map(movieIdMapping)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>userId</th>\n", | |
" <th>movieId</th>\n", | |
" <th>rating</th>\n", | |
" <th>timestamp</th>\n", | |
" <th>movieId_mapped</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>326761</th>\n", | |
" <td>2262</td>\n", | |
" <td>21</td>\n", | |
" <td>3.0</td>\n", | |
" <td>789652009</td>\n", | |
" <td>22</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>326810</th>\n", | |
" <td>2262</td>\n", | |
" <td>1079</td>\n", | |
" <td>3.0</td>\n", | |
" <td>789652009</td>\n", | |
" <td>1054</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>326767</th>\n", | |
" <td>2262</td>\n", | |
" <td>47</td>\n", | |
" <td>5.0</td>\n", | |
" <td>789652009</td>\n", | |
" <td>48</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15845015</th>\n", | |
" <td>102689</td>\n", | |
" <td>1</td>\n", | |
" <td>4.0</td>\n", | |
" <td>822873600</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15845023</th>\n", | |
" <td>102689</td>\n", | |
" <td>39</td>\n", | |
" <td>5.0</td>\n", | |
" <td>822873600</td>\n", | |
" <td>40</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" userId movieId rating timestamp movieId_mapped\n", | |
"326761 2262 21 3.0 789652009 22\n", | |
"326810 2262 1079 3.0 789652009 1054\n", | |
"326767 2262 47 5.0 789652009 48\n", | |
"15845015 102689 1 4.0 822873600 2\n", | |
"15845023 102689 39 5.0 822873600 40" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ratings_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"59048" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ratings_df.movieId_mapped.max()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ratings_df.movieId_mapped.min()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"movies_df[\"movieId_mapped\"] = movies_df.movieId.map(movieIdMapping)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"movies_df.to_csv(\"../data/ml-25m/ml-25m/movies_mapped.csv\", index = False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ratings_df.to_csv(\"../data/ml-25m/ml-25m/ratings_mapped.csv\", index = False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"interpreter": { | |
"hash": "ff9c16f4f11009bb918bd4cbef0c02902e53456483176d7e27b50617b808988a" | |
}, | |
"kernelspec": { | |
"display_name": "Python 3.7.10 ('clustering')", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.10" | |
}, | |
"orig_nbformat": 4 | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment