vatsalsaglani · October 8, 2022 13:18
diff --git a/data_processing.ipynb b/data_processing.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import random\n",
    "import sys\n",
    "sys.path.append(\"../\")\n",
    "from constants import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the movie catalogue (columns: movieId, title, genres) from the ml-25m data directory.\n",
    "movies_df = pd.read_csv(filepath_or_buffer=\"../data/ml-25m/ml-25m/movies.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "62423"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(movies_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>62413</th>\n",
       "      <td>209145</td>\n",
       "      <td>Liberté (2019)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62414</th>\n",
       "      <td>209147</td>\n",
       "      <td>The Carpet of Horror (1962)</td>\n",
       "      <td>Crime|Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62415</th>\n",
       "      <td>209151</td>\n",
       "      <td>Mao Zedong 1949 (2019)</td>\n",
       "      <td>(no genres listed)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62416</th>\n",
       "      <td>209153</td>\n",
       "      <td>Happy Flight (2008)</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62417</th>\n",
       "      <td>209155</td>\n",
       "      <td>Santosh Subramaniam (2008)</td>\n",
       "      <td>Action|Comedy|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62418</th>\n",
       "      <td>209157</td>\n",
       "      <td>We (2018)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62419</th>\n",
       "      <td>209159</td>\n",
       "      <td>Window of the Soul (2001)</td>\n",
       "      <td>Documentary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62420</th>\n",
       "      <td>209163</td>\n",
       "      <td>Bad Poems (2018)</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62421</th>\n",
       "      <td>209169</td>\n",
       "      <td>A Girl Thing (2001)</td>\n",
       "      <td>(no genres listed)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62422</th>\n",
       "      <td>209171</td>\n",
       "      <td>Women of Devil's Island (1962)</td>\n",
       "      <td>Action|Adventure|Drama</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       movieId                           title                  genres\n",
       "62413   209145                  Liberté (2019)                   Drama\n",
       "62414   209147     The Carpet of Horror (1962)            Crime|Horror\n",
       "62415   209151          Mao Zedong 1949 (2019)      (no genres listed)\n",
       "62416   209153             Happy Flight (2008)            Comedy|Drama\n",
       "62417   209155      Santosh Subramaniam (2008)   Action|Comedy|Romance\n",
       "62418   209157                       We (2018)                   Drama\n",
       "62419   209159       Window of the Soul (2001)             Documentary\n",
       "62420   209163                Bad Poems (2018)            Comedy|Drama\n",
       "62421   209169             A Girl Thing (2001)      (no genres listed)\n",
       "62422   209171  Women of Devil's Island (1962)  Action|Adventure|Drama"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies_df.tail(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the raw ratings (columns: userId, movieId, rating, timestamp).\n",
    "ratings_df = pd.read_csv(filepath_or_buffer=\"../data/ml-25m/ml-25m/ratings.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "25000095"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(ratings_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>296</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1147880044</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>306</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1147868817</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>307</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1147868828</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>665</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1147878820</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>899</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1147868510</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  movieId  rating   timestamp\n",
       "0       1      296     5.0  1147880044\n",
       "1       1      306     3.5  1147868817\n",
       "2       1      307     5.0  1147868828\n",
       "3       1      665     5.0  1147878820\n",
       "4       1      899     3.5  1147868510"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ratings_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Order ratings chronologically. A stable sort keeps the CSV order for rows that\n",
    "# share a timestamp (several users have many such ties), so each user's sequence\n",
    "# does not depend on how the default quicksort happens to break ties. Reassigning\n",
    "# instead of sorting in place keeps this cell free of hidden in-place mutation.\n",
    "ratings_df = ratings_df.sort_values(by=\"timestamp\", kind=\"mergesort\")\n",
    "# One row per user; every remaining column becomes that user's values as a list,\n",
    "# in the (timestamp) row order established above.\n",
    "grouped_ratings = ratings_df.groupby(by=\"userId\").agg(list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>userId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[5952, 2012, 2011, 1653, 1250, 6539, 6377, 344...</td>\n",
       "      <td>[4.0, 2.5, 2.5, 4.0, 4.0, 3.5, 4.0, 4.0, 4.0, ...</td>\n",
       "      <td>[1147868053, 1147868068, 1147868079, 114786809...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[2797, 5952, 1080, 553, 653, 497, 1374, 1653, ...</td>\n",
       "      <td>[1.0, 5.0, 1.0, 2.0, 3.0, 4.0, 4.5, 4.5, 3.0, ...</td>\n",
       "      <td>[1141415509, 1141415528, 1141415532, 114141553...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[356, 593, 1270, 1, 480, 2571, 260, 318, 1196,...</td>\n",
       "      <td>[4.0, 4.0, 3.5, 4.0, 2.0, 4.0, 4.0, 4.0, 4.0, ...</td>\n",
       "      <td>[1439472199, 1439472203, 1439472211, 143947221...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[97913, 93510, 91630, 93840, 195159, 122914, 1...</td>\n",
       "      <td>[3.5, 4.0, 3.5, 4.5, 5.0, 3.0, 2.0, 3.5, 2.5, ...</td>\n",
       "      <td>[1573937091, 1573937096, 1573937103, 157393711...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>[592, 590, 296, 150, 344, 153, 588, 595, 231, ...</td>\n",
       "      <td>[3.0, 3.0, 4.0, 5.0, 4.0, 3.0, 4.0, 3.0, 4.0, ...</td>\n",
       "      <td>[830786155, 830786155, 830786155, 830786155, 8...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>[2421, 1466, 161, 902, 858, 2815, 1183, 1704, ...</td>\n",
       "      <td>[3.0, 3.0, 2.0, 4.0, 5.0, 3.0, 2.0, 5.0, 5.0, ...</td>\n",
       "      <td>[945141530, 945141530, 945141530, 945141564, 9...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>[590, 296, 592, 150, 153, 165, 344, 588, 595, ...</td>\n",
       "      <td>[3.0, 4.0, 3.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, ...</td>\n",
       "      <td>[835444730, 835444730, 835444730, 835444730, 8...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>[1672, 1617, 1777, 1721, 1704, 551, 903, 110, ...</td>\n",
       "      <td>[4.0, 5.0, 3.0, 4.0, 4.0, 2.0, 4.0, 5.0, 3.0, ...</td>\n",
       "      <td>[890489203, 890489203, 890489236, 890489263, 8...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>[1073, 260, 1356, 805, 1210, 667, 1367, 61, 85...</td>\n",
       "      <td>[5.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 3.0, ...</td>\n",
       "      <td>[859381992, 859382015, 859382042, 859382042, 8...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>[1962, 2915, 2605, 4361, 193, 3361, 3863, 1347...</td>\n",
       "      <td>[3.0, 3.0, 3.5, 3.0, 1.0, 3.0, 4.0, 3.0, 2.0, ...</td>\n",
       "      <td>[1227570828, 1227570836, 1227570841, 122757085...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  movieId  \\\n",
       "userId                                                      \n",
       "1       [5952, 2012, 2011, 1653, 1250, 6539, 6377, 344...   \n",
       "2       [2797, 5952, 1080, 553, 653, 497, 1374, 1653, ...   \n",
       "3       [356, 593, 1270, 1, 480, 2571, 260, 318, 1196,...   \n",
       "4       [97913, 93510, 91630, 93840, 195159, 122914, 1...   \n",
       "5       [592, 590, 296, 150, 344, 153, 588, 595, 231, ...   \n",
       "6       [2421, 1466, 161, 902, 858, 2815, 1183, 1704, ...   \n",
       "7       [590, 296, 592, 150, 153, 165, 344, 588, 595, ...   \n",
       "8       [1672, 1617, 1777, 1721, 1704, 551, 903, 110, ...   \n",
       "9       [1073, 260, 1356, 805, 1210, 667, 1367, 61, 85...   \n",
       "10      [1962, 2915, 2605, 4361, 193, 3361, 3863, 1347...   \n",
       "\n",
       "                                                   rating  \\\n",
       "userId                                                      \n",
       "1       [4.0, 2.5, 2.5, 4.0, 4.0, 3.5, 4.0, 4.0, 4.0, ...   \n",
       "2       [1.0, 5.0, 1.0, 2.0, 3.0, 4.0, 4.5, 4.5, 3.0, ...   \n",
       "3       [4.0, 4.0, 3.5, 4.0, 2.0, 4.0, 4.0, 4.0, 4.0, ...   \n",
       "4       [3.5, 4.0, 3.5, 4.5, 5.0, 3.0, 2.0, 3.5, 2.5, ...   \n",
       "5       [3.0, 3.0, 4.0, 5.0, 4.0, 3.0, 4.0, 3.0, 4.0, ...   \n",
       "6       [3.0, 3.0, 2.0, 4.0, 5.0, 3.0, 2.0, 5.0, 5.0, ...   \n",
       "7       [3.0, 4.0, 3.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, ...   \n",
       "8       [4.0, 5.0, 3.0, 4.0, 4.0, 2.0, 4.0, 5.0, 3.0, ...   \n",
       "9       [5.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 3.0, ...   \n",
       "10      [3.0, 3.0, 3.5, 3.0, 1.0, 3.0, 4.0, 3.0, 2.0, ...   \n",
       "\n",
       "                                                timestamp  \n",
       "userId                                                     \n",
       "1       [1147868053, 1147868068, 1147868079, 114786809...  \n",
       "2       [1141415509, 1141415528, 1141415532, 114141553...  \n",
       "3       [1439472199, 1439472203, 1439472211, 143947221...  \n",
       "4       [1573937091, 1573937096, 1573937103, 157393711...  \n",
       "5       [830786155, 830786155, 830786155, 830786155, 8...  \n",
       "6       [945141530, 945141530, 945141530, 945141564, 9...  \n",
       "7       [835444730, 835444730, 835444730, 835444730, 8...  \n",
       "8       [890489203, 890489203, 890489236, 890489263, 8...  \n",
       "9       [859381992, 859382015, 859382042, 859382042, 8...  \n",
       "10      [1227570828, 1227570836, 1227570841, 122757085...  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped_ratings.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-index the raw movieIds found in ratings_df to contiguous integers starting at 2,\n",
    "# assigned in ascending movieId order. Movies without any rating get no entry.\n",
    "# NOTE(review): indices 0 and 1 are presumably reserved for special tokens\n",
    "# (e.g. padding / mask) -- confirm against the code that consumes this mapping.\n",
    "movieIdMapping = {\n",
    "    movie_id: new_id\n",
    "    for new_id, movie_id in enumerate(sorted(ratings_df.movieId.unique()), start=2)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "59047"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(movieIdMapping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "62423"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(movies_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "209171"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max(movieIdMapping.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min(movieIdMapping.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "59048"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max(movieIdMapping.values())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min(movieIdMapping.values())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the contiguous movie index to every rating row via a dictionary lookup.\n",
    "ratings_df[\"movieId_mapped\"] = ratings_df[\"movieId\"].map(movieIdMapping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>movieId_mapped</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>326761</th>\n",
       "      <td>2262</td>\n",
       "      <td>21</td>\n",
       "      <td>3.0</td>\n",
       "      <td>789652009</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>326810</th>\n",
       "      <td>2262</td>\n",
       "      <td>1079</td>\n",
       "      <td>3.0</td>\n",
       "      <td>789652009</td>\n",
       "      <td>1054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>326767</th>\n",
       "      <td>2262</td>\n",
       "      <td>47</td>\n",
       "      <td>5.0</td>\n",
       "      <td>789652009</td>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15845015</th>\n",
       "      <td>102689</td>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "      <td>822873600</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15845023</th>\n",
       "      <td>102689</td>\n",
       "      <td>39</td>\n",
       "      <td>5.0</td>\n",
       "      <td>822873600</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          userId  movieId  rating  timestamp  movieId_mapped\n",
       "326761      2262       21     3.0  789652009              22\n",
       "326810      2262     1079     3.0  789652009            1054\n",
       "326767      2262       47     5.0  789652009              48\n",
       "15845015  102689        1     4.0  822873600               2\n",
       "15845023  102689       39     5.0  822873600              40"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ratings_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "59048"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ratings_df.movieId_mapped.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ratings_df.movieId_mapped.min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# movieIdMapping only covers movies that have at least one rating (59047 of the\n",
    "# 62423 catalogue rows), so unrated movies map to a missing value. Cast to the\n",
    "# nullable Int64 dtype so mapped ids stay integers (saved as 2, not 2.0) and\n",
    "# unrated movies are left empty instead of turning the whole column into floats.\n",
    "movies_df[\"movieId_mapped\"] = movies_df.movieId.map(movieIdMapping).astype(\"Int64\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the movie catalogue next to the raw data; the row index is not written.\n",
    "movies_df.to_csv(path_or_buf=\"../data/ml-25m/ml-25m/movies_mapped.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the ratings table next to the raw data; the row index is not written.\n",
    "ratings_df.to_csv(path_or_buf=\"../data/ml-25m/ml-25m/ratings_mapped.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "ff9c16f4f11009bb918bd4cbef0c02902e53456483176d7e27b50617b808988a"
  },
  "kernelspec": {
   "display_name": "Python 3.7.10 ('clustering')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"import pandas as pd\n",
	"import torch\n",
	"import torch.nn as nn\n",
	"import torch.nn.functional as F\n",
	"import random\n",
	"import sys\n",
	"sys.path.append(\"../\")\n",
	"from constants import *"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load the movie catalogue (columns: movieId, title, genres) from the ml-25m data directory.\n",
	"movies_df = pd.read_csv(filepath_or_buffer=\"../data/ml-25m/ml-25m/movies.csv\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"62423"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"len(movies_df)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>movieId</th>\n",
	" <th>title</th>\n",
	" <th>genres</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>62413</th>\n",
	" <td>209145</td>\n",
	" <td>Liberté (2019)</td>\n",
	" <td>Drama</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>62414</th>\n",
	" <td>209147</td>\n",
	" <td>The Carpet of Horror (1962)</td>\n",
	" <td>Crime|Horror</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>62415</th>\n",
	" <td>209151</td>\n",
	" <td>Mao Zedong 1949 (2019)</td>\n",
	" <td>(no genres listed)</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>62416</th>\n",
	" <td>209153</td>\n",
	" <td>Happy Flight (2008)</td>\n",
	" <td>Comedy|Drama</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>62417</th>\n",
	" <td>209155</td>\n",
	" <td>Santosh Subramaniam (2008)</td>\n",
	" <td>Action|Comedy|Romance</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>62418</th>\n",
	" <td>209157</td>\n",
	" <td>We (2018)</td>\n",
	" <td>Drama</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>62419</th>\n",
	" <td>209159</td>\n",
	" <td>Window of the Soul (2001)</td>\n",
	" <td>Documentary</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>62420</th>\n",
	" <td>209163</td>\n",
	" <td>Bad Poems (2018)</td>\n",
	" <td>Comedy|Drama</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>62421</th>\n",
	" <td>209169</td>\n",
	" <td>A Girl Thing (2001)</td>\n",
	" <td>(no genres listed)</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>62422</th>\n",
	" <td>209171</td>\n",
	" <td>Women of Devil's Island (1962)</td>\n",
	" <td>Action|Adventure|Drama</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" movieId title genres\n",
	"62413 209145 Liberté (2019) Drama\n",
	"62414 209147 The Carpet of Horror (1962) Crime|Horror\n",
	"62415 209151 Mao Zedong 1949 (2019) (no genres listed)\n",
	"62416 209153 Happy Flight (2008) Comedy|Drama\n",
	"62417 209155 Santosh Subramaniam (2008) Action|Comedy|Romance\n",
	"62418 209157 We (2018) Drama\n",
	"62419 209159 Window of the Soul (2001) Documentary\n",
	"62420 209163 Bad Poems (2018) Comedy|Drama\n",
	"62421 209169 A Girl Thing (2001) (no genres listed)\n",
	"62422 209171 Women of Devil's Island (1962) Action|Adventure|Drama"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"movies_df.tail(10)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load the raw ratings (columns: userId, movieId, rating, timestamp).\n",
	"ratings_df = pd.read_csv(filepath_or_buffer=\"../data/ml-25m/ml-25m/ratings.csv\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"25000095"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"len(ratings_df)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>userId</th>\n",
	" <th>movieId</th>\n",
	" <th>rating</th>\n",
	" <th>timestamp</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>296</td>\n",
	" <td>5.0</td>\n",
	" <td>1147880044</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>1</td>\n",
	" <td>306</td>\n",
	" <td>3.5</td>\n",
	" <td>1147868817</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>1</td>\n",
	" <td>307</td>\n",
	" <td>5.0</td>\n",
	" <td>1147868828</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>1</td>\n",
	" <td>665</td>\n",
	" <td>5.0</td>\n",
	" <td>1147878820</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>1</td>\n",
	" <td>899</td>\n",
	" <td>3.5</td>\n",
	" <td>1147868510</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" userId movieId rating timestamp\n",
	"0 1 296 5.0 1147880044\n",
	"1 1 306 3.5 1147868817\n",
	"2 1 307 5.0 1147868828\n",
	"3 1 665 5.0 1147878820\n",
	"4 1 899 3.5 1147868510"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"ratings_df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Order ratings chronologically. A stable sort keeps the CSV order for rows that\n",
	"# share a timestamp (several users have many such ties), so each user's sequence\n",
	"# does not depend on how the default quicksort happens to break ties. Reassigning\n",
	"# instead of sorting in place keeps this cell free of hidden in-place mutation.\n",
	"ratings_df = ratings_df.sort_values(by=\"timestamp\", kind=\"mergesort\")\n",
	"# One row per user; every remaining column becomes that user's values as a list,\n",
	"# in the (timestamp) row order established above.\n",
	"grouped_ratings = ratings_df.groupby(by=\"userId\").agg(list)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>movieId</th>\n",
	" <th>rating</th>\n",
	" <th>timestamp</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>userId</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>[5952, 2012, 2011, 1653, 1250, 6539, 6377, 344...</td>\n",
	" <td>[4.0, 2.5, 2.5, 4.0, 4.0, 3.5, 4.0, 4.0, 4.0, ...</td>\n",
	" <td>[1147868053, 1147868068, 1147868079, 114786809...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>[2797, 5952, 1080, 553, 653, 497, 1374, 1653, ...</td>\n",
	" <td>[1.0, 5.0, 1.0, 2.0, 3.0, 4.0, 4.5, 4.5, 3.0, ...</td>\n",
	" <td>[1141415509, 1141415528, 1141415532, 114141553...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>[356, 593, 1270, 1, 480, 2571, 260, 318, 1196,...</td>\n",
	" <td>[4.0, 4.0, 3.5, 4.0, 2.0, 4.0, 4.0, 4.0, 4.0, ...</td>\n",
	" <td>[1439472199, 1439472203, 1439472211, 143947221...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>[97913, 93510, 91630, 93840, 195159, 122914, 1...</td>\n",
	" <td>[3.5, 4.0, 3.5, 4.5, 5.0, 3.0, 2.0, 3.5, 2.5, ...</td>\n",
	" <td>[1573937091, 1573937096, 1573937103, 157393711...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>[592, 590, 296, 150, 344, 153, 588, 595, 231, ...</td>\n",
	" <td>[3.0, 3.0, 4.0, 5.0, 4.0, 3.0, 4.0, 3.0, 4.0, ...</td>\n",
	" <td>[830786155, 830786155, 830786155, 830786155, 8...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <td>[2421, 1466, 161, 902, 858, 2815, 1183, 1704, ...</td>\n",
	" <td>[3.0, 3.0, 2.0, 4.0, 5.0, 3.0, 2.0, 5.0, 5.0, ...</td>\n",
	" <td>[945141530, 945141530, 945141530, 945141564, 9...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <td>[590, 296, 592, 150, 153, 165, 344, 588, 595, ...</td>\n",
	" <td>[3.0, 4.0, 3.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, ...</td>\n",
	" <td>[835444730, 835444730, 835444730, 835444730, 8...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8</th>\n",
	" <td>[1672, 1617, 1777, 1721, 1704, 551, 903, 110, ...</td>\n",
	" <td>[4.0, 5.0, 3.0, 4.0, 4.0, 2.0, 4.0, 5.0, 3.0, ...</td>\n",
	" <td>[890489203, 890489203, 890489236, 890489263, 8...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9</th>\n",
	" <td>[1073, 260, 1356, 805, 1210, 667, 1367, 61, 85...</td>\n",
	" <td>[5.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 3.0, ...</td>\n",
	" <td>[859381992, 859382015, 859382042, 859382042, 8...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>10</th>\n",
	" <td>[1962, 2915, 2605, 4361, 193, 3361, 3863, 1347...</td>\n",
	" <td>[3.0, 3.0, 3.5, 3.0, 1.0, 3.0, 4.0, 3.0, 2.0, ...</td>\n",
	" <td>[1227570828, 1227570836, 1227570841, 122757085...</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" movieId \\\n",
	"userId \n",
	"1 [5952, 2012, 2011, 1653, 1250, 6539, 6377, 344... \n",
	"2 [2797, 5952, 1080, 553, 653, 497, 1374, 1653, ... \n",
	"3 [356, 593, 1270, 1, 480, 2571, 260, 318, 1196,... \n",
	"4 [97913, 93510, 91630, 93840, 195159, 122914, 1... \n",
	"5 [592, 590, 296, 150, 344, 153, 588, 595, 231, ... \n",
	"6 [2421, 1466, 161, 902, 858, 2815, 1183, 1704, ... \n",
	"7 [590, 296, 592, 150, 153, 165, 344, 588, 595, ... \n",
	"8 [1672, 1617, 1777, 1721, 1704, 551, 903, 110, ... \n",
	"9 [1073, 260, 1356, 805, 1210, 667, 1367, 61, 85... \n",
	"10 [1962, 2915, 2605, 4361, 193, 3361, 3863, 1347... \n",
	"\n",
	" rating \\\n",
	"userId \n",
	"1 [4.0, 2.5, 2.5, 4.0, 4.0, 3.5, 4.0, 4.0, 4.0, ... \n",
	"2 [1.0, 5.0, 1.0, 2.0, 3.0, 4.0, 4.5, 4.5, 3.0, ... \n",
	"3 [4.0, 4.0, 3.5, 4.0, 2.0, 4.0, 4.0, 4.0, 4.0, ... \n",
	"4 [3.5, 4.0, 3.5, 4.5, 5.0, 3.0, 2.0, 3.5, 2.5, ... \n",
	"5 [3.0, 3.0, 4.0, 5.0, 4.0, 3.0, 4.0, 3.0, 4.0, ... \n",
	"6 [3.0, 3.0, 2.0, 4.0, 5.0, 3.0, 2.0, 5.0, 5.0, ... \n",
	"7 [3.0, 4.0, 3.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, ... \n",
	"8 [4.0, 5.0, 3.0, 4.0, 4.0, 2.0, 4.0, 5.0, 3.0, ... \n",
	"9 [5.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 3.0, ... \n",
	"10 [3.0, 3.0, 3.5, 3.0, 1.0, 3.0, 4.0, 3.0, 2.0, ... \n",
	"\n",
	" timestamp \n",
	"userId \n",
	"1 [1147868053, 1147868068, 1147868079, 114786809... \n",
	"2 [1141415509, 1141415528, 1141415532, 114141553... \n",
	"3 [1439472199, 1439472203, 1439472211, 143947221... \n",
	"4 [1573937091, 1573937096, 1573937103, 157393711... \n",
	"5 [830786155, 830786155, 830786155, 830786155, 8... \n",
	"6 [945141530, 945141530, 945141530, 945141564, 9... \n",
	"7 [835444730, 835444730, 835444730, 835444730, 8... \n",
	"8 [890489203, 890489203, 890489236, 890489263, 8... \n",
	"9 [859381992, 859382015, 859382042, 859382042, 8... \n",
	"10 [1227570828, 1227570836, 1227570841, 122757085... "
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"grouped_ratings.head(10)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Re-index every movieId that occurs in ratings_df onto a dense integer range,\n",
	"# assigned in ascending movieId order and starting at 2.\n",
	"# NOTE(review): indices 0 and 1 are left unused, presumably reserved for special tokens - confirm.\n",
	"movieIdMapping = {\n",
	"    movie_id: dense_idx\n",
	"    for dense_idx, movie_id in enumerate(sorted(ratings_df[\"movieId\"].unique()), start=2)\n",
	"}"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"59047"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"len(movieIdMapping)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"62423"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"len(movies_df)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"209171"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"max(movieIdMapping.keys())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"min(movieIdMapping.keys())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"59048"
	]
	},
	"execution_count": 17,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"max(movieIdMapping.values())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"2"
	]
	},
	"execution_count": 18,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"min(movieIdMapping.values())\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Attach the dense index to each rating row. movieIdMapping was built from\n",
	"# ratings_df's own movieIds, so every row is expected to find a match.\n",
	"ratings_df[\"movieId_mapped\"] = ratings_df[\"movieId\"].map(movieIdMapping)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>userId</th>\n",
	" <th>movieId</th>\n",
	" <th>rating</th>\n",
	" <th>timestamp</th>\n",
	" <th>movieId_mapped</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>326761</th>\n",
	" <td>2262</td>\n",
	" <td>21</td>\n",
	" <td>3.0</td>\n",
	" <td>789652009</td>\n",
	" <td>22</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>326810</th>\n",
	" <td>2262</td>\n",
	" <td>1079</td>\n",
	" <td>3.0</td>\n",
	" <td>789652009</td>\n",
	" <td>1054</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>326767</th>\n",
	" <td>2262</td>\n",
	" <td>47</td>\n",
	" <td>5.0</td>\n",
	" <td>789652009</td>\n",
	" <td>48</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>15845015</th>\n",
	" <td>102689</td>\n",
	" <td>1</td>\n",
	" <td>4.0</td>\n",
	" <td>822873600</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>15845023</th>\n",
	" <td>102689</td>\n",
	" <td>39</td>\n",
	" <td>5.0</td>\n",
	" <td>822873600</td>\n",
	" <td>40</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" userId movieId rating timestamp movieId_mapped\n",
	"326761 2262 21 3.0 789652009 22\n",
	"326810 2262 1079 3.0 789652009 1054\n",
	"326767 2262 47 5.0 789652009 48\n",
	"15845015 102689 1 4.0 822873600 2\n",
	"15845023 102689 39 5.0 822873600 40"
	]
	},
	"execution_count": 20,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"ratings_df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"59048"
	]
	},
	"execution_count": 21,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"ratings_df.movieId_mapped.max()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"2"
	]
	},
	"execution_count": 22,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"ratings_df.movieId_mapped.min()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [],
	"source": [
	"# movieIdMapping only has keys for movies that appear in ratings_df (59,047 keys),\n",
	"# while movies_df has 62,423 rows, so movies without ratings have no dense index.\n",
	"# A plain .map() would turn those into NaN and silently upcast the whole column to\n",
	"# float64; cast to pandas' nullable Int64 so mapped ids stay integers and unrated\n",
	"# movies are an explicit <NA>.\n",
	"movies_df[\"movieId_mapped\"] = movies_df[\"movieId\"].map(movieIdMapping).astype(\"Int64\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Persist the movie table together with its dense index (row index omitted).\n",
	"# Movies absent from ratings_df have no mapping, so their movieId_mapped field is written empty.\n",
	"movies_df.to_csv(\"../data/ml-25m/ml-25m/movies_mapped.csv\", index=False)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Persist the ratings, now carrying the movieId_mapped column, without the row index.\n",
	"ratings_df.to_csv(\"../data/ml-25m/ml-25m/ratings_mapped.csv\", index=False)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"interpreter": {
	"hash": "ff9c16f4f11009bb918bd4cbef0c02902e53456483176d7e27b50617b808988a"
	},
	"kernelspec": {
	"display_name": "Python 3.7.10 ('clustering')",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.10"
	},
	"orig_nbformat": 4
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}