Skip to content

Instantly share code, notes, and snippets.

@ischurov
Created October 9, 2024 09:22
Show Gist options
  • Save ischurov/015009fb98ae31aea801edefdb2f0f29 to your computer and use it in GitHub Desktop.
Save ischurov/015009fb98ae31aea801edefdb2f0f29 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:37:37.910603Z",
"start_time": "2024-10-08T15:37:37.906545Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import numpy as np"
],
"id": "c58cb687d261f85e",
"outputs": [],
"execution_count": 24
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:22:19.759802Z",
"start_time": "2024-10-08T15:22:19.749133Z"
}
},
"cell_type": "code",
"source": "ser = pd.Series([0, 10, 20], index=['a', 'b', 'c'])",
"id": "bd3ec52619be18da",
"outputs": [],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:23:46.855862Z",
"start_time": "2024-10-08T15:23:46.851027Z"
}
},
"cell_type": "code",
"source": "ser * 2",
"id": "bf11f176dd6eb449",
"outputs": [
{
"data": {
"text/plain": [
"a 0\n",
"b 20\n",
"c 40\n",
"dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:24:48.946532Z",
"start_time": "2024-10-08T15:24:48.939546Z"
}
},
"cell_type": "code",
"source": "ser['b']",
"id": "1f4a1b50dc99ca6d",
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:25:05.735230Z",
"start_time": "2024-10-08T15:25:05.727388Z"
}
},
"cell_type": "code",
"source": "pd.Series({'a': 0, 'b': 10})",
"id": "8ac58ae7312b1fb8",
"outputs": [
{
"data": {
"text/plain": [
"a 0\n",
"b 10\n",
"dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 7
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:26:16.079105Z",
"start_time": "2024-10-08T15:26:16.074238Z"
}
},
"cell_type": "code",
"source": "ser.iloc[1]",
"id": "c624ea4fb6d288cc",
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 9
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:28:32.285833Z",
"start_time": "2024-10-08T15:28:32.281799Z"
}
},
"cell_type": "code",
"source": [
"alice = pd.Series({'Algebra': 5, 'Geometry': 3, 'Music': 4})\n",
"bob = pd.Series({'Algebra': 4, 'Music': 3, 'History': 4})"
],
"id": "5a4b994054ef57d1",
"outputs": [],
"execution_count": 10
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:30:26.399757Z",
"start_time": "2024-10-08T15:30:26.392114Z"
}
},
"cell_type": "code",
"source": "alice.dtype",
"id": "318f10ae9ae68be7",
"outputs": [
{
"data": {
"text/plain": [
"dtype('int64')"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 13
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:30:40.231293Z",
"start_time": "2024-10-08T15:30:40.226064Z"
}
},
"cell_type": "code",
"source": "grade = alice + bob",
"id": "1f63449f7fe2ba89",
"outputs": [],
"execution_count": 14
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:30:44.898467Z",
"start_time": "2024-10-08T15:30:44.891996Z"
}
},
"cell_type": "code",
"source": "grade.dtype",
"id": "98a269af4cce110b",
"outputs": [
{
"data": {
"text/plain": [
"dtype('float64')"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 15
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:30:51.322820Z",
"start_time": "2024-10-08T15:30:51.315929Z"
}
},
"cell_type": "code",
"source": "grade",
"id": "64b2fc3f8558262f",
"outputs": [
{
"data": {
"text/plain": [
"Algebra 9.0\n",
"Geometry NaN\n",
"History NaN\n",
"Music 7.0\n",
"dtype: float64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 16
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:32:50.711821Z",
"start_time": "2024-10-08T15:32:50.705023Z"
}
},
"cell_type": "code",
"source": "grade['Geometry']",
"id": "15a8c8c5d9e5b2ed",
"outputs": [
{
"data": {
"text/plain": [
"nan"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 17
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:33:06.697152Z",
"start_time": "2024-10-08T15:33:06.692071Z"
}
},
"cell_type": "code",
"source": "float(\"NaN\")",
"id": "7959b7dbf41f9f67",
"outputs": [
{
"data": {
"text/plain": [
"nan"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 18
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:33:52.682736Z",
"start_time": "2024-10-08T15:33:52.677059Z"
}
},
"cell_type": "code",
"source": [
"if grade['Geometry'] == float(\"NaN\"):\n",
" print(\"It's a NaN\")\n",
"else:\n",
" print(\"????!!!\")"
],
"id": "43fad29e9b0ac7fc",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"????!!!\n"
]
}
],
"execution_count": 19
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:34:38.834179Z",
"start_time": "2024-10-08T15:34:38.829401Z"
}
},
"cell_type": "code",
"source": [
"if float(\"NaN\") == float(\"NaN\"):\n",
" print(\"It's a NaN\")\n",
"else:\n",
" print(\"????!!!\")"
],
"id": "903f09a11cd3cd84",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"????!!!\n"
]
}
],
"execution_count": 20
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:36:51.374542Z",
"start_time": "2024-10-08T15:36:51.370393Z"
}
},
"cell_type": "code",
"source": [
"if pd.isna(float(\"NaN\")):\n",
" print(\"Yes\")"
],
"id": "639ab94aca4ac217",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Yes\n"
]
}
],
"execution_count": 22
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:37:11.332300Z",
"start_time": "2024-10-08T15:37:11.326852Z"
}
},
"cell_type": "code",
"source": "pd.isna(None)",
"id": "fba4807a8f25e5af",
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 23
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:37:48.652825Z",
"start_time": "2024-10-08T15:37:48.648008Z"
}
},
"cell_type": "code",
"source": "np.isnan(float(\"NaN\"))",
"id": "7d0746a7a05b458",
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 25
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:37:54.472780Z",
"start_time": "2024-10-08T15:37:53.617154Z"
}
},
"cell_type": "code",
"source": "np.isnan(None)",
"id": "3b6e3cfacf72325e",
"outputs": [
{
"ename": "TypeError",
"evalue": "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''",
"output_type": "error",
"traceback": [
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[0;32mIn[26], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m np\u001B[38;5;241m.\u001B[39misnan(\u001B[38;5;28;01mNone\u001B[39;00m)\n",
"\u001B[0;31mTypeError\u001B[0m: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''"
]
}
],
"execution_count": 26
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:38:46.761386Z",
"start_time": "2024-10-08T15:38:46.755984Z"
}
},
"cell_type": "code",
"source": "alice",
"id": "c9e96c0b0b4d9bfa",
"outputs": [
{
"data": {
"text/plain": [
"Algebra 5\n",
"Geometry 3\n",
"Music 4\n",
"dtype: int64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 27
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:39:57.648890Z",
"start_time": "2024-10-08T15:39:57.630252Z"
}
},
"cell_type": "code",
"source": "alice['Algebra':'Geometry']",
"id": "762014ceac75eb64",
"outputs": [
{
"data": {
"text/plain": [
"Algebra 5\n",
"Geometry 3\n",
"dtype: int64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 28
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:41:37.351614Z",
"start_time": "2024-10-08T15:41:37.346248Z"
}
},
"cell_type": "code",
"source": "alice[0:1 + 1]",
"id": "a3a4732f7eb6c6c6",
"outputs": [
{
"data": {
"text/plain": [
"Algebra 5\n",
"Geometry 3\n",
"dtype: int64"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 30
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:42:44.651498Z",
"start_time": "2024-10-08T15:42:44.646548Z"
}
},
"cell_type": "code",
"source": "alice.values",
"id": "2626541ab7b04f17",
"outputs": [
{
"data": {
"text/plain": [
"array([5, 3, 4])"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 31
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:45:34.761149Z",
"start_time": "2024-10-08T15:45:34.754972Z"
}
},
"cell_type": "code",
"source": [
"df = pd.DataFrame([['Alice', 1, 2, 3], ['Bob', 4, 5, 6], ['Claudia', 2, 3, 4]],\n",
" columns=['Name', 'Algebra', 'Geometry', 'Calculus']\n",
" )"
],
"id": "acde8e41fddbe10c",
"outputs": [],
"execution_count": 32
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:46:17.242826Z",
"start_time": "2024-10-08T15:46:17.236930Z"
}
},
"cell_type": "code",
"source": "df.dtypes",
"id": "cf37a1dfc0369020",
"outputs": [
{
"data": {
"text/plain": [
"Name object\n",
"Algebra int64\n",
"Geometry int64\n",
"Calculus int64\n",
"dtype: object"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 34
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:47:21.206335Z",
"start_time": "2024-10-08T15:47:21.201629Z"
}
},
"cell_type": "code",
"source": "x = np.array(['a', 'b'])",
"id": "13ae050678d6213b",
"outputs": [],
"execution_count": 36
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:47:26.540578Z",
"start_time": "2024-10-08T15:47:26.537656Z"
}
},
"cell_type": "code",
"source": "x[0] = 'hello, world'",
"id": "49ad7f2c773d1e7",
"outputs": [],
"execution_count": 37
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:47:28.785179Z",
"start_time": "2024-10-08T15:47:28.779415Z"
}
},
"cell_type": "code",
"source": "x",
"id": "557c5325df8a2eca",
"outputs": [
{
"data": {
"text/plain": [
"array(['h', 'b'], dtype='<U1')"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 38
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:49:06.998663Z",
"start_time": "2024-10-08T15:49:06.990629Z"
}
},
"cell_type": "code",
"source": "df",
"id": "4e085de14e3a9022",
"outputs": [
{
"data": {
"text/plain": [
" Name Algebra Geometry Calculus\n",
"0 Alice 1 2 3\n",
"1 Bob 4 5 6\n",
"2 Claudia 2 3 4"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Alice</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Bob</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Claudia</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 39
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:50:03.103963Z",
"start_time": "2024-10-08T15:50:03.092774Z"
}
},
"cell_type": "code",
"source": "df.set_index('Name')",
"id": "4f74b53e6070f457",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus\n",
"Name \n",
"Alice 1 2 3\n",
"Bob 4 5 6\n",
"Claudia 2 3 4"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 40
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:50:38.376550Z",
"start_time": "2024-10-08T15:50:38.373156Z"
}
},
"cell_type": "code",
"source": "df.set_index('Name', inplace=True)",
"id": "2f6b23df5741c65e",
"outputs": [],
"execution_count": 42
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:51:13.940890Z",
"start_time": "2024-10-08T15:51:13.936689Z"
}
},
"cell_type": "code",
"source": "df.index",
"id": "8c617b2a0d759241",
"outputs": [
{
"data": {
"text/plain": [
"Index(['Alice', 'Bob', 'Claudia'], dtype='object', name='Name')"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 44
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:51:21.838570Z",
"start_time": "2024-10-08T15:51:21.834417Z"
}
},
"cell_type": "code",
"source": "df.columns",
"id": "cc09541a90209d9b",
"outputs": [
{
"data": {
"text/plain": [
"Index(['Algebra', 'Geometry', 'Calculus'], dtype='object')"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 45
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:51:51.772858Z",
"start_time": "2024-10-08T15:51:51.766262Z"
}
},
"cell_type": "code",
"source": "pd.DataFrame([[1, 20, 54], [13, 23, 54]])",
"id": "65ecfd085804c270",
"outputs": [
{
"data": {
"text/plain": [
" 0 1 2\n",
"0 1 20 54\n",
"1 13 23 54"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>20</td>\n",
" <td>54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13</td>\n",
" <td>23</td>\n",
" <td>54</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 48
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:52:48.539608Z",
"start_time": "2024-10-08T15:52:46.557181Z"
}
},
"cell_type": "code",
"source": "df['Alice']",
"id": "179af1e5db29165e",
"outputs": [
{
"ename": "KeyError",
"evalue": "'Alice'",
"output_type": "error",
"traceback": [
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)",
"File \u001B[0;32m~/miniconda3/envs/scientific-computing-2024-lesson01/lib/python3.11/site-packages/pandas/core/indexes/base.py:3805\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 3804\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m-> 3805\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_engine\u001B[38;5;241m.\u001B[39mget_loc(casted_key)\n\u001B[1;32m 3806\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n",
"File \u001B[0;32mindex.pyx:167\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n",
"File \u001B[0;32mindex.pyx:196\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n",
"File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n",
"File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n",
"\u001B[0;31mKeyError\u001B[0m: 'Alice'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[0;32mIn[50], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m df[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mAlice\u001B[39m\u001B[38;5;124m'\u001B[39m]\n",
"File \u001B[0;32m~/miniconda3/envs/scientific-computing-2024-lesson01/lib/python3.11/site-packages/pandas/core/frame.py:4102\u001B[0m, in \u001B[0;36mDataFrame.__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 4100\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mnlevels \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[1;32m 4101\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_getitem_multilevel(key)\n\u001B[0;32m-> 4102\u001B[0m indexer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mget_loc(key)\n\u001B[1;32m 4103\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(indexer):\n\u001B[1;32m 4104\u001B[0m indexer \u001B[38;5;241m=\u001B[39m [indexer]\n",
"File \u001B[0;32m~/miniconda3/envs/scientific-computing-2024-lesson01/lib/python3.11/site-packages/pandas/core/indexes/base.py:3812\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 3807\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(casted_key, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;129;01mor\u001B[39;00m (\n\u001B[1;32m 3808\u001B[0m \u001B[38;5;28misinstance\u001B[39m(casted_key, abc\u001B[38;5;241m.\u001B[39mIterable)\n\u001B[1;32m 3809\u001B[0m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(x, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m casted_key)\n\u001B[1;32m 3810\u001B[0m ):\n\u001B[1;32m 3811\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m InvalidIndexError(key)\n\u001B[0;32m-> 3812\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[1;32m 3813\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[1;32m 3814\u001B[0m \u001B[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001B[39;00m\n\u001B[1;32m 3815\u001B[0m \u001B[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001B[39;00m\n\u001B[1;32m 3816\u001B[0m \u001B[38;5;66;03m# the TypeError.\u001B[39;00m\n\u001B[1;32m 3817\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_check_indexing_error(key)\n",
"\u001B[0;31mKeyError\u001B[0m: 'Alice'"
]
}
],
"execution_count": 50
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:53:37.251197Z",
"start_time": "2024-10-08T15:53:37.245867Z"
}
},
"cell_type": "code",
"source": "df['Algebra']",
"id": "24b55cb23a2f6f1c",
"outputs": [
{
"data": {
"text/plain": [
"Name\n",
"Alice 1\n",
"Bob 4\n",
"Claudia 2\n",
"Name: Algebra, dtype: int64"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 53
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:54:19.465857Z",
"start_time": "2024-10-08T15:54:19.452564Z"
}
},
"cell_type": "code",
"source": "df[0:2]",
"id": "77ce5c9c2520934b",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus\n",
"Name \n",
"Alice 1 2 3\n",
"Bob 4 5 6"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 54
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:56:13.903004Z",
"start_time": "2024-10-08T15:56:13.888144Z"
}
},
"cell_type": "code",
"source": "df.loc['Alice']",
"id": "b38088e6de57522f",
"outputs": [
{
"data": {
"text/plain": [
"Algebra 1\n",
"Geometry 2\n",
"Calculus 3\n",
"Name: Alice, dtype: int64"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 55
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:56:24.449812Z",
"start_time": "2024-10-08T15:56:24.444495Z"
}
},
"cell_type": "code",
"source": "df.loc['Alice', 'Geometry']",
"id": "e8383a54ea29fb9",
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 56
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:56:50.215031Z",
"start_time": "2024-10-08T15:56:50.208654Z"
}
},
"cell_type": "code",
"source": "df.loc[:, 'Geometry']",
"id": "c1c679fc3cb55af2",
"outputs": [
{
"data": {
"text/plain": [
"Name\n",
"Alice 2\n",
"Bob 5\n",
"Claudia 3\n",
"Name: Geometry, dtype: int64"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 57
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:57:34.160098Z",
"start_time": "2024-10-08T15:57:34.151531Z"
}
},
"cell_type": "code",
"source": "df.loc[:, 'Algebra':'Geometry']",
"id": "92e51914411d5b62",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry\n",
"Name \n",
"Alice 1 2\n",
"Bob 4 5\n",
"Claudia 2 3"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 58
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:58:11.624492Z",
"start_time": "2024-10-08T15:58:11.619370Z"
}
},
"cell_type": "code",
"source": "df.iloc[0]",
"id": "ecfe2bd31f85d56d",
"outputs": [
{
"data": {
"text/plain": [
"Algebra 1\n",
"Geometry 2\n",
"Calculus 3\n",
"Name: Alice, dtype: int64"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 59
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:59:10.000507Z",
"start_time": "2024-10-08T15:59:09.993420Z"
}
},
"cell_type": "code",
"source": "new_df = pd.DataFrame([[2, 3, 1], [5, 6, 7]], index=[1, 0])",
"id": "a73e0c8dc5022988",
"outputs": [],
"execution_count": 61
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:59:43.713664Z",
"start_time": "2024-10-08T15:59:43.706577Z"
}
},
"cell_type": "code",
"source": "new_df",
"id": "a661d5b014b5a344",
"outputs": [
{
"data": {
"text/plain": [
" 0 1 2\n",
"1 2 3 1\n",
"0 5 6 7"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 63
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:59:45.304516Z",
"start_time": "2024-10-08T15:59:45.295733Z"
}
},
"cell_type": "code",
"source": "new_df.loc[0]",
"id": "5419be4d652b9e85",
"outputs": [
{
"data": {
"text/plain": [
"0 5\n",
"1 6\n",
"2 7\n",
"Name: 0, dtype: int64"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 64
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T15:59:58.424924Z",
"start_time": "2024-10-08T15:59:58.419555Z"
}
},
"cell_type": "code",
"source": "new_df.iloc[0]",
"id": "cb64977e41cf4682",
"outputs": [
{
"data": {
"text/plain": [
"0 2\n",
"1 3\n",
"2 1\n",
"Name: 1, dtype: int64"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 65
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:01:48.210801Z",
"start_time": "2024-10-08T16:01:48.203565Z"
}
},
"cell_type": "code",
"source": "df[:2]['Algebra']",
"id": "7eda7667dbefe4a7",
"outputs": [
{
"data": {
"text/plain": [
"Name\n",
"Alice 1\n",
"Bob 4\n",
"Name: Algebra, dtype: int64"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 68
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:03:08.716241Z",
"start_time": "2024-10-08T16:03:08.711941Z"
}
},
"cell_type": "code",
"source": "df.loc['Alice', 'Algebra'] = 4",
"id": "cab7bf3e83ebd35d",
"outputs": [],
"execution_count": 70
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:03:12.528988Z",
"start_time": "2024-10-08T16:03:12.520874Z"
}
},
"cell_type": "code",
"source": "df",
"id": "24fa7e016321e8bc",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus\n",
"Name \n",
"Alice 4 2 3\n",
"Bob 4 5 6\n",
"Claudia 2 3 4"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 71
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:06:08.262014Z",
"start_time": "2024-10-08T16:06:08.258938Z"
}
},
"cell_type": "code",
"source": "",
"id": "ed133b29fa931d10",
"outputs": [],
"execution_count": 76
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:03:41.167057Z",
"start_time": "2024-10-08T16:03:41.162340Z"
}
},
"cell_type": "code",
"source": "df[:2]['Algebra']['Alice'] = 2",
"id": "e97047d9c12abfff",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/h2/9nyrt4p55kq6pdvqg02_zmj40000gn/T/ipykernel_65511/2528179609.py:1: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!\n",
"You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.\n",
"A typical example is when you are setting values in a column of a DataFrame, like:\n",
"\n",
"df[\"col\"][row_indexer] = value\n",
"\n",
"Use `df.loc[row_indexer, \"col\"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"\n",
" df[:2]['Algebra']['Alice'] = 2\n",
"/var/folders/h2/9nyrt4p55kq6pdvqg02_zmj40000gn/T/ipykernel_65511/2528179609.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[:2]['Algebra']['Alice'] = 2\n"
]
}
],
"execution_count": 73
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:04:00.928654Z",
"start_time": "2024-10-08T16:04:00.921255Z"
}
},
"cell_type": "code",
"source": "df",
"id": "3237c3cd7ec71940",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus\n",
"Name \n",
"Alice 2 2 3\n",
"Bob 4 5 6\n",
"Claudia 2 3 4"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 74
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:06:21.905650Z",
"start_time": "2024-10-08T16:06:21.901719Z"
}
},
"cell_type": "code",
"source": "short_df_algebra = df[:2]['Algebra']",
"id": "d647bb945acb6c93",
"outputs": [],
"execution_count": 77
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:06:35.652995Z",
"start_time": "2024-10-08T16:06:35.648937Z"
}
},
"cell_type": "code",
"source": "short_df_algebra['Alice'] = 3",
"id": "2f9087d3399ff1ae",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/h2/9nyrt4p55kq6pdvqg02_zmj40000gn/T/ipykernel_65511/3953284833.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" short_df_algebra['Alice'] = 3\n"
]
}
],
"execution_count": 79
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:07:24.320852Z",
"start_time": "2024-10-08T16:07:24.316380Z"
}
},
"cell_type": "code",
"source": "short_df_algebra_copy = df[:2]['Algebra'].copy()",
"id": "af4d4f114c425d31",
"outputs": [],
"execution_count": 80
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:07:34.287696Z",
"start_time": "2024-10-08T16:07:34.284191Z"
}
},
"cell_type": "code",
"source": "short_df_algebra_copy['Alice'] = 4",
"id": "b0825a89f074a7e7",
"outputs": [],
"execution_count": 81
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:09:28.979846Z",
"start_time": "2024-10-08T16:09:28.972651Z"
}
},
"cell_type": "code",
"source": "df.mean()",
"id": "115b60d43a4c7c25",
"outputs": [
{
"data": {
"text/plain": [
"Algebra 3.000000\n",
"Geometry 3.333333\n",
"Calculus 4.333333\n",
"dtype: float64"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 83
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:09:43.121430Z",
"start_time": "2024-10-08T16:09:43.113838Z"
}
},
"cell_type": "code",
"source": "df.mean(axis=1)",
"id": "e983faa20861e3d2",
"outputs": [
{
"data": {
"text/plain": [
"Name\n",
"Alice 2.666667\n",
"Bob 5.000000\n",
"Claudia 3.000000\n",
"dtype: float64"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 84
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:10:40.199613Z",
"start_time": "2024-10-08T16:10:40.194280Z"
}
},
"cell_type": "code",
"source": [
"# Per-student average over the three subject columns only.\n",
"# A bare df.mean(axis=1) averages every column, so re-running this cell\n",
"# would fold already-derived columns (e.g. 'mean' itself) into the result.\n",
"df['mean'] = df[['Algebra', 'Geometry', 'Calculus']].mean(axis=1)"
],
"id": "576f779c4c6b74c5",
"outputs": [],
"execution_count": 86
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:13:34.843002Z",
"start_time": "2024-10-08T16:13:34.827766Z"
}
},
"cell_type": "code",
"source": "df.assign(min=df.min(axis=1))",
"id": "504e046ff1540688",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean min\n",
"Name \n",
"Alice 3 2 3 2.666667 2.0\n",
"Bob 4 5 6 5.000000 4.0\n",
"Claudia 2 3 4 3.000000 2.0"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" <th>min</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2.666667</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5.000000</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3.000000</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 97
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:11:58.268765Z",
"start_time": "2024-10-08T16:11:58.256238Z"
}
},
"cell_type": "code",
"source": "df",
"id": "23c33fdc79c41365",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean\n",
"Name \n",
"Alice 3 2 3 2.666667\n",
"Bob 4 5 6 5.000000\n",
"Claudia 2 3 4 3.000000"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 89
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:13:53.665332Z",
"start_time": "2024-10-08T16:13:53.654913Z"
}
},
"cell_type": "code",
"source": "df",
"id": "d1bb7e0052edbe72",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean\n",
"Name \n",
"Alice 3 2 3 2.666667\n",
"Bob 4 5 6 5.000000\n",
"Claudia 2 3 4 3.000000"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 98
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:14:14.516099Z",
"start_time": "2024-10-08T16:14:14.511226Z"
}
},
"cell_type": "code",
"source": [
"# df already carries a 'mean' column at this point, so df.mean(axis=1)\n",
"# would average that column in as a fourth value. Recompute the mean from\n",
"# the subject columns only, which keeps this cell safe to re-run.\n",
"df['mean'] = df[['Algebra', 'Geometry', 'Calculus']].mean(axis=1)"
],
"id": "7a74db6d35030136",
"outputs": [],
"execution_count": 99
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:15:15.673530Z",
"start_time": "2024-10-08T16:15:15.662127Z"
}
},
"cell_type": "code",
"source": "df",
"id": "81befa1c8d19d513",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean\n",
"Name \n",
"Alice 3 2 3 2.666667\n",
"Bob 4 5 6 5.000000\n",
"Claudia 2 3 4 3.000000"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 101
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:16:36.129185Z",
"start_time": "2024-10-08T16:16:36.125606Z"
}
},
"cell_type": "code",
"source": [
"# Total grade per student over the three subject columns only.\n",
"# df.sum(axis=1) would also add the derived 'mean' column and, on every\n",
"# re-run, the previous 'sum' column, so the total would grow each time the\n",
"# cell is executed (e.g. 32.0 for Alice, whose grades add up to 8).\n",
"df['sum'] = df[['Algebra', 'Geometry', 'Calculus']].sum(axis=1)"
],
"id": "5f0c21aab5611ddf",
"outputs": [],
"execution_count": 107
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:16:37.435109Z",
"start_time": "2024-10-08T16:16:37.421695Z"
}
},
"cell_type": "code",
"source": "df",
"id": "e4b8ba2084ba3bea",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean sum\n",
"Name \n",
"Alice 3 2 3 2.666667 32.0\n",
"Bob 4 5 6 5.000000 60.0\n",
"Claudia 2 3 4 3.000000 36.0"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" <th>sum</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2.666667</td>\n",
" <td>32.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5.000000</td>\n",
" <td>60.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3.000000</td>\n",
" <td>36.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 108
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:19:36.689853Z",
"start_time": "2024-10-08T16:19:36.685647Z"
}
},
"cell_type": "code",
"source": [
"# assign() returns a new frame with a row-wise 'min' column; df itself is\n",
"# left unchanged. The callable receives the frame being assigned to.\n",
"df_with_min = df.assign(min=lambda frame: frame.min(axis=1))"
],
"id": "86de3c97fe9e4a6d",
"outputs": [],
"execution_count": 109
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:19:42.034512Z",
"start_time": "2024-10-08T16:19:42.026007Z"
}
},
"cell_type": "code",
"source": "df_with_min",
"id": "ef518947ab31162",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean sum min\n",
"Name \n",
"Alice 3 2 3 2.666667 32.0 2.0\n",
"Bob 4 5 6 5.000000 60.0 4.0\n",
"Claudia 2 3 4 3.000000 36.0 2.0"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" <th>sum</th>\n",
" <th>min</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2.666667</td>\n",
" <td>32.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5.000000</td>\n",
" <td>60.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3.000000</td>\n",
" <td>36.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 110
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:20:25.477586Z",
"start_time": "2024-10-08T16:20:25.465929Z"
}
},
"cell_type": "code",
"source": "df",
"id": "b5bc0fcf97a2cd18",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean sum\n",
"Name \n",
"Alice 3 2 3 2.666667 32.0\n",
"Bob 4 5 6 5.000000 60.0\n",
"Claudia 2 3 4 3.000000 36.0"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" <th>sum</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2.666667</td>\n",
" <td>32.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bob</th>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5.000000</td>\n",
" <td>60.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3.000000</td>\n",
" <td>36.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 111
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:22:24.434923Z",
"start_time": "2024-10-08T16:22:24.420386Z"
}
},
"cell_type": "code",
"source": "df[df['Algebra'] < 3]",
"id": "d3c0f759a9a24213",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean sum\n",
"Name \n",
"Claudia 2 3 4 3.0 36.0"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" <th>sum</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3.0</td>\n",
" <td>36.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 112
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:22:42.527620Z",
"start_time": "2024-10-08T16:22:42.517953Z"
}
},
"cell_type": "code",
"source": "df['Algebra'] < 3",
"id": "2079cc1f63349655",
"outputs": [
{
"data": {
"text/plain": [
"Name\n",
"Alice False\n",
"Bob False\n",
"Claudia True\n",
"Name: Algebra, dtype: bool"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 113
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:27:39.790116Z",
"start_time": "2024-10-08T16:27:39.779115Z"
}
},
"cell_type": "code",
"source": "df.assign(min=df.min(axis=1))[lambda x: x['Algebra'] < 4]",
"id": "250866be51cb5f4c",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean sum min\n",
"Name \n",
"Alice 3 2 3 2.666667 32.0 2.0\n",
"Claudia 2 3 4 3.000000 36.0 2.0"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" <th>sum</th>\n",
" <th>min</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2.666667</td>\n",
" <td>32.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudia</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3.000000</td>\n",
" <td>36.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 117
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:27:59.674071Z",
"start_time": "2024-10-08T16:27:59.659884Z"
}
},
"cell_type": "code",
"source": [
"df.assign(min=df.min(axis=1))[lambda x: (x['Algebra'] < 4) & \n",
" (x['Calculus'] < 4)]"
],
"id": "51fcbaf7e542ee1a",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean sum min\n",
"Name \n",
"Alice 3 2 3 2.666667 32.0 2.0"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" <th>sum</th>\n",
" <th>min</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2.666667</td>\n",
" <td>32.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 118
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T16:28:23.502343Z",
"start_time": "2024-10-08T16:28:23.488279Z"
}
},
"cell_type": "code",
"source": "df.assign(min=df.min(axis=1)).query('Algebra < 4 and Calculus < 4')",
"id": "53370c5ed1c5c032",
"outputs": [
{
"data": {
"text/plain": [
" Algebra Geometry Calculus mean sum min\n",
"Name \n",
"Alice 3 2 3 2.666667 32.0 2.0"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Algebra</th>\n",
" <th>Geometry</th>\n",
" <th>Calculus</th>\n",
" <th>mean</th>\n",
" <th>sum</th>\n",
" <th>min</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alice</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2.666667</td>\n",
" <td>32.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 119
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:20:49.897358Z",
"start_time": "2024-10-08T17:20:49.445488Z"
}
},
"cell_type": "code",
"source": "df = pd.read_csv(\"https://github.com/anishshah23/IMDb-5000-Data-analysis/raw/refs/heads/master/movie_metadata.csv\")",
"id": "58316e37e2a2e987",
"outputs": [],
"execution_count": 121
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:21:46.134428Z",
"start_time": "2024-10-08T17:21:46.127301Z"
}
},
"cell_type": "code",
"source": "df.dtypes",
"id": "6172100cb64591a8",
"outputs": [
{
"data": {
"text/plain": [
"color object\n",
"director_name object\n",
"num_critic_for_reviews float64\n",
"duration float64\n",
"director_facebook_likes float64\n",
"actor_3_facebook_likes float64\n",
"actor_2_name object\n",
"actor_1_facebook_likes float64\n",
"gross float64\n",
"genres object\n",
"actor_1_name object\n",
"movie_title object\n",
"num_voted_users int64\n",
"cast_total_facebook_likes int64\n",
"actor_3_name object\n",
"facenumber_in_poster float64\n",
"plot_keywords object\n",
"movie_imdb_link object\n",
"num_user_for_reviews float64\n",
"language object\n",
"country object\n",
"content_rating object\n",
"budget float64\n",
"title_year float64\n",
"actor_2_facebook_likes float64\n",
"imdb_score float64\n",
"aspect_ratio float64\n",
"movie_facebook_likes int64\n",
"dtype: object"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 123
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:23:52.262302Z",
"start_time": "2024-10-08T17:23:52.244238Z"
}
},
"cell_type": "code",
"source": "df.info()",
"id": "a1f2bd455ecdddca",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 5043 entries, 0 to 5042\n",
"Data columns (total 28 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 color 5024 non-null object \n",
" 1 director_name 4939 non-null object \n",
" 2 num_critic_for_reviews 4993 non-null float64\n",
" 3 duration 5028 non-null float64\n",
" 4 director_facebook_likes 4939 non-null float64\n",
" 5 actor_3_facebook_likes 5020 non-null float64\n",
" 6 actor_2_name 5030 non-null object \n",
" 7 actor_1_facebook_likes 5036 non-null float64\n",
" 8 gross 4159 non-null float64\n",
" 9 genres 5043 non-null object \n",
" 10 actor_1_name 5036 non-null object \n",
" 11 movie_title 5043 non-null object \n",
" 12 num_voted_users 5043 non-null int64 \n",
" 13 cast_total_facebook_likes 5043 non-null int64 \n",
" 14 actor_3_name 5020 non-null object \n",
" 15 facenumber_in_poster 5030 non-null float64\n",
" 16 plot_keywords 4890 non-null object \n",
" 17 movie_imdb_link 5043 non-null object \n",
" 18 num_user_for_reviews 5022 non-null float64\n",
" 19 language 5029 non-null object \n",
" 20 country 5038 non-null object \n",
" 21 content_rating 4740 non-null object \n",
" 22 budget 4551 non-null float64\n",
" 23 title_year 4935 non-null float64\n",
" 24 actor_2_facebook_likes 5030 non-null float64\n",
" 25 imdb_score 5043 non-null float64\n",
" 26 aspect_ratio 4714 non-null float64\n",
" 27 movie_facebook_likes 5043 non-null int64 \n",
"dtypes: float64(13), int64(3), object(12)\n",
"memory usage: 1.1+ MB\n"
]
}
],
"execution_count": 124
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:24:23.236509Z",
"start_time": "2024-10-08T17:24:23.210935Z"
}
},
"cell_type": "code",
"source": "df",
"id": "bc6b3a6af6ec1241",
"outputs": [
{
"data": {
"text/plain": [
" color director_name num_critic_for_reviews duration \\\n",
"0 Color James Cameron 723.0 178.0 \n",
"1 Color Gore Verbinski 302.0 169.0 \n",
"2 Color Sam Mendes 602.0 148.0 \n",
"3 Color Christopher Nolan 813.0 164.0 \n",
"4 NaN Doug Walker NaN NaN \n",
"... ... ... ... ... \n",
"5038 Color Scott Smith 1.0 87.0 \n",
"5039 Color NaN 43.0 43.0 \n",
"5040 Color Benjamin Roberds 13.0 76.0 \n",
"5041 Color Daniel Hsia 14.0 100.0 \n",
"5042 Color Jon Gunn 43.0 90.0 \n",
"\n",
" director_facebook_likes actor_3_facebook_likes actor_2_name \\\n",
"0 0.0 855.0 Joel David Moore \n",
"1 563.0 1000.0 Orlando Bloom \n",
"2 0.0 161.0 Rory Kinnear \n",
"3 22000.0 23000.0 Christian Bale \n",
"4 131.0 NaN Rob Walker \n",
"... ... ... ... \n",
"5038 2.0 318.0 Daphne Zuniga \n",
"5039 NaN 319.0 Valorie Curry \n",
"5040 0.0 0.0 Maxwell Moody \n",
"5041 0.0 489.0 Daniel Henney \n",
"5042 16.0 16.0 Brian Herzlinger \n",
"\n",
" actor_1_facebook_likes gross genres \\\n",
"0 1000.0 760505847.0 Action|Adventure|Fantasy|Sci-Fi \n",
"1 40000.0 309404152.0 Action|Adventure|Fantasy \n",
"2 11000.0 200074175.0 Action|Adventure|Thriller \n",
"3 27000.0 448130642.0 Action|Thriller \n",
"4 131.0 NaN Documentary \n",
"... ... ... ... \n",
"5038 637.0 NaN Comedy|Drama \n",
"5039 841.0 NaN Crime|Drama|Mystery|Thriller \n",
"5040 0.0 NaN Drama|Horror|Thriller \n",
"5041 946.0 10443.0 Comedy|Drama|Romance \n",
"5042 86.0 85222.0 Documentary \n",
"\n",
" ... num_user_for_reviews language country content_rating budget \\\n",
"0 ... 3054.0 English USA PG-13 237000000.0 \n",
"1 ... 1238.0 English USA PG-13 300000000.0 \n",
"2 ... 994.0 English UK PG-13 245000000.0 \n",
"3 ... 2701.0 English USA PG-13 250000000.0 \n",
"4 ... NaN NaN NaN NaN NaN \n",
"... ... ... ... ... ... ... \n",
"5038 ... 6.0 English Canada NaN NaN \n",
"5039 ... 359.0 English USA TV-14 NaN \n",
"5040 ... 3.0 English USA NaN 1400.0 \n",
"5041 ... 9.0 English USA PG-13 NaN \n",
"5042 ... 84.0 English USA PG 1100.0 \n",
"\n",
" title_year actor_2_facebook_likes imdb_score aspect_ratio \\\n",
"0 2009.0 936.0 7.9 1.78 \n",
"1 2007.0 5000.0 7.1 2.35 \n",
"2 2015.0 393.0 6.8 2.35 \n",
"3 2012.0 23000.0 8.5 2.35 \n",
"4 NaN 12.0 7.1 NaN \n",
"... ... ... ... ... \n",
"5038 2013.0 470.0 7.7 NaN \n",
"5039 NaN 593.0 7.5 16.00 \n",
"5040 2013.0 0.0 6.3 NaN \n",
"5041 2012.0 719.0 6.3 2.35 \n",
"5042 2004.0 23.0 6.6 1.85 \n",
"\n",
" movie_facebook_likes \n",
"0 33000 \n",
"1 0 \n",
"2 85000 \n",
"3 164000 \n",
"4 0 \n",
"... ... \n",
"5038 84 \n",
"5039 32000 \n",
"5040 16 \n",
"5041 660 \n",
"5042 456 \n",
"\n",
"[5043 rows x 28 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>color</th>\n",
" <th>director_name</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>duration</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>actor_2_name</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>gross</th>\n",
" <th>genres</th>\n",
" <th>...</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>language</th>\n",
" <th>country</th>\n",
" <th>content_rating</th>\n",
" <th>budget</th>\n",
" <th>title_year</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>aspect_ratio</th>\n",
" <th>movie_facebook_likes</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Color</td>\n",
" <td>James Cameron</td>\n",
" <td>723.0</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>855.0</td>\n",
" <td>Joel David Moore</td>\n",
" <td>1000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
" <td>...</td>\n",
" <td>3054.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>237000000.0</td>\n",
" <td>2009.0</td>\n",
" <td>936.0</td>\n",
" <td>7.9</td>\n",
" <td>1.78</td>\n",
" <td>33000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Color</td>\n",
" <td>Gore Verbinski</td>\n",
" <td>302.0</td>\n",
" <td>169.0</td>\n",
" <td>563.0</td>\n",
" <td>1000.0</td>\n",
" <td>Orlando Bloom</td>\n",
" <td>40000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>Action|Adventure|Fantasy</td>\n",
" <td>...</td>\n",
" <td>1238.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>300000000.0</td>\n",
" <td>2007.0</td>\n",
" <td>5000.0</td>\n",
" <td>7.1</td>\n",
" <td>2.35</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Color</td>\n",
" <td>Sam Mendes</td>\n",
" <td>602.0</td>\n",
" <td>148.0</td>\n",
" <td>0.0</td>\n",
" <td>161.0</td>\n",
" <td>Rory Kinnear</td>\n",
" <td>11000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>Action|Adventure|Thriller</td>\n",
" <td>...</td>\n",
" <td>994.0</td>\n",
" <td>English</td>\n",
" <td>UK</td>\n",
" <td>PG-13</td>\n",
" <td>245000000.0</td>\n",
" <td>2015.0</td>\n",
" <td>393.0</td>\n",
" <td>6.8</td>\n",
" <td>2.35</td>\n",
" <td>85000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Color</td>\n",
" <td>Christopher Nolan</td>\n",
" <td>813.0</td>\n",
" <td>164.0</td>\n",
" <td>22000.0</td>\n",
" <td>23000.0</td>\n",
" <td>Christian Bale</td>\n",
" <td>27000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>Action|Thriller</td>\n",
" <td>...</td>\n",
" <td>2701.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>250000000.0</td>\n",
" <td>2012.0</td>\n",
" <td>23000.0</td>\n",
" <td>8.5</td>\n",
" <td>2.35</td>\n",
" <td>164000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>Doug Walker</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>131.0</td>\n",
" <td>NaN</td>\n",
" <td>Rob Walker</td>\n",
" <td>131.0</td>\n",
" <td>NaN</td>\n",
" <td>Documentary</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>7.1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5038</th>\n",
" <td>Color</td>\n",
" <td>Scott Smith</td>\n",
" <td>1.0</td>\n",
" <td>87.0</td>\n",
" <td>2.0</td>\n",
" <td>318.0</td>\n",
" <td>Daphne Zuniga</td>\n",
" <td>637.0</td>\n",
" <td>NaN</td>\n",
" <td>Comedy|Drama</td>\n",
" <td>...</td>\n",
" <td>6.0</td>\n",
" <td>English</td>\n",
" <td>Canada</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2013.0</td>\n",
" <td>470.0</td>\n",
" <td>7.7</td>\n",
" <td>NaN</td>\n",
" <td>84</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5039</th>\n",
" <td>Color</td>\n",
" <td>NaN</td>\n",
" <td>43.0</td>\n",
" <td>43.0</td>\n",
" <td>NaN</td>\n",
" <td>319.0</td>\n",
" <td>Valorie Curry</td>\n",
" <td>841.0</td>\n",
" <td>NaN</td>\n",
" <td>Crime|Drama|Mystery|Thriller</td>\n",
" <td>...</td>\n",
" <td>359.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>TV-14</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>593.0</td>\n",
" <td>7.5</td>\n",
" <td>16.00</td>\n",
" <td>32000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5040</th>\n",
" <td>Color</td>\n",
" <td>Benjamin Roberds</td>\n",
" <td>13.0</td>\n",
" <td>76.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Maxwell Moody</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>Drama|Horror|Thriller</td>\n",
" <td>...</td>\n",
" <td>3.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>1400.0</td>\n",
" <td>2013.0</td>\n",
" <td>0.0</td>\n",
" <td>6.3</td>\n",
" <td>NaN</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5041</th>\n",
" <td>Color</td>\n",
" <td>Daniel Hsia</td>\n",
" <td>14.0</td>\n",
" <td>100.0</td>\n",
" <td>0.0</td>\n",
" <td>489.0</td>\n",
" <td>Daniel Henney</td>\n",
" <td>946.0</td>\n",
" <td>10443.0</td>\n",
" <td>Comedy|Drama|Romance</td>\n",
" <td>...</td>\n",
" <td>9.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>NaN</td>\n",
" <td>2012.0</td>\n",
" <td>719.0</td>\n",
" <td>6.3</td>\n",
" <td>2.35</td>\n",
" <td>660</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5042</th>\n",
" <td>Color</td>\n",
" <td>Jon Gunn</td>\n",
" <td>43.0</td>\n",
" <td>90.0</td>\n",
" <td>16.0</td>\n",
" <td>16.0</td>\n",
" <td>Brian Herzlinger</td>\n",
" <td>86.0</td>\n",
" <td>85222.0</td>\n",
" <td>Documentary</td>\n",
" <td>...</td>\n",
" <td>84.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG</td>\n",
" <td>1100.0</td>\n",
" <td>2004.0</td>\n",
" <td>23.0</td>\n",
" <td>6.6</td>\n",
" <td>1.85</td>\n",
" <td>456</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5043 rows × 28 columns</p>\n",
"</div>"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 125
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:30:12.256262Z",
"start_time": "2024-10-08T17:30:12.249681Z"
}
},
"cell_type": "code",
"source": "df['color'].value_counts(dropna=False)",
"id": "ea3ec3b93f5aa762",
"outputs": [
{
"data": {
"text/plain": [
"color\n",
"Color 4815\n",
" Black and White 209\n",
"NaN 19\n",
"Name: count, dtype: int64"
]
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 135
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:29:50.835509Z",
"start_time": "2024-10-08T17:29:50.831686Z"
}
},
"cell_type": "code",
"source": "print(df['color'].value_counts(dropna=False))",
"id": "323bb04f0a7b2451",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"color\n",
"Color 4815\n",
" Black and White 209\n",
"NaN 19\n",
"Name: count, dtype: int64\n"
]
}
],
"execution_count": 134
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:26:49.001857Z",
"start_time": "2024-10-08T17:26:48.985140Z"
}
},
"cell_type": "code",
"source": "df.query('color == \"Black and White\"')",
"id": "c9be8c52f370dce",
"outputs": [
{
"data": {
"text/plain": [
"Empty DataFrame\n",
"Columns: [color, director_name, num_critic_for_reviews, duration, director_facebook_likes, actor_3_facebook_likes, actor_2_name, actor_1_facebook_likes, gross, genres, actor_1_name, movie_title, num_voted_users, cast_total_facebook_likes, actor_3_name, facenumber_in_poster, plot_keywords, movie_imdb_link, num_user_for_reviews, language, country, content_rating, budget, title_year, actor_2_facebook_likes, imdb_score, aspect_ratio, movie_facebook_likes]\n",
"Index: []\n",
"\n",
"[0 rows x 28 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>color</th>\n",
" <th>director_name</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>duration</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>actor_2_name</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>gross</th>\n",
" <th>genres</th>\n",
" <th>...</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>language</th>\n",
" <th>country</th>\n",
" <th>content_rating</th>\n",
" <th>budget</th>\n",
" <th>title_year</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>aspect_ratio</th>\n",
" <th>movie_facebook_likes</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"<p>0 rows × 28 columns</p>\n",
"</div>"
]
},
"execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 129
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:30:39.485659Z",
"start_time": "2024-10-08T17:30:39.457622Z"
}
},
"cell_type": "code",
"source": "df[df['color'] == ' Black and White']",
"id": "a730b1e40e507c2e",
"outputs": [
{
"data": {
"text/plain": [
" color director_name num_critic_for_reviews duration \\\n",
"111 Black and White Michael Bay 191.0 184.0 \n",
"149 Black and White Lee Tamahori 264.0 133.0 \n",
"257 Black and White Martin Scorsese 267.0 170.0 \n",
"272 Black and White Michael Mann 174.0 165.0 \n",
"286 Black and White Martin Campbell 400.0 144.0 \n",
"... ... ... ... ... \n",
"5005 Black and White Andrew Bujalski 52.0 109.0 \n",
"5008 Black and White Kevin Smith 136.0 102.0 \n",
"5015 Black and White Richard Linklater 61.0 100.0 \n",
"5022 Black and White Jim Chuchu 6.0 60.0 \n",
"5028 Black and White Ivan Kavanagh 12.0 83.0 \n",
"\n",
" director_facebook_likes actor_3_facebook_likes actor_2_name \\\n",
"111 0.0 691.0 Jaime King \n",
"149 93.0 746.0 Colin Salmon \n",
"257 17000.0 827.0 Adam Scott \n",
"272 0.0 780.0 Jada Pinkett Smith \n",
"286 258.0 834.0 Tobias Menzies \n",
"... ... ... ... \n",
"5005 26.0 3.0 Kate Dollenmayer \n",
"5008 0.0 216.0 Brian O'Halloran \n",
"5015 0.0 0.0 Richard Linklater \n",
"5022 0.0 4.0 Olwenya Maina \n",
"5028 18.0 0.0 Michael Parle \n",
"\n",
" actor_1_facebook_likes gross genres \\\n",
"111 3000.0 198539855.0 Action|Drama|History|Romance|War \n",
"149 769.0 160201106.0 Action|Adventure|Thriller \n",
"257 29000.0 102608827.0 Biography|Drama \n",
"272 10000.0 58183966.0 Biography|Drama|Sport \n",
"286 6000.0 167007184.0 Action|Adventure|Thriller \n",
"... ... ... ... \n",
"5005 26.0 NaN Comedy \n",
"5008 898.0 3151130.0 Comedy \n",
"5015 5.0 1227508.0 Comedy|Drama \n",
"5022 147.0 NaN Drama \n",
"5028 10.0 NaN Horror \n",
"\n",
" ... num_user_for_reviews language country content_rating budget \\\n",
"111 ... 1999.0 English USA PG-13 140000000.0 \n",
"149 ... 1185.0 English UK PG-13 142000000.0 \n",
"257 ... 799.0 English USA PG-13 110000000.0 \n",
"272 ... 386.0 English USA R 107000000.0 \n",
"286 ... 2301.0 English UK PG-13 150000000.0 \n",
"... ... ... ... ... ... ... \n",
"5005 ... 23.0 English USA R NaN \n",
"5008 ... 615.0 English USA R 230000.0 \n",
"5015 ... 80.0 English USA R 23000.0 \n",
"5022 ... 1.0 Swahili Kenya NaN 15000.0 \n",
"5028 ... 1.0 English Ireland NaN 10000.0 \n",
"\n",
" title_year actor_2_facebook_likes imdb_score aspect_ratio \\\n",
"111 2001.0 961.0 6.1 2.35 \n",
"149 2002.0 766.0 6.1 2.35 \n",
"257 2004.0 3000.0 7.5 2.35 \n",
"272 2001.0 851.0 6.8 2.35 \n",
"286 2006.0 1000.0 8.0 2.35 \n",
"... ... ... ... ... \n",
"5005 2005.0 6.0 6.9 1.66 \n",
"5008 1994.0 657.0 7.8 1.37 \n",
"5015 1991.0 0.0 7.1 1.37 \n",
"5022 2014.0 19.0 7.4 NaN \n",
"5028 2007.0 5.0 6.7 1.33 \n",
"\n",
" movie_facebook_likes \n",
"111 0 \n",
"149 0 \n",
"257 0 \n",
"272 0 \n",
"286 0 \n",
"... ... \n",
"5005 91 \n",
"5008 0 \n",
"5015 2000 \n",
"5022 45 \n",
"5028 105 \n",
"\n",
"[209 rows x 28 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>color</th>\n",
" <th>director_name</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>duration</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>actor_2_name</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>gross</th>\n",
" <th>genres</th>\n",
" <th>...</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>language</th>\n",
" <th>country</th>\n",
" <th>content_rating</th>\n",
" <th>budget</th>\n",
" <th>title_year</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>aspect_ratio</th>\n",
" <th>movie_facebook_likes</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>111</th>\n",
" <td>Black and White</td>\n",
" <td>Michael Bay</td>\n",
" <td>191.0</td>\n",
" <td>184.0</td>\n",
" <td>0.0</td>\n",
" <td>691.0</td>\n",
" <td>Jaime King</td>\n",
" <td>3000.0</td>\n",
" <td>198539855.0</td>\n",
" <td>Action|Drama|History|Romance|War</td>\n",
" <td>...</td>\n",
" <td>1999.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>140000000.0</td>\n",
" <td>2001.0</td>\n",
" <td>961.0</td>\n",
" <td>6.1</td>\n",
" <td>2.35</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>149</th>\n",
" <td>Black and White</td>\n",
" <td>Lee Tamahori</td>\n",
" <td>264.0</td>\n",
" <td>133.0</td>\n",
" <td>93.0</td>\n",
" <td>746.0</td>\n",
" <td>Colin Salmon</td>\n",
" <td>769.0</td>\n",
" <td>160201106.0</td>\n",
" <td>Action|Adventure|Thriller</td>\n",
" <td>...</td>\n",
" <td>1185.0</td>\n",
" <td>English</td>\n",
" <td>UK</td>\n",
" <td>PG-13</td>\n",
" <td>142000000.0</td>\n",
" <td>2002.0</td>\n",
" <td>766.0</td>\n",
" <td>6.1</td>\n",
" <td>2.35</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>257</th>\n",
" <td>Black and White</td>\n",
" <td>Martin Scorsese</td>\n",
" <td>267.0</td>\n",
" <td>170.0</td>\n",
" <td>17000.0</td>\n",
" <td>827.0</td>\n",
" <td>Adam Scott</td>\n",
" <td>29000.0</td>\n",
" <td>102608827.0</td>\n",
" <td>Biography|Drama</td>\n",
" <td>...</td>\n",
" <td>799.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>110000000.0</td>\n",
" <td>2004.0</td>\n",
" <td>3000.0</td>\n",
" <td>7.5</td>\n",
" <td>2.35</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>272</th>\n",
" <td>Black and White</td>\n",
" <td>Michael Mann</td>\n",
" <td>174.0</td>\n",
" <td>165.0</td>\n",
" <td>0.0</td>\n",
" <td>780.0</td>\n",
" <td>Jada Pinkett Smith</td>\n",
" <td>10000.0</td>\n",
" <td>58183966.0</td>\n",
" <td>Biography|Drama|Sport</td>\n",
" <td>...</td>\n",
" <td>386.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>R</td>\n",
" <td>107000000.0</td>\n",
" <td>2001.0</td>\n",
" <td>851.0</td>\n",
" <td>6.8</td>\n",
" <td>2.35</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>286</th>\n",
" <td>Black and White</td>\n",
" <td>Martin Campbell</td>\n",
" <td>400.0</td>\n",
" <td>144.0</td>\n",
" <td>258.0</td>\n",
" <td>834.0</td>\n",
" <td>Tobias Menzies</td>\n",
" <td>6000.0</td>\n",
" <td>167007184.0</td>\n",
" <td>Action|Adventure|Thriller</td>\n",
" <td>...</td>\n",
" <td>2301.0</td>\n",
" <td>English</td>\n",
" <td>UK</td>\n",
" <td>PG-13</td>\n",
" <td>150000000.0</td>\n",
" <td>2006.0</td>\n",
" <td>1000.0</td>\n",
" <td>8.0</td>\n",
" <td>2.35</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5005</th>\n",
" <td>Black and White</td>\n",
" <td>Andrew Bujalski</td>\n",
" <td>52.0</td>\n",
" <td>109.0</td>\n",
" <td>26.0</td>\n",
" <td>3.0</td>\n",
" <td>Kate Dollenmayer</td>\n",
" <td>26.0</td>\n",
" <td>NaN</td>\n",
" <td>Comedy</td>\n",
" <td>...</td>\n",
" <td>23.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>R</td>\n",
" <td>NaN</td>\n",
" <td>2005.0</td>\n",
" <td>6.0</td>\n",
" <td>6.9</td>\n",
" <td>1.66</td>\n",
" <td>91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5008</th>\n",
" <td>Black and White</td>\n",
" <td>Kevin Smith</td>\n",
" <td>136.0</td>\n",
" <td>102.0</td>\n",
" <td>0.0</td>\n",
" <td>216.0</td>\n",
" <td>Brian O'Halloran</td>\n",
" <td>898.0</td>\n",
" <td>3151130.0</td>\n",
" <td>Comedy</td>\n",
" <td>...</td>\n",
" <td>615.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>R</td>\n",
" <td>230000.0</td>\n",
" <td>1994.0</td>\n",
" <td>657.0</td>\n",
" <td>7.8</td>\n",
" <td>1.37</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5015</th>\n",
" <td>Black and White</td>\n",
" <td>Richard Linklater</td>\n",
" <td>61.0</td>\n",
" <td>100.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Richard Linklater</td>\n",
" <td>5.0</td>\n",
" <td>1227508.0</td>\n",
" <td>Comedy|Drama</td>\n",
" <td>...</td>\n",
" <td>80.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>R</td>\n",
" <td>23000.0</td>\n",
" <td>1991.0</td>\n",
" <td>0.0</td>\n",
" <td>7.1</td>\n",
" <td>1.37</td>\n",
" <td>2000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5022</th>\n",
" <td>Black and White</td>\n",
" <td>Jim Chuchu</td>\n",
" <td>6.0</td>\n",
" <td>60.0</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>Olwenya Maina</td>\n",
" <td>147.0</td>\n",
" <td>NaN</td>\n",
" <td>Drama</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>Swahili</td>\n",
" <td>Kenya</td>\n",
" <td>NaN</td>\n",
" <td>15000.0</td>\n",
" <td>2014.0</td>\n",
" <td>19.0</td>\n",
" <td>7.4</td>\n",
" <td>NaN</td>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5028</th>\n",
" <td>Black and White</td>\n",
" <td>Ivan Kavanagh</td>\n",
" <td>12.0</td>\n",
" <td>83.0</td>\n",
" <td>18.0</td>\n",
" <td>0.0</td>\n",
" <td>Michael Parle</td>\n",
" <td>10.0</td>\n",
" <td>NaN</td>\n",
" <td>Horror</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>English</td>\n",
" <td>Ireland</td>\n",
" <td>NaN</td>\n",
" <td>10000.0</td>\n",
" <td>2007.0</td>\n",
" <td>5.0</td>\n",
" <td>6.7</td>\n",
" <td>1.33</td>\n",
" <td>105</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>209 rows × 28 columns</p>\n",
"</div>"
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 136
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:34:00.225709Z",
"start_time": "2024-10-08T17:34:00.219871Z"
}
},
"cell_type": "code",
"source": "print(df['color'].str.strip().value_counts())",
"id": "eb7d569b0a16a83d",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"color\n",
"Color 4815\n",
"Black and White 209\n",
"Name: count, dtype: int64\n"
]
}
],
"execution_count": 141
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:34:23.192776Z",
"start_time": "2024-10-08T17:34:23.187459Z"
}
},
"cell_type": "code",
"source": "df['color'] = df['color'].str.strip()",
"id": "6c511b4cd832c11a",
"outputs": [],
"execution_count": 142
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:34:38.026800Z",
"start_time": "2024-10-08T17:34:38.022366Z"
}
},
"cell_type": "code",
"source": "print(df['color'].value_counts())",
"id": "421370c14d164abb",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"color\n",
"Color 4815\n",
"Black and White 209\n",
"Name: count, dtype: int64\n"
]
}
],
"execution_count": 144
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:38:26.651131Z",
"start_time": "2024-10-08T17:38:25.989420Z"
}
},
"cell_type": "code",
"source": [
"# Reload the raw IMDb-5000 table so the clean-up below starts from untouched data.\n",
"df = pd.read_csv(\n",
"    \"https://github.com/anishshah23/IMDb-5000-Data-analysis/raw/refs/heads/master/movie_metadata.csv\"\n",
")\n",
"\n",
"# Strip leading/trailing whitespace from every object-dtype (text) column in one pass.\n",
"# Earlier cells showed values such as ' Black and White', which make equality filters\n",
"# like color == \"Black and White\" silently return nothing.\n",
"string_columns = df.select_dtypes(include='object').columns\n",
"df[string_columns] = df[string_columns].apply(lambda text_col: text_col.str.strip())"
],
"id": "ecfe7507bf6f2c3f",
"outputs": [],
"execution_count": 150
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:40:28.350100Z",
"start_time": "2024-10-08T17:40:28.327395Z"
}
},
"cell_type": "code",
"source": "df.query('color == \"Color\"')['duration'].mean()",
"id": "86adde931d216bc2",
"outputs": [
{
"data": {
"text/plain": [
"107.04290772755675"
]
},
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 152
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:40:35.855184Z",
"start_time": "2024-10-08T17:40:35.840405Z"
}
},
"cell_type": "code",
"source": "df.query('color == \"Black and White\"')['duration'].mean()",
"id": "f61eaecf5e26e8f9",
"outputs": [
{
"data": {
"text/plain": [
"112.2535885167464"
]
},
"execution_count": 153,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 153
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:44:17.611370Z",
"start_time": "2024-10-08T17:44:17.602090Z"
}
},
"cell_type": "code",
"source": "df.groupby('color', dropna=False)['duration'].mean()",
"id": "4cd34e3be9e028a3",
"outputs": [
{
"data": {
"text/plain": [
"color\n",
"Black and White 112.253589\n",
"Color 107.042908\n",
"NaN 90.722222\n",
"Name: duration, dtype: float64"
]
},
"execution_count": 158,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 158
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:44:43.080513Z",
"start_time": "2024-10-08T17:44:43.074013Z"
}
},
"cell_type": "code",
"source": "np.array([3, 4, np.nan, 2]).mean()",
"id": "ffb46e3dc3c1c416",
"outputs": [
{
"data": {
"text/plain": [
"nan"
]
},
"execution_count": 160,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 160
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:45:59.341021Z",
"start_time": "2024-10-08T17:45:59.334140Z"
}
},
"cell_type": "code",
"source": "np.nanmean(np.array([3, 4, np.nan, 2]))",
"id": "4e01aeed52dd30c",
"outputs": [
{
"data": {
"text/plain": [
"3.0"
]
},
"execution_count": 165,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 165
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:45:05.541991Z",
"start_time": "2024-10-08T17:45:05.535455Z"
}
},
"cell_type": "code",
"source": "pd.Series([3, 4, np.nan, 2]).mean()",
"id": "f2fe112c353d1bb7",
"outputs": [
{
"data": {
"text/plain": [
"3.0"
]
},
"execution_count": 161,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 161
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:46:33.365827Z",
"start_time": "2024-10-08T17:46:33.358925Z"
}
},
"cell_type": "code",
"source": "pd.Series([3, 4, np.nan, 2]).fillna(0).mean()",
"id": "377b381a42b4739e",
"outputs": [
{
"data": {
"text/plain": [
"2.25"
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 167
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:48:48.642412Z",
"start_time": "2024-10-08T17:48:48.633402Z"
}
},
"cell_type": "code",
"source": "df['director_name'].value_counts()['Tim Burton']",
"id": "31a7019b1e784ab8",
"outputs": [
{
"data": {
"text/plain": [
"16"
]
},
"execution_count": 171,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 171
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:49:17.944554Z",
"start_time": "2024-10-08T17:49:17.926433Z"
}
},
"cell_type": "code",
"source": "df.query('director_name == \"Tim Burton\"').shape",
"id": "a8307c748ac3c6f9",
"outputs": [
{
"data": {
"text/plain": [
"(16, 28)"
]
},
"execution_count": 173,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 173
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:51:15.450887Z",
"start_time": "2024-10-08T17:51:15.439895Z"
}
},
"cell_type": "code",
"source": "df['director_name'].value_counts().sort_values(ascending=False)",
"id": "f0043f5af99542d9",
"outputs": [
{
"data": {
"text/plain": [
"director_name\n",
"Steven Spielberg 26\n",
"Woody Allen 22\n",
"Clint Eastwood 20\n",
"Martin Scorsese 20\n",
"Ridley Scott 17\n",
" ..\n",
"Ryan Smith 1\n",
"Travis Romero 1\n",
"Andrew Haigh 1\n",
"Cary Bell 1\n",
"Daniel Hsia 1\n",
"Name: count, Length: 2398, dtype: int64"
]
},
"execution_count": 178,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 178
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:51:38.469367Z",
"start_time": "2024-10-08T17:51:38.455093Z"
}
},
"cell_type": "code",
"source": "df['director_name'].value_counts().sort_index()",
"id": "9d53e8ae34705305",
"outputs": [
{
"data": {
"text/plain": [
"director_name\n",
"A. Raven Cruz 1\n",
"Aaron Hann 1\n",
"Aaron Schneider 1\n",
"Aaron Seltzer 1\n",
"Abel Ferrara 1\n",
" ..\n",
"Zoran Lisinac 1\n",
"Álex de la Iglesia 1\n",
"Émile Gaudreault 1\n",
"Éric Tessier 1\n",
"Étienne Faure 1\n",
"Name: count, Length: 2398, dtype: int64"
]
},
"execution_count": 180,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 180
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T17:55:37.636735Z",
"start_time": "2024-10-08T17:55:37.617012Z"
}
},
"cell_type": "code",
"source": "df['director_name'].str.split(expand=True)",
"id": "6258364f5dec05e",
"outputs": [
{
"data": {
"text/plain": [
" 0 1 2 3\n",
"0 James Cameron None None\n",
"1 Gore Verbinski None None\n",
"2 Sam Mendes None None\n",
"3 Christopher Nolan None None\n",
"4 Doug Walker None None\n",
"... ... ... ... ...\n",
"5038 Scott Smith None None\n",
"5039 NaN NaN NaN NaN\n",
"5040 Benjamin Roberds None None\n",
"5041 Daniel Hsia None None\n",
"5042 Jon Gunn None None\n",
"\n",
"[5043 rows x 4 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>James</td>\n",
" <td>Cameron</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gore</td>\n",
" <td>Verbinski</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Sam</td>\n",
" <td>Mendes</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Christopher</td>\n",
" <td>Nolan</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Doug</td>\n",
" <td>Walker</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5038</th>\n",
" <td>Scott</td>\n",
" <td>Smith</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5039</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5040</th>\n",
" <td>Benjamin</td>\n",
" <td>Roberds</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5041</th>\n",
" <td>Daniel</td>\n",
" <td>Hsia</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5042</th>\n",
" <td>Jon</td>\n",
" <td>Gunn</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5043 rows × 4 columns</p>\n",
"</div>"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 189
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:02:56.097304Z",
"start_time": "2024-10-08T18:02:56.078209Z"
}
},
"cell_type": "code",
"source": [
"# Count movies per director and add a last-name column to sort by.\n",
"# The last whitespace-separated token is used as the last name. Taking the second token\n",
"# instead yields middle initials or nicknames for three-part names (e.g. 'A.' for\n",
"# George A. Romero) and a missing value for single-name directors (e.g. RZA).\n",
"# NOTE: this is still a heuristic - name suffixes and multi-word surnames are not handled.\n",
"df_director_counts = (\n",
"    df['director_name']\n",
"    .value_counts()\n",
"    .reset_index()  # columns: director_name, count\n",
"    .assign(director_last_name=lambda counts: counts['director_name'].str.split().str[-1])\n",
"    .sort_values('director_last_name')\n",
")"
],
"id": "7117e8cf88134f06",
"outputs": [],
"execution_count": 208
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:02:57.906129Z",
"start_time": "2024-10-08T18:02:57.892806Z"
}
},
"cell_type": "code",
"source": "df_director_counts",
"id": "b70bec6c9c2fc777",
"outputs": [
{
"data": {
"text/plain": [
" director_name count director_last_name\n",
"1239 John 'Bud' Cardos 1 'Bud'\n",
"1727 Brian A Miller 1 A\n",
"1790 William A. Fraker 1 A.\n",
"42 George A. Romero 9 A.\n",
"1282 Marius A. Markevicius 1 A.\n",
"... ... ... ...\n",
"1374 Valentine 1 None\n",
"1469 Remo 1 None\n",
"1868 Pitof 1 None\n",
"2089 Maïwenn 1 None\n",
"2276 RZA 1 None\n",
"\n",
"[2398 rows x 3 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>director_name</th>\n",
" <th>count</th>\n",
" <th>director_last_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1239</th>\n",
" <td>John 'Bud' Cardos</td>\n",
" <td>1</td>\n",
" <td>'Bud'</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1727</th>\n",
" <td>Brian A Miller</td>\n",
" <td>1</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1790</th>\n",
" <td>William A. Fraker</td>\n",
" <td>1</td>\n",
" <td>A.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>George A. Romero</td>\n",
" <td>9</td>\n",
" <td>A.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1282</th>\n",
" <td>Marius A. Markevicius</td>\n",
" <td>1</td>\n",
" <td>A.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1374</th>\n",
" <td>Valentine</td>\n",
" <td>1</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1469</th>\n",
" <td>Remo</td>\n",
" <td>1</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1868</th>\n",
" <td>Pitof</td>\n",
" <td>1</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2089</th>\n",
" <td>Maïwenn</td>\n",
" <td>1</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2276</th>\n",
" <td>RZA</td>\n",
" <td>1</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2398 rows × 3 columns</p>\n",
"</div>"
]
},
"execution_count": 209,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 209
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:00:47.208132Z",
"start_time": "2024-10-08T18:00:47.191247Z"
}
},
"cell_type": "code",
"source": "df_director_counts['director_name'].str.split(expand=True).iloc[:, 1]",
"id": "4c18507b580d6633",
"outputs": [
{
"data": {
"text/plain": [
"0 Spielberg\n",
"1 Allen\n",
"2 Eastwood\n",
"3 Scorsese\n",
"4 Scott\n",
" ... \n",
"2393 NaN\n",
"2394 NaN\n",
"2395 NaN\n",
"2396 NaN\n",
"2397 NaN\n",
"Name: 1, Length: 4796, dtype: object"
]
},
"execution_count": 202,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 202
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:00:15.853028Z",
"start_time": "2024-10-08T18:00:15.839475Z"
}
},
"cell_type": "code",
"source": "df_director_counts",
"id": "82be3b7b650df7b4",
"outputs": [
{
"data": {
"text/plain": [
" director_name count 1\n",
"0 Steven Spielberg 26.0 NaN\n",
"1 Woody Allen 22.0 NaN\n",
"2 Clint Eastwood 20.0 NaN\n",
"3 Martin Scorsese 20.0 NaN\n",
"4 Ridley Scott 17.0 NaN\n",
"... ... ... ...\n",
"2393 NaN NaN Crowley\n",
"2394 NaN NaN Pritts\n",
"2395 NaN NaN S.\n",
"2396 NaN NaN Cutler\n",
"2397 NaN NaN Hsia\n",
"\n",
"[4796 rows x 3 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>director_name</th>\n",
" <th>count</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Steven Spielberg</td>\n",
" <td>26.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Woody Allen</td>\n",
" <td>22.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Clint Eastwood</td>\n",
" <td>20.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Martin Scorsese</td>\n",
" <td>20.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Ridley Scott</td>\n",
" <td>17.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2393</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Crowley</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2394</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Pritts</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2395</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>S.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2396</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Cutler</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2397</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Hsia</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4796 rows × 3 columns</p>\n",
"</div>"
]
},
"execution_count": 201,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 201
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:06:33.032674Z",
"start_time": "2024-10-08T18:06:33.015512Z"
}
},
"cell_type": "code",
"source": "df.groupby('country')['imdb_score'].mean().sort_values(ascending=False)",
"id": "2bb56aebab8a4111",
"outputs": [
{
"data": {
"text/plain": [
"country\n",
"Kyrgyzstan 8.7\n",
"Libya 8.4\n",
"United Arab Emirates 8.2\n",
"Egypt 8.1\n",
"Soviet Union 8.1\n",
" ... \n",
"Georgia 5.6\n",
"Peru 5.4\n",
"Aruba 4.8\n",
"Bahamas 4.4\n",
"New Line 4.4\n",
"Name: imdb_score, Length: 65, dtype: float64"
]
},
"execution_count": 214,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 214
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:11:54.143116Z",
"start_time": "2024-10-08T18:11:54.125732Z"
}
},
"cell_type": "code",
"source": [
"(df.groupby('country')['imdb_score']\n",
" .agg(['mean', 'count'])\n",
" .sort_values('mean', ascending=False)\n",
" .query('count > 20'))"
],
"id": "8907b56f4d2de291",
"outputs": [
{
"data": {
"text/plain": [
" mean count\n",
"country \n",
"Japan 6.952174 23\n",
"Italy 6.873913 23\n",
"Spain 6.824242 33\n",
"UK 6.818304 448\n",
"France 6.678571 154\n",
"China 6.623333 30\n",
"India 6.532353 34\n",
"Australia 6.514545 55\n",
"USA 6.367428 3807\n",
"Germany 6.340206 97\n",
"Canada 6.161905 126"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <th>country</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Japan</th>\n",
" <td>6.952174</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Italy</th>\n",
" <td>6.873913</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Spain</th>\n",
" <td>6.824242</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>UK</th>\n",
" <td>6.818304</td>\n",
" <td>448</td>\n",
" </tr>\n",
" <tr>\n",
" <th>France</th>\n",
" <td>6.678571</td>\n",
" <td>154</td>\n",
" </tr>\n",
" <tr>\n",
" <th>China</th>\n",
" <td>6.623333</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>India</th>\n",
" <td>6.532353</td>\n",
" <td>34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Australia</th>\n",
" <td>6.514545</td>\n",
" <td>55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>USA</th>\n",
" <td>6.367428</td>\n",
" <td>3807</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Germany</th>\n",
" <td>6.340206</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Canada</th>\n",
" <td>6.161905</td>\n",
" <td>126</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 218,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 218
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:14:33.264465Z",
"start_time": "2024-10-08T18:14:33.260860Z"
}
},
"cell_type": "code",
"source": "# For each director, find their highest-rated movie(s) by imdb_score (ties are kept)",
"id": "371f40d89ab39dbc",
"outputs": [],
"execution_count": 220
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:16:10.047410Z",
"start_time": "2024-10-08T18:16:10.039605Z"
}
},
"cell_type": "code",
"source": [
"# Highest imdb_score per director; reset_index() turns the grouped Series\n",
"# into a two-column frame (director_name, imdb_score) ready for merging.\n",
"director_best_scores = df.groupby('director_name')['imdb_score'].max().reset_index()"
],
"id": "e297620e06a498df",
"outputs": [],
"execution_count": 225
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:16:23.681135Z",
"start_time": "2024-10-08T18:16:23.611608Z"
}
},
"cell_type": "code",
"source": "df",
"id": "631121c47dd4f084",
"outputs": [
{
"data": {
"text/plain": [
" color director_name num_critic_for_reviews duration \\\n",
"0 Color James Cameron 723.0 178.0 \n",
"1 Color Gore Verbinski 302.0 169.0 \n",
"2 Color Sam Mendes 602.0 148.0 \n",
"3 Color Christopher Nolan 813.0 164.0 \n",
"4 NaN Doug Walker NaN NaN \n",
"... ... ... ... ... \n",
"5038 Color Scott Smith 1.0 87.0 \n",
"5039 Color NaN 43.0 43.0 \n",
"5040 Color Benjamin Roberds 13.0 76.0 \n",
"5041 Color Daniel Hsia 14.0 100.0 \n",
"5042 Color Jon Gunn 43.0 90.0 \n",
"\n",
" director_facebook_likes actor_3_facebook_likes actor_2_name \\\n",
"0 0.0 855.0 Joel David Moore \n",
"1 563.0 1000.0 Orlando Bloom \n",
"2 0.0 161.0 Rory Kinnear \n",
"3 22000.0 23000.0 Christian Bale \n",
"4 131.0 NaN Rob Walker \n",
"... ... ... ... \n",
"5038 2.0 318.0 Daphne Zuniga \n",
"5039 NaN 319.0 Valorie Curry \n",
"5040 0.0 0.0 Maxwell Moody \n",
"5041 0.0 489.0 Daniel Henney \n",
"5042 16.0 16.0 Brian Herzlinger \n",
"\n",
" actor_1_facebook_likes gross genres \\\n",
"0 1000.0 760505847.0 Action|Adventure|Fantasy|Sci-Fi \n",
"1 40000.0 309404152.0 Action|Adventure|Fantasy \n",
"2 11000.0 200074175.0 Action|Adventure|Thriller \n",
"3 27000.0 448130642.0 Action|Thriller \n",
"4 131.0 NaN Documentary \n",
"... ... ... ... \n",
"5038 637.0 NaN Comedy|Drama \n",
"5039 841.0 NaN Crime|Drama|Mystery|Thriller \n",
"5040 0.0 NaN Drama|Horror|Thriller \n",
"5041 946.0 10443.0 Comedy|Drama|Romance \n",
"5042 86.0 85222.0 Documentary \n",
"\n",
" ... num_user_for_reviews language country content_rating budget \\\n",
"0 ... 3054.0 English USA PG-13 237000000.0 \n",
"1 ... 1238.0 English USA PG-13 300000000.0 \n",
"2 ... 994.0 English UK PG-13 245000000.0 \n",
"3 ... 2701.0 English USA PG-13 250000000.0 \n",
"4 ... NaN NaN NaN NaN NaN \n",
"... ... ... ... ... ... ... \n",
"5038 ... 6.0 English Canada NaN NaN \n",
"5039 ... 359.0 English USA TV-14 NaN \n",
"5040 ... 3.0 English USA NaN 1400.0 \n",
"5041 ... 9.0 English USA PG-13 NaN \n",
"5042 ... 84.0 English USA PG 1100.0 \n",
"\n",
" title_year actor_2_facebook_likes imdb_score aspect_ratio \\\n",
"0 2009.0 936.0 7.9 1.78 \n",
"1 2007.0 5000.0 7.1 2.35 \n",
"2 2015.0 393.0 6.8 2.35 \n",
"3 2012.0 23000.0 8.5 2.35 \n",
"4 NaN 12.0 7.1 NaN \n",
"... ... ... ... ... \n",
"5038 2013.0 470.0 7.7 NaN \n",
"5039 NaN 593.0 7.5 16.00 \n",
"5040 2013.0 0.0 6.3 NaN \n",
"5041 2012.0 719.0 6.3 2.35 \n",
"5042 2004.0 23.0 6.6 1.85 \n",
"\n",
" movie_facebook_likes \n",
"0 33000 \n",
"1 0 \n",
"2 85000 \n",
"3 164000 \n",
"4 0 \n",
"... ... \n",
"5038 84 \n",
"5039 32000 \n",
"5040 16 \n",
"5041 660 \n",
"5042 456 \n",
"\n",
"[5043 rows x 28 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>color</th>\n",
" <th>director_name</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>duration</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>actor_2_name</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>gross</th>\n",
" <th>genres</th>\n",
" <th>...</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>language</th>\n",
" <th>country</th>\n",
" <th>content_rating</th>\n",
" <th>budget</th>\n",
" <th>title_year</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>aspect_ratio</th>\n",
" <th>movie_facebook_likes</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Color</td>\n",
" <td>James Cameron</td>\n",
" <td>723.0</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>855.0</td>\n",
" <td>Joel David Moore</td>\n",
" <td>1000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
" <td>...</td>\n",
" <td>3054.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>237000000.0</td>\n",
" <td>2009.0</td>\n",
" <td>936.0</td>\n",
" <td>7.9</td>\n",
" <td>1.78</td>\n",
" <td>33000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Color</td>\n",
" <td>Gore Verbinski</td>\n",
" <td>302.0</td>\n",
" <td>169.0</td>\n",
" <td>563.0</td>\n",
" <td>1000.0</td>\n",
" <td>Orlando Bloom</td>\n",
" <td>40000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>Action|Adventure|Fantasy</td>\n",
" <td>...</td>\n",
" <td>1238.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>300000000.0</td>\n",
" <td>2007.0</td>\n",
" <td>5000.0</td>\n",
" <td>7.1</td>\n",
" <td>2.35</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Color</td>\n",
" <td>Sam Mendes</td>\n",
" <td>602.0</td>\n",
" <td>148.0</td>\n",
" <td>0.0</td>\n",
" <td>161.0</td>\n",
" <td>Rory Kinnear</td>\n",
" <td>11000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>Action|Adventure|Thriller</td>\n",
" <td>...</td>\n",
" <td>994.0</td>\n",
" <td>English</td>\n",
" <td>UK</td>\n",
" <td>PG-13</td>\n",
" <td>245000000.0</td>\n",
" <td>2015.0</td>\n",
" <td>393.0</td>\n",
" <td>6.8</td>\n",
" <td>2.35</td>\n",
" <td>85000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Color</td>\n",
" <td>Christopher Nolan</td>\n",
" <td>813.0</td>\n",
" <td>164.0</td>\n",
" <td>22000.0</td>\n",
" <td>23000.0</td>\n",
" <td>Christian Bale</td>\n",
" <td>27000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>Action|Thriller</td>\n",
" <td>...</td>\n",
" <td>2701.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>250000000.0</td>\n",
" <td>2012.0</td>\n",
" <td>23000.0</td>\n",
" <td>8.5</td>\n",
" <td>2.35</td>\n",
" <td>164000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>Doug Walker</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>131.0</td>\n",
" <td>NaN</td>\n",
" <td>Rob Walker</td>\n",
" <td>131.0</td>\n",
" <td>NaN</td>\n",
" <td>Documentary</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>7.1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5038</th>\n",
" <td>Color</td>\n",
" <td>Scott Smith</td>\n",
" <td>1.0</td>\n",
" <td>87.0</td>\n",
" <td>2.0</td>\n",
" <td>318.0</td>\n",
" <td>Daphne Zuniga</td>\n",
" <td>637.0</td>\n",
" <td>NaN</td>\n",
" <td>Comedy|Drama</td>\n",
" <td>...</td>\n",
" <td>6.0</td>\n",
" <td>English</td>\n",
" <td>Canada</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2013.0</td>\n",
" <td>470.0</td>\n",
" <td>7.7</td>\n",
" <td>NaN</td>\n",
" <td>84</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5039</th>\n",
" <td>Color</td>\n",
" <td>NaN</td>\n",
" <td>43.0</td>\n",
" <td>43.0</td>\n",
" <td>NaN</td>\n",
" <td>319.0</td>\n",
" <td>Valorie Curry</td>\n",
" <td>841.0</td>\n",
" <td>NaN</td>\n",
" <td>Crime|Drama|Mystery|Thriller</td>\n",
" <td>...</td>\n",
" <td>359.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>TV-14</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>593.0</td>\n",
" <td>7.5</td>\n",
" <td>16.00</td>\n",
" <td>32000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5040</th>\n",
" <td>Color</td>\n",
" <td>Benjamin Roberds</td>\n",
" <td>13.0</td>\n",
" <td>76.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Maxwell Moody</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>Drama|Horror|Thriller</td>\n",
" <td>...</td>\n",
" <td>3.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>1400.0</td>\n",
" <td>2013.0</td>\n",
" <td>0.0</td>\n",
" <td>6.3</td>\n",
" <td>NaN</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5041</th>\n",
" <td>Color</td>\n",
" <td>Daniel Hsia</td>\n",
" <td>14.0</td>\n",
" <td>100.0</td>\n",
" <td>0.0</td>\n",
" <td>489.0</td>\n",
" <td>Daniel Henney</td>\n",
" <td>946.0</td>\n",
" <td>10443.0</td>\n",
" <td>Comedy|Drama|Romance</td>\n",
" <td>...</td>\n",
" <td>9.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>NaN</td>\n",
" <td>2012.0</td>\n",
" <td>719.0</td>\n",
" <td>6.3</td>\n",
" <td>2.35</td>\n",
" <td>660</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5042</th>\n",
" <td>Color</td>\n",
" <td>Jon Gunn</td>\n",
" <td>43.0</td>\n",
" <td>90.0</td>\n",
" <td>16.0</td>\n",
" <td>16.0</td>\n",
" <td>Brian Herzlinger</td>\n",
" <td>86.0</td>\n",
" <td>85222.0</td>\n",
" <td>Documentary</td>\n",
" <td>...</td>\n",
" <td>84.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG</td>\n",
" <td>1100.0</td>\n",
" <td>2004.0</td>\n",
" <td>23.0</td>\n",
" <td>6.6</td>\n",
" <td>1.85</td>\n",
" <td>456</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5043 rows × 28 columns</p>\n",
"</div>"
]
},
"execution_count": 228,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 228
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:16:16.174205Z",
"start_time": "2024-10-08T18:16:16.157905Z"
}
},
"cell_type": "code",
"source": "director_best_scores",
"id": "8c29e9943824fee9",
"outputs": [
{
"data": {
"text/plain": [
" director_name imdb_score\n",
"0 A. Raven Cruz 1.9\n",
"1 Aaron Hann 6.0\n",
"2 Aaron Schneider 7.1\n",
"3 Aaron Seltzer 2.7\n",
"4 Abel Ferrara 6.6\n",
"... ... ...\n",
"2393 Zoran Lisinac 7.1\n",
"2394 Álex de la Iglesia 6.1\n",
"2395 Émile Gaudreault 6.7\n",
"2396 Éric Tessier 6.6\n",
"2397 Étienne Faure 4.3\n",
"\n",
"[2398 rows x 2 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>director_name</th>\n",
" <th>imdb_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A. Raven Cruz</td>\n",
" <td>1.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Aaron Hann</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Aaron Schneider</td>\n",
" <td>7.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Aaron Seltzer</td>\n",
" <td>2.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Abel Ferrara</td>\n",
" <td>6.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2393</th>\n",
" <td>Zoran Lisinac</td>\n",
" <td>7.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2394</th>\n",
" <td>Álex de la Iglesia</td>\n",
" <td>6.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2395</th>\n",
" <td>Émile Gaudreault</td>\n",
" <td>6.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2396</th>\n",
" <td>Éric Tessier</td>\n",
" <td>6.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2397</th>\n",
" <td>Étienne Faure</td>\n",
" <td>4.3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2398 rows × 2 columns</p>\n",
"</div>"
]
},
"execution_count": 227,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 227
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:18:58.437625Z",
"start_time": "2024-10-08T18:18:58.432489Z"
}
},
"cell_type": "code",
"source": "df.columns",
"id": "3563e71484d47b51",
"outputs": [
{
"data": {
"text/plain": [
"Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',\n",
" 'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',\n",
" 'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',\n",
" 'movie_title', 'num_voted_users', 'cast_total_facebook_likes',\n",
" 'actor_3_name', 'facenumber_in_poster', 'plot_keywords',\n",
" 'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',\n",
" 'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',\n",
" 'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],\n",
" dtype='object')"
]
},
"execution_count": 231,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 231
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:24:02.304914Z",
"start_time": "2024-10-08T18:24:02.272953Z"
}
},
"cell_type": "code",
"source": [
"# Attach each director's best score to every movie, then keep only the rows\n",
"# whose own score equals it (ties are kept), highest scores first.\n",
"(df[['director_name', 'movie_title', 'title_year', 'imdb_score']]\n",
" .merge(director_best_scores.rename(columns={'imdb_score': 'best_imdb_score'}),\n",
"        on='director_name')\n",
" .query('imdb_score == best_imdb_score')\n",
" .sort_values('imdb_score', ascending=False))"
],
"id": "e6506afc9cec068",
"outputs": [
{
"data": {
"text/plain": [
" director_name movie_title title_year imdb_score \\\n",
"2712 John Blanchard Towering Inferno NaN 9.5 \n",
"1902 Frank Darabont The Shawshank Redemption 1994.0 9.3 \n",
"3401 Francis Ford Coppola The Godfather 1972.0 9.2 \n",
"4315 John Stockwell Kickboxer: Vengeance 2016.0 9.1 \n",
"66 Christopher Nolan The Dark Knight 2008.0 9.0 \n",
"... ... ... ... ... \n",
"4669 Georgia Hilton Subconscious 2015.0 2.2 \n",
"3278 Vondie Curtis-Hall Glitter 2001.0 2.1 \n",
"1698 Frédéric Auburtin United Passions 2014.0 2.0 \n",
"4507 A. Raven Cruz The Helix... Loaded 2005.0 1.9 \n",
"1115 Lawrence Kasanoff Foodfight! 2012.0 1.7 \n",
"\n",
" best_imdb_score \n",
"2712 9.5 \n",
"1902 9.3 \n",
"3401 9.2 \n",
"4315 9.1 \n",
"66 9.0 \n",
"... ... \n",
"4669 2.2 \n",
"3278 2.1 \n",
"1698 2.0 \n",
"4507 1.9 \n",
"1115 1.7 \n",
"\n",
"[2508 rows x 5 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>director_name</th>\n",
" <th>movie_title</th>\n",
" <th>title_year</th>\n",
" <th>imdb_score</th>\n",
" <th>best_imdb_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2712</th>\n",
" <td>John Blanchard</td>\n",
" <td>Towering Inferno</td>\n",
" <td>NaN</td>\n",
" <td>9.5</td>\n",
" <td>9.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1902</th>\n",
" <td>Frank Darabont</td>\n",
" <td>The Shawshank Redemption</td>\n",
" <td>1994.0</td>\n",
" <td>9.3</td>\n",
" <td>9.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3401</th>\n",
" <td>Francis Ford Coppola</td>\n",
" <td>The Godfather</td>\n",
" <td>1972.0</td>\n",
" <td>9.2</td>\n",
" <td>9.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4315</th>\n",
" <td>John Stockwell</td>\n",
" <td>Kickboxer: Vengeance</td>\n",
" <td>2016.0</td>\n",
" <td>9.1</td>\n",
" <td>9.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>Christopher Nolan</td>\n",
" <td>The Dark Knight</td>\n",
" <td>2008.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4669</th>\n",
" <td>Georgia Hilton</td>\n",
" <td>Subconscious</td>\n",
" <td>2015.0</td>\n",
" <td>2.2</td>\n",
" <td>2.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3278</th>\n",
" <td>Vondie Curtis-Hall</td>\n",
" <td>Glitter</td>\n",
" <td>2001.0</td>\n",
" <td>2.1</td>\n",
" <td>2.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1698</th>\n",
" <td>Frédéric Auburtin</td>\n",
" <td>United Passions</td>\n",
" <td>2014.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4507</th>\n",
" <td>A. Raven Cruz</td>\n",
" <td>The Helix... Loaded</td>\n",
" <td>2005.0</td>\n",
" <td>1.9</td>\n",
" <td>1.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1115</th>\n",
" <td>Lawrence Kasanoff</td>\n",
" <td>Foodfight!</td>\n",
" <td>2012.0</td>\n",
" <td>1.7</td>\n",
" <td>1.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2508 rows × 5 columns</p>\n",
"</div>"
]
},
"execution_count": 240,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 240
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:28:36.703177Z",
"start_time": "2024-10-08T18:28:36.696752Z"
}
},
"cell_type": "code",
"source": [
"# Toy frames for the merge demos: key 'a' is in both frames (twice in right),\n",
"# 'b' is only in left and 'w' is only in right.\n",
"left = pd.DataFrame({\n",
"    'x': ['a', 'b'],\n",
"    'y': [1, 2],\n",
"})\n",
"right = pd.DataFrame({\n",
"    'x': ['a', 'w', 'a'],\n",
"    'u': [1, 2, 3],\n",
"})"
],
"id": "1648d10f2911d705",
"outputs": [],
"execution_count": 244
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:29:48.439366Z",
"start_time": "2024-10-08T18:29:48.430714Z"
}
},
"cell_type": "code",
"source": "left",
"id": "9c5b84bba3423050",
"outputs": [
{
"data": {
"text/plain": [
" x y\n",
"0 a 1\n",
"1 b 2"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>b</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 247,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 247
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:29:55.862351Z",
"start_time": "2024-10-08T18:29:55.853999Z"
}
},
"cell_type": "code",
"source": "right",
"id": "29d6d8ee95ba5bda",
"outputs": [
{
"data": {
"text/plain": [
" x u\n",
"0 a 1\n",
"1 w 2\n",
"2 a 3"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" <th>u</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>w</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>a</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 248,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 248
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:29:28.137673Z",
"start_time": "2024-10-08T18:29:28.126103Z"
}
},
"cell_type": "code",
"source": "left.merge(right, on='x', how='left')",
"id": "efb81d66199375f7",
"outputs": [
{
"data": {
"text/plain": [
" x y u\n",
"0 a 1 1.0\n",
"1 a 1 3.0\n",
"2 b 2 NaN"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>u</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>b</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 246,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 246
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:30:20.356012Z",
"start_time": "2024-10-08T18:30:20.344889Z"
}
},
"cell_type": "code",
"source": "left.merge(right, on='x', how='right')",
"id": "b5eebe4ccd140b17",
"outputs": [
{
"data": {
"text/plain": [
" x y u\n",
"0 a 1.0 1\n",
"1 w NaN 2\n",
"2 a 1.0 3"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>u</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>w</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>a</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 249,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 249
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:30:31.261375Z",
"start_time": "2024-10-08T18:30:31.240997Z"
}
},
"cell_type": "code",
"source": "left.merge(right, on='x', how='outer')",
"id": "4139b8db4a1d6c93",
"outputs": [
{
"data": {
"text/plain": [
" x y u\n",
"0 a 1.0 1.0\n",
"1 a 1.0 3.0\n",
"2 b 2.0 NaN\n",
"3 w NaN 2.0"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>u</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>a</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>b</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>w</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 250,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 250
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-08T18:30:50.012700Z",
"start_time": "2024-10-08T18:30:49.990956Z"
}
},
"cell_type": "code",
"source": "left.merge(right, how='cross')",
"id": "2e2bf71f33c3ffb9",
"outputs": [
{
"data": {
"text/plain": [
" x_x y x_y u\n",
"0 a 1 a 1\n",
"1 a 1 w 2\n",
"2 a 1 a 3\n",
"3 b 2 a 1\n",
"4 b 2 w 2\n",
"5 b 2 a 3"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x_x</th>\n",
" <th>y</th>\n",
" <th>x_y</th>\n",
" <th>u</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" <td>w</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" <td>a</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>b</td>\n",
" <td>2</td>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>b</td>\n",
" <td>2</td>\n",
" <td>w</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>b</td>\n",
" <td>2</td>\n",
" <td>a</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 252,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 252
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment