Created
October 9, 2024 09:22
-
-
Save ischurov/015009fb98ae31aea801edefdb2f0f29 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:37:37.910603Z", | |
"start_time": "2024-10-08T15:37:37.906545Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np" | |
], | |
"id": "c58cb687d261f85e", | |
"outputs": [], | |
"execution_count": 24 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:22:19.759802Z", | |
"start_time": "2024-10-08T15:22:19.749133Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "ser = pd.Series([0, 10, 20], index=['a', 'b', 'c'])", | |
"id": "bd3ec52619be18da", | |
"outputs": [], | |
"execution_count": 2 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:23:46.855862Z", | |
"start_time": "2024-10-08T15:23:46.851027Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "ser * 2", | |
"id": "bf11f176dd6eb449", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"a 0\n", | |
"b 20\n", | |
"c 40\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 4 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:24:48.946532Z", | |
"start_time": "2024-10-08T15:24:48.939546Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "ser['b']", | |
"id": "1f4a1b50dc99ca6d", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"10" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 6 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:25:05.735230Z", | |
"start_time": "2024-10-08T15:25:05.727388Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "pd.Series({'a': 0, 'b': 10})", | |
"id": "8ac58ae7312b1fb8", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"a 0\n", | |
"b 10\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 7 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:26:16.079105Z", | |
"start_time": "2024-10-08T15:26:16.074238Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "ser.iloc[1]", | |
"id": "c624ea4fb6d288cc", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"10" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 9 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:28:32.285833Z", | |
"start_time": "2024-10-08T15:28:32.281799Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"alice = pd.Series({'Algebra': 5, 'Geometry': 3, 'Music': 4})\n", | |
"bob = pd.Series({'Algebra': 4, 'Music': 3, 'History': 4})" | |
], | |
"id": "5a4b994054ef57d1", | |
"outputs": [], | |
"execution_count": 10 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:30:26.399757Z", | |
"start_time": "2024-10-08T15:30:26.392114Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "alice.dtype", | |
"id": "318f10ae9ae68be7", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"dtype('int64')" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 13 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:30:40.231293Z", | |
"start_time": "2024-10-08T15:30:40.226064Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "grade = alice + bob", | |
"id": "1f63449f7fe2ba89", | |
"outputs": [], | |
"execution_count": 14 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:30:44.898467Z", | |
"start_time": "2024-10-08T15:30:44.891996Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "grade.dtype", | |
"id": "98a269af4cce110b", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"dtype('float64')" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 15 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:30:51.322820Z", | |
"start_time": "2024-10-08T15:30:51.315929Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "grade", | |
"id": "64b2fc3f8558262f", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Algebra 9.0\n", | |
"Geometry NaN\n", | |
"History NaN\n", | |
"Music 7.0\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 16 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:32:50.711821Z", | |
"start_time": "2024-10-08T15:32:50.705023Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "grade['Geometry']", | |
"id": "15a8c8c5d9e5b2ed", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"nan" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 17 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:33:06.697152Z", | |
"start_time": "2024-10-08T15:33:06.692071Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "float(\"NaN\")", | |
"id": "7959b7dbf41f9f67", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"nan" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 18 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:33:52.682736Z", | |
"start_time": "2024-10-08T15:33:52.677059Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"if grade['Geometry'] == float(\"NaN\"):\n", | |
" print(\"It's a NaN\")\n", | |
"else:\n", | |
" print(\"????!!!\")" | |
], | |
"id": "43fad29e9b0ac7fc", | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"????!!!\n" | |
] | |
} | |
], | |
"execution_count": 19 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:34:38.834179Z", | |
"start_time": "2024-10-08T15:34:38.829401Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"if float(\"NaN\") == float(\"NaN\"):\n", | |
" print(\"It's a NaN\")\n", | |
"else:\n", | |
" print(\"????!!!\")" | |
], | |
"id": "903f09a11cd3cd84", | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"????!!!\n" | |
] | |
} | |
], | |
"execution_count": 20 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:36:51.374542Z", | |
"start_time": "2024-10-08T15:36:51.370393Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"if pd.isna(float(\"NaN\")):\n", | |
" print(\"Yes\")" | |
], | |
"id": "639ab94aca4ac217", | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Yes\n" | |
] | |
} | |
], | |
"execution_count": 22 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:37:11.332300Z", | |
"start_time": "2024-10-08T15:37:11.326852Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "pd.isna(None)", | |
"id": "fba4807a8f25e5af", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 23 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:37:48.652825Z", | |
"start_time": "2024-10-08T15:37:48.648008Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "np.isnan(float(\"NaN\"))", | |
"id": "7d0746a7a05b458", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 25 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:37:54.472780Z", | |
"start_time": "2024-10-08T15:37:53.617154Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# np.isnan is a ufunc that cannot coerce None, so it raises TypeError (pd.isna(None) returns True).\n", | |
"# Catch and print the error so that Restart & Run All continues past this cell.\n", | |
"try:\n", | |
"    np.isnan(None)\n", | |
"except TypeError as err:\n", | |
"    print(f\"TypeError: {err}\")" | |
], | |
"id": "3b6e3cfacf72325e", | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n" | |
] | |
} | |
], | |
"execution_count": 26 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:38:46.761386Z", | |
"start_time": "2024-10-08T15:38:46.755984Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "alice", | |
"id": "c9e96c0b0b4d9bfa", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Algebra 5\n", | |
"Geometry 3\n", | |
"Music 4\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 27 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:39:57.648890Z", | |
"start_time": "2024-10-08T15:39:57.630252Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "alice['Algebra':'Geometry']", | |
"id": "762014ceac75eb64", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Algebra 5\n", | |
"Geometry 3\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 28 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:41:37.351614Z", | |
"start_time": "2024-10-08T15:41:37.346248Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "alice[0:1 + 1]", | |
"id": "a3a4732f7eb6c6c6", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Algebra 5\n", | |
"Geometry 3\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 30 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:42:44.651498Z", | |
"start_time": "2024-10-08T15:42:44.646548Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "alice.values", | |
"id": "2626541ab7b04f17", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([5, 3, 4])" | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 31 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:45:34.761149Z", | |
"start_time": "2024-10-08T15:45:34.754972Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"df = pd.DataFrame([['Alice', 1, 2, 3], ['Bob', 4, 5, 6], ['Claudia', 2, 3, 4]],\n", | |
" columns=['Name', 'Algebra', 'Geometry', 'Calculus']\n", | |
" )" | |
], | |
"id": "acde8e41fddbe10c", | |
"outputs": [], | |
"execution_count": 32 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:46:17.242826Z", | |
"start_time": "2024-10-08T15:46:17.236930Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.dtypes", | |
"id": "cf37a1dfc0369020", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Name object\n", | |
"Algebra int64\n", | |
"Geometry int64\n", | |
"Calculus int64\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 34 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:47:21.206335Z", | |
"start_time": "2024-10-08T15:47:21.201629Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "x = np.array(['a', 'b'])", | |
"id": "13ae050678d6213b", | |
"outputs": [], | |
"execution_count": 36 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:47:26.540578Z", | |
"start_time": "2024-10-08T15:47:26.537656Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "x[0] = 'hello, world'", | |
"id": "49ad7f2c773d1e7", | |
"outputs": [], | |
"execution_count": 37 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:47:28.785179Z", | |
"start_time": "2024-10-08T15:47:28.779415Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "x", | |
"id": "557c5325df8a2eca", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(['h', 'b'], dtype='<U1')" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 38 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:49:06.998663Z", | |
"start_time": "2024-10-08T15:49:06.990629Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"id": "4e085de14e3a9022", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Name Algebra Geometry Calculus\n", | |
"0 Alice 1 2 3\n", | |
"1 Bob 4 5 6\n", | |
"2 Claudia 2 3 4" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Name</th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Alice</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Bob</td>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Claudia</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 39 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:50:03.103963Z", | |
"start_time": "2024-10-08T15:50:03.092774Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.set_index('Name')", | |
"id": "4f74b53e6070f457", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus\n", | |
"Name \n", | |
"Alice 1 2 3\n", | |
"Bob 4 5 6\n", | |
"Claudia 2 3 4" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 40, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 40 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:50:38.376550Z", | |
"start_time": "2024-10-08T15:50:38.373156Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# set_index returns a new frame; rebind the name to keep the result instead of mutating in place.\n", | |
"df = df.set_index('Name')" | |
], | |
"id": "2f6b23df5741c65e", | |
"outputs": [], | |
"execution_count": 42 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:51:13.940890Z", | |
"start_time": "2024-10-08T15:51:13.936689Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.index", | |
"id": "8c617b2a0d759241", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['Alice', 'Bob', 'Claudia'], dtype='object', name='Name')" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 44 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:51:21.838570Z", | |
"start_time": "2024-10-08T15:51:21.834417Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.columns", | |
"id": "cc09541a90209d9b", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['Algebra', 'Geometry', 'Calculus'], dtype='object')" | |
] | |
}, | |
"execution_count": 45, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 45 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:51:51.772858Z", | |
"start_time": "2024-10-08T15:51:51.766262Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "pd.DataFrame([[1, 20, 54], [13, 23, 54]])", | |
"id": "65ecfd085804c270", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" 0 1 2\n", | |
"0 1 20 54\n", | |
"1 13 23 54" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>20</td>\n", | |
" <td>54</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>13</td>\n", | |
" <td>23</td>\n", | |
" <td>54</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 48, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 48 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:52:48.539608Z", | |
"start_time": "2024-10-08T15:52:46.557181Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# df[...] with a single label looks the label up among the columns; 'Alice' is a row label\n", | |
"# now that 'Name' is the index, so this raises KeyError (df.loc['Alice'] selects the row).\n", | |
"# Catch and print the error so that Restart & Run All continues past this cell.\n", | |
"try:\n", | |
"    df['Alice']\n", | |
"except KeyError as err:\n", | |
"    print(f\"KeyError: {err}\")" | |
], | |
"id": "179af1e5db29165e", | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"KeyError: 'Alice'\n" | |
] | |
} | |
], | |
"execution_count": 50 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:53:37.251197Z", | |
"start_time": "2024-10-08T15:53:37.245867Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['Algebra']", | |
"id": "24b55cb23a2f6f1c", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Name\n", | |
"Alice 1\n", | |
"Bob 4\n", | |
"Claudia 2\n", | |
"Name: Algebra, dtype: int64" | |
] | |
}, | |
"execution_count": 53, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 53 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:54:19.465857Z", | |
"start_time": "2024-10-08T15:54:19.452564Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df[0:2]", | |
"id": "77ce5c9c2520934b", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus\n", | |
"Name \n", | |
"Alice 1 2 3\n", | |
"Bob 4 5 6" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 54, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 54 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:56:13.903004Z", | |
"start_time": "2024-10-08T15:56:13.888144Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.loc['Alice']", | |
"id": "b38088e6de57522f", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Algebra 1\n", | |
"Geometry 2\n", | |
"Calculus 3\n", | |
"Name: Alice, dtype: int64" | |
] | |
}, | |
"execution_count": 55, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 55 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:56:24.449812Z", | |
"start_time": "2024-10-08T15:56:24.444495Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.loc['Alice', 'Geometry']", | |
"id": "e8383a54ea29fb9", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2" | |
] | |
}, | |
"execution_count": 56, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 56 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:56:50.215031Z", | |
"start_time": "2024-10-08T15:56:50.208654Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.loc[:, 'Geometry']", | |
"id": "c1c679fc3cb55af2", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Name\n", | |
"Alice 2\n", | |
"Bob 5\n", | |
"Claudia 3\n", | |
"Name: Geometry, dtype: int64" | |
] | |
}, | |
"execution_count": 57, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 57 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:57:34.160098Z", | |
"start_time": "2024-10-08T15:57:34.151531Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.loc[:, 'Algebra':'Geometry']", | |
"id": "92e51914411d5b62", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry\n", | |
"Name \n", | |
"Alice 1 2\n", | |
"Bob 4 5\n", | |
"Claudia 2 3" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 58, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 58 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:58:11.624492Z", | |
"start_time": "2024-10-08T15:58:11.619370Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.iloc[0]", | |
"id": "ecfe2bd31f85d56d", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Algebra 1\n", | |
"Geometry 2\n", | |
"Calculus 3\n", | |
"Name: Alice, dtype: int64" | |
] | |
}, | |
"execution_count": 59, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 59 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:59:10.000507Z", | |
"start_time": "2024-10-08T15:59:09.993420Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "new_df = pd.DataFrame([[2, 3, 1], [5, 6, 7]], index=[1, 0])", | |
"id": "a73e0c8dc5022988", | |
"outputs": [], | |
"execution_count": 61 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:59:43.713664Z", | |
"start_time": "2024-10-08T15:59:43.706577Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "new_df", | |
"id": "a661d5b014b5a344", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" 0 1 2\n", | |
"1 2 3 1\n", | |
"0 5 6 7" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 63, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 63 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:59:45.304516Z", | |
"start_time": "2024-10-08T15:59:45.295733Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "new_df.loc[0]", | |
"id": "5419be4d652b9e85", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 5\n", | |
"1 6\n", | |
"2 7\n", | |
"Name: 0, dtype: int64" | |
] | |
}, | |
"execution_count": 64, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 64 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T15:59:58.424924Z", | |
"start_time": "2024-10-08T15:59:58.419555Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "new_df.iloc[0]", | |
"id": "cb64977e41cf4682", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 2\n", | |
"1 3\n", | |
"2 1\n", | |
"Name: 1, dtype: int64" | |
] | |
}, | |
"execution_count": 65, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 65 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:01:48.210801Z", | |
"start_time": "2024-10-08T16:01:48.203565Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df[:2]['Algebra']", | |
"id": "7eda7667dbefe4a7", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Name\n", | |
"Alice 1\n", | |
"Bob 4\n", | |
"Name: Algebra, dtype: int64" | |
] | |
}, | |
"execution_count": 68, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 68 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:03:08.716241Z", | |
"start_time": "2024-10-08T16:03:08.711941Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.loc['Alice', 'Algebra'] = 4", | |
"id": "cab7bf3e83ebd35d", | |
"outputs": [], | |
"execution_count": 70 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:03:12.528988Z", | |
"start_time": "2024-10-08T16:03:12.520874Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"id": "24fa7e016321e8bc", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus\n", | |
"Name \n", | |
"Alice 4 2 3\n", | |
"Bob 4 5 6\n", | |
"Claudia 2 3 4" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 71, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 71 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:06:08.262014Z", | |
"start_time": "2024-10-08T16:06:08.258938Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "", | |
"id": "ed133b29fa931d10", | |
"outputs": [], | |
"execution_count": 76 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:03:41.167057Z", | |
"start_time": "2024-10-08T16:03:41.162340Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df[:2]['Algebra'][b'Alice'] = 2", | |
"id": "e97047d9c12abfff", | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/var/folders/h2/9nyrt4p55kq6pdvqg02_zmj40000gn/T/ipykernel_65511/2528179609.py:1: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!\n", | |
"You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.\n", | |
"A typical example is when you are setting values in a column of a DataFrame, like:\n", | |
"\n", | |
"df[\"col\"][row_indexer] = value\n", | |
"\n", | |
"Use `df.loc[row_indexer, \"col\"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.\n", | |
"\n", | |
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | |
"\n", | |
" df[:2]['Algebra']['Alice'] = 2\n", | |
"/var/folders/h2/9nyrt4p55kq6pdvqg02_zmj40000gn/T/ipykernel_65511/2528179609.py:1: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame\n", | |
"\n", | |
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | |
" df[:2]['Algebra']['Alice'] = 2\n" | |
] | |
} | |
], | |
"execution_count": 73 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:04:00.928654Z", | |
"start_time": "2024-10-08T16:04:00.921255Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"id": "3237c3cd7ec71940", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus\n", | |
"Name \n", | |
"Alice 2 2 3\n", | |
"Bob 4 5 6\n", | |
"Claudia 2 3 4" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 74, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 74 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:06:21.905650Z", | |
"start_time": "2024-10-08T16:06:21.901719Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "short_df_algebra = df[:2]['Algebra']", | |
"id": "d647bb945acb6c93", | |
"outputs": [], | |
"execution_count": 77 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:06:35.652995Z", | |
"start_time": "2024-10-08T16:06:35.648937Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "short_df_algebra['Alice'] = 3", | |
"id": "2f9087d3399ff1ae", | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/var/folders/h2/9nyrt4p55kq6pdvqg02_zmj40000gn/T/ipykernel_65511/3953284833.py:1: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame\n", | |
"\n", | |
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | |
" short_df_algebra['Alice'] = 3\n" | |
] | |
} | |
], | |
"execution_count": 79 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:07:24.320852Z", | |
"start_time": "2024-10-08T16:07:24.316380Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "short_df_algebra_copy = df[:2]['Algebra'].copy()", | |
"id": "af4d4f114c425d31", | |
"outputs": [], | |
"execution_count": 80 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:07:34.287696Z", | |
"start_time": "2024-10-08T16:07:34.284191Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "short_df_algebra_copy['Alice'] = 4", | |
"id": "b0825a89f074a7e7", | |
"outputs": [], | |
"execution_count": 81 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:09:28.979846Z", | |
"start_time": "2024-10-08T16:09:28.972651Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.mean()", | |
"id": "115b60d43a4c7c25", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Algebra 3.000000\n", | |
"Geometry 3.333333\n", | |
"Calculus 4.333333\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 83, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 83 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:09:43.121430Z", | |
"start_time": "2024-10-08T16:09:43.113838Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.mean(axis=1)", | |
"id": "e983faa20861e3d2", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Name\n", | |
"Alice 2.666667\n", | |
"Bob 5.000000\n", | |
"Claudia 3.000000\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 84, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 84 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:10:40.199613Z", | |
"start_time": "2024-10-08T16:10:40.194280Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['mean'] = df.mean(axis=1)", | |
"id": "576f779c4c6b74c5", | |
"outputs": [], | |
"execution_count": 86 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:13:34.843002Z", | |
"start_time": "2024-10-08T16:13:34.827766Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.assign(min=df.min(axis=1))", | |
"id": "504e046ff1540688", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean min\n", | |
"Name \n", | |
"Alice 3 2 3 2.666667 2.0\n", | |
"Bob 4 5 6 5.000000 4.0\n", | |
"Claudia 2 3 4 3.000000 2.0" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" <th>min</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2.666667</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>5.000000</td>\n", | |
" <td>4.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 97, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 97 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:11:58.268765Z", | |
"start_time": "2024-10-08T16:11:58.256238Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"id": "23c33fdc79c41365", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean\n", | |
"Name \n", | |
"Alice 3 2 3 2.666667\n", | |
"Bob 4 5 6 5.000000\n", | |
"Claudia 2 3 4 3.000000" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2.666667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>5.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" <td>3.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 89, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 89 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:13:53.665332Z", | |
"start_time": "2024-10-08T16:13:53.654913Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"id": "d1bb7e0052edbe72", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean\n", | |
"Name \n", | |
"Alice 3 2 3 2.666667\n", | |
"Bob 4 5 6 5.000000\n", | |
"Claudia 2 3 4 3.000000" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2.666667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>5.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" <td>3.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 98, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 98 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:14:14.516099Z", | |
"start_time": "2024-10-08T16:14:14.511226Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['mean'] = df.mean(axis=1)", | |
"id": "7a74db6d35030136", | |
"outputs": [], | |
"execution_count": 99 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:15:15.673530Z", | |
"start_time": "2024-10-08T16:15:15.662127Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"id": "81befa1c8d19d513", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean\n", | |
"Name \n", | |
"Alice 3 2 3 2.666667\n", | |
"Bob 4 5 6 5.000000\n", | |
"Claudia 2 3 4 3.000000" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2.666667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>5.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" <td>3.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 101, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 101 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:16:36.129185Z", | |
"start_time": "2024-10-08T16:16:36.125606Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['sum'] = df.sum(axis=1)", | |
"id": "5f0c21aab5611ddf", | |
"outputs": [], | |
"execution_count": 107 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:16:37.435109Z", | |
"start_time": "2024-10-08T16:16:37.421695Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"id": "e4b8ba2084ba3bea", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean sum\n", | |
"Name \n", | |
"Alice 3 2 3 2.666667 32.0\n", | |
"Bob 4 5 6 5.000000 60.0\n", | |
"Claudia 2 3 4 3.000000 36.0" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" <th>sum</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2.666667</td>\n", | |
" <td>32.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>5.000000</td>\n", | |
" <td>60.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>36.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 108, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 108 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:19:36.689853Z", | |
"start_time": "2024-10-08T16:19:36.685647Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df_with_min = df.assign(min=df.min(axis=1))", | |
"id": "86de3c97fe9e4a6d", | |
"outputs": [], | |
"execution_count": 109 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:19:42.034512Z", | |
"start_time": "2024-10-08T16:19:42.026007Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df_with_min", | |
"id": "ef518947ab31162", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean sum min\n", | |
"Name \n", | |
"Alice 3 2 3 2.666667 32.0 2.0\n", | |
"Bob 4 5 6 5.000000 60.0 4.0\n", | |
"Claudia 2 3 4 3.000000 36.0 2.0" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" <th>sum</th>\n", | |
" <th>min</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2.666667</td>\n", | |
" <td>32.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>5.000000</td>\n", | |
" <td>60.0</td>\n", | |
" <td>4.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>36.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 110, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 110 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:20:25.477586Z", | |
"start_time": "2024-10-08T16:20:25.465929Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"id": "b5bc0fcf97a2cd18", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean sum\n", | |
"Name \n", | |
"Alice 3 2 3 2.666667 32.0\n", | |
"Bob 4 5 6 5.000000 60.0\n", | |
"Claudia 2 3 4 3.000000 36.0" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" <th>sum</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2.666667</td>\n", | |
" <td>32.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Bob</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>5.000000</td>\n", | |
" <td>60.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>36.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 111, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 111 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:22:24.434923Z", | |
"start_time": "2024-10-08T16:22:24.420386Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df[df['Algebra'] < 3]", | |
"id": "d3c0f759a9a24213", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean sum\n", | |
"Name \n", | |
"Claudia 2 3 4 3.0 36.0" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" <th>sum</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" <td>3.0</td>\n", | |
" <td>36.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 112, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 112 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:22:42.527620Z", | |
"start_time": "2024-10-08T16:22:42.517953Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['Algebra'] < 3", | |
"id": "2079cc1f63349655", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Name\n", | |
"Alice False\n", | |
"Bob False\n", | |
"Claudia True\n", | |
"Name: Algebra, dtype: bool" | |
] | |
}, | |
"execution_count": 113, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 113 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:27:39.790116Z", | |
"start_time": "2024-10-08T16:27:39.779115Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.assign(min=df.min(axis=1))[lambda x: x['Algebra'] < 4]", | |
"id": "250866be51cb5f4c", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean sum min\n", | |
"Name \n", | |
"Alice 3 2 3 2.666667 32.0 2.0\n", | |
"Claudia 2 3 4 3.000000 36.0 2.0" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" <th>sum</th>\n", | |
" <th>min</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2.666667</td>\n", | |
" <td>32.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Claudia</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>36.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 117, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 117 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:27:59.674071Z", | |
"start_time": "2024-10-08T16:27:59.659884Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"df.assign(min=df.min(axis=1))[lambda x: (x['Algebra'] < 4) & \n", | |
" (x['Calculus'] < 4)]" | |
], | |
"id": "51fcbaf7e542ee1a", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean sum min\n", | |
"Name \n", | |
"Alice 3 2 3 2.666667 32.0 2.0" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" <th>sum</th>\n", | |
" <th>min</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2.666667</td>\n", | |
" <td>32.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 118, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 118 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T16:28:23.502343Z", | |
"start_time": "2024-10-08T16:28:23.488279Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.assign(min=df.min(axis=1)).query('Algebra < 4 and Calculus < 4')", | |
"id": "53370c5ed1c5c032", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" Algebra Geometry Calculus mean sum min\n", | |
"Name \n", | |
"Alice 3 2 3 2.666667 32.0 2.0" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Algebra</th>\n", | |
" <th>Geometry</th>\n", | |
" <th>Calculus</th>\n", | |
" <th>mean</th>\n", | |
" <th>sum</th>\n", | |
" <th>min</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Name</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Alice</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2.666667</td>\n", | |
" <td>32.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 119, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 119 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:20:49.897358Z", | |
"start_time": "2024-10-08T17:20:49.445488Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df = pd.read_csv(\"https://github.com/anishshah23/IMDb-5000-Data-analysis/raw/refs/heads/master/movie_metadata.csv\")", | |
"id": "58316e37e2a2e987", | |
"outputs": [], | |
"execution_count": 121 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:21:46.134428Z", | |
"start_time": "2024-10-08T17:21:46.127301Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.dtypes", | |
"id": "6172100cb64591a8", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"color object\n", | |
"director_name object\n", | |
"num_critic_for_reviews float64\n", | |
"duration float64\n", | |
"director_facebook_likes float64\n", | |
"actor_3_facebook_likes float64\n", | |
"actor_2_name object\n", | |
"actor_1_facebook_likes float64\n", | |
"gross float64\n", | |
"genres object\n", | |
"actor_1_name object\n", | |
"movie_title object\n", | |
"num_voted_users int64\n", | |
"cast_total_facebook_likes int64\n", | |
"actor_3_name object\n", | |
"facenumber_in_poster float64\n", | |
"plot_keywords object\n", | |
"movie_imdb_link object\n", | |
"num_user_for_reviews float64\n", | |
"language object\n", | |
"country object\n", | |
"content_rating object\n", | |
"budget float64\n", | |
"title_year float64\n", | |
"actor_2_facebook_likes float64\n", | |
"imdb_score float64\n", | |
"aspect_ratio float64\n", | |
"movie_facebook_likes int64\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 123, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 123 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:23:52.262302Z", | |
"start_time": "2024-10-08T17:23:52.244238Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.info()", | |
"id": "a1f2bd455ecdddca", | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 5043 entries, 0 to 5042\n", | |
"Data columns (total 28 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 color 5024 non-null object \n", | |
" 1 director_name 4939 non-null object \n", | |
" 2 num_critic_for_reviews 4993 non-null float64\n", | |
" 3 duration 5028 non-null float64\n", | |
" 4 director_facebook_likes 4939 non-null float64\n", | |
" 5 actor_3_facebook_likes 5020 non-null float64\n", | |
" 6 actor_2_name 5030 non-null object \n", | |
" 7 actor_1_facebook_likes 5036 non-null float64\n", | |
" 8 gross 4159 non-null float64\n", | |
" 9 genres 5043 non-null object \n", | |
" 10 actor_1_name 5036 non-null object \n", | |
" 11 movie_title 5043 non-null object \n", | |
" 12 num_voted_users 5043 non-null int64 \n", | |
" 13 cast_total_facebook_likes 5043 non-null int64 \n", | |
" 14 actor_3_name 5020 non-null object \n", | |
" 15 facenumber_in_poster 5030 non-null float64\n", | |
" 16 plot_keywords 4890 non-null object \n", | |
" 17 movie_imdb_link 5043 non-null object \n", | |
" 18 num_user_for_reviews 5022 non-null float64\n", | |
" 19 language 5029 non-null object \n", | |
" 20 country 5038 non-null object \n", | |
" 21 content_rating 4740 non-null object \n", | |
" 22 budget 4551 non-null float64\n", | |
" 23 title_year 4935 non-null float64\n", | |
" 24 actor_2_facebook_likes 5030 non-null float64\n", | |
" 25 imdb_score 5043 non-null float64\n", | |
" 26 aspect_ratio 4714 non-null float64\n", | |
" 27 movie_facebook_likes 5043 non-null int64 \n", | |
"dtypes: float64(13), int64(3), object(12)\n", | |
"memory usage: 1.1+ MB\n" | |
] | |
} | |
], | |
"execution_count": 124 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:24:23.236509Z", | |
"start_time": "2024-10-08T17:24:23.210935Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"id": "bc6b3a6af6ec1241", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" color director_name num_critic_for_reviews duration \\\n", | |
"0 Color James Cameron 723.0 178.0 \n", | |
"1 Color Gore Verbinski 302.0 169.0 \n", | |
"2 Color Sam Mendes 602.0 148.0 \n", | |
"3 Color Christopher Nolan 813.0 164.0 \n", | |
"4 NaN Doug Walker NaN NaN \n", | |
"... ... ... ... ... \n", | |
"5038 Color Scott Smith 1.0 87.0 \n", | |
"5039 Color NaN 43.0 43.0 \n", | |
"5040 Color Benjamin Roberds 13.0 76.0 \n", | |
"5041 Color Daniel Hsia 14.0 100.0 \n", | |
"5042 Color Jon Gunn 43.0 90.0 \n", | |
"\n", | |
" director_facebook_likes actor_3_facebook_likes actor_2_name \\\n", | |
"0 0.0 855.0 Joel David Moore \n", | |
"1 563.0 1000.0 Orlando Bloom \n", | |
"2 0.0 161.0 Rory Kinnear \n", | |
"3 22000.0 23000.0 Christian Bale \n", | |
"4 131.0 NaN Rob Walker \n", | |
"... ... ... ... \n", | |
"5038 2.0 318.0 Daphne Zuniga \n", | |
"5039 NaN 319.0 Valorie Curry \n", | |
"5040 0.0 0.0 Maxwell Moody \n", | |
"5041 0.0 489.0 Daniel Henney \n", | |
"5042 16.0 16.0 Brian Herzlinger \n", | |
"\n", | |
" actor_1_facebook_likes gross genres \\\n", | |
"0 1000.0 760505847.0 Action|Adventure|Fantasy|Sci-Fi \n", | |
"1 40000.0 309404152.0 Action|Adventure|Fantasy \n", | |
"2 11000.0 200074175.0 Action|Adventure|Thriller \n", | |
"3 27000.0 448130642.0 Action|Thriller \n", | |
"4 131.0 NaN Documentary \n", | |
"... ... ... ... \n", | |
"5038 637.0 NaN Comedy|Drama \n", | |
"5039 841.0 NaN Crime|Drama|Mystery|Thriller \n", | |
"5040 0.0 NaN Drama|Horror|Thriller \n", | |
"5041 946.0 10443.0 Comedy|Drama|Romance \n", | |
"5042 86.0 85222.0 Documentary \n", | |
"\n", | |
" ... num_user_for_reviews language country content_rating budget \\\n", | |
"0 ... 3054.0 English USA PG-13 237000000.0 \n", | |
"1 ... 1238.0 English USA PG-13 300000000.0 \n", | |
"2 ... 994.0 English UK PG-13 245000000.0 \n", | |
"3 ... 2701.0 English USA PG-13 250000000.0 \n", | |
"4 ... NaN NaN NaN NaN NaN \n", | |
"... ... ... ... ... ... ... \n", | |
"5038 ... 6.0 English Canada NaN NaN \n", | |
"5039 ... 359.0 English USA TV-14 NaN \n", | |
"5040 ... 3.0 English USA NaN 1400.0 \n", | |
"5041 ... 9.0 English USA PG-13 NaN \n", | |
"5042 ... 84.0 English USA PG 1100.0 \n", | |
"\n", | |
" title_year actor_2_facebook_likes imdb_score aspect_ratio \\\n", | |
"0 2009.0 936.0 7.9 1.78 \n", | |
"1 2007.0 5000.0 7.1 2.35 \n", | |
"2 2015.0 393.0 6.8 2.35 \n", | |
"3 2012.0 23000.0 8.5 2.35 \n", | |
"4 NaN 12.0 7.1 NaN \n", | |
"... ... ... ... ... \n", | |
"5038 2013.0 470.0 7.7 NaN \n", | |
"5039 NaN 593.0 7.5 16.00 \n", | |
"5040 2013.0 0.0 6.3 NaN \n", | |
"5041 2012.0 719.0 6.3 2.35 \n", | |
"5042 2004.0 23.0 6.6 1.85 \n", | |
"\n", | |
" movie_facebook_likes \n", | |
"0 33000 \n", | |
"1 0 \n", | |
"2 85000 \n", | |
"3 164000 \n", | |
"4 0 \n", | |
"... ... \n", | |
"5038 84 \n", | |
"5039 32000 \n", | |
"5040 16 \n", | |
"5041 660 \n", | |
"5042 456 \n", | |
"\n", | |
"[5043 rows x 28 columns]" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>director_name</th>\n", | |
" <th>num_critic_for_reviews</th>\n", | |
" <th>duration</th>\n", | |
" <th>director_facebook_likes</th>\n", | |
" <th>actor_3_facebook_likes</th>\n", | |
" <th>actor_2_name</th>\n", | |
" <th>actor_1_facebook_likes</th>\n", | |
" <th>gross</th>\n", | |
" <th>genres</th>\n", | |
" <th>...</th>\n", | |
" <th>num_user_for_reviews</th>\n", | |
" <th>language</th>\n", | |
" <th>country</th>\n", | |
" <th>content_rating</th>\n", | |
" <th>budget</th>\n", | |
" <th>title_year</th>\n", | |
" <th>actor_2_facebook_likes</th>\n", | |
" <th>imdb_score</th>\n", | |
" <th>aspect_ratio</th>\n", | |
" <th>movie_facebook_likes</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Color</td>\n", | |
" <td>James Cameron</td>\n", | |
" <td>723.0</td>\n", | |
" <td>178.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>855.0</td>\n", | |
" <td>Joel David Moore</td>\n", | |
" <td>1000.0</td>\n", | |
" <td>760505847.0</td>\n", | |
" <td>Action|Adventure|Fantasy|Sci-Fi</td>\n", | |
" <td>...</td>\n", | |
" <td>3054.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>237000000.0</td>\n", | |
" <td>2009.0</td>\n", | |
" <td>936.0</td>\n", | |
" <td>7.9</td>\n", | |
" <td>1.78</td>\n", | |
" <td>33000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Color</td>\n", | |
" <td>Gore Verbinski</td>\n", | |
" <td>302.0</td>\n", | |
" <td>169.0</td>\n", | |
" <td>563.0</td>\n", | |
" <td>1000.0</td>\n", | |
" <td>Orlando Bloom</td>\n", | |
" <td>40000.0</td>\n", | |
" <td>309404152.0</td>\n", | |
" <td>Action|Adventure|Fantasy</td>\n", | |
" <td>...</td>\n", | |
" <td>1238.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>300000000.0</td>\n", | |
" <td>2007.0</td>\n", | |
" <td>5000.0</td>\n", | |
" <td>7.1</td>\n", | |
" <td>2.35</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Color</td>\n", | |
" <td>Sam Mendes</td>\n", | |
" <td>602.0</td>\n", | |
" <td>148.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>161.0</td>\n", | |
" <td>Rory Kinnear</td>\n", | |
" <td>11000.0</td>\n", | |
" <td>200074175.0</td>\n", | |
" <td>Action|Adventure|Thriller</td>\n", | |
" <td>...</td>\n", | |
" <td>994.0</td>\n", | |
" <td>English</td>\n", | |
" <td>UK</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>245000000.0</td>\n", | |
" <td>2015.0</td>\n", | |
" <td>393.0</td>\n", | |
" <td>6.8</td>\n", | |
" <td>2.35</td>\n", | |
" <td>85000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Color</td>\n", | |
" <td>Christopher Nolan</td>\n", | |
" <td>813.0</td>\n", | |
" <td>164.0</td>\n", | |
" <td>22000.0</td>\n", | |
" <td>23000.0</td>\n", | |
" <td>Christian Bale</td>\n", | |
" <td>27000.0</td>\n", | |
" <td>448130642.0</td>\n", | |
" <td>Action|Thriller</td>\n", | |
" <td>...</td>\n", | |
" <td>2701.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>250000000.0</td>\n", | |
" <td>2012.0</td>\n", | |
" <td>23000.0</td>\n", | |
" <td>8.5</td>\n", | |
" <td>2.35</td>\n", | |
" <td>164000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NaN</td>\n", | |
" <td>Doug Walker</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>131.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Rob Walker</td>\n", | |
" <td>131.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Documentary</td>\n", | |
" <td>...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>12.0</td>\n", | |
" <td>7.1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5038</th>\n", | |
" <td>Color</td>\n", | |
" <td>Scott Smith</td>\n", | |
" <td>1.0</td>\n", | |
" <td>87.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>318.0</td>\n", | |
" <td>Daphne Zuniga</td>\n", | |
" <td>637.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Comedy|Drama</td>\n", | |
" <td>...</td>\n", | |
" <td>6.0</td>\n", | |
" <td>English</td>\n", | |
" <td>Canada</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2013.0</td>\n", | |
" <td>470.0</td>\n", | |
" <td>7.7</td>\n", | |
" <td>NaN</td>\n", | |
" <td>84</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5039</th>\n", | |
" <td>Color</td>\n", | |
" <td>NaN</td>\n", | |
" <td>43.0</td>\n", | |
" <td>43.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>319.0</td>\n", | |
" <td>Valorie Curry</td>\n", | |
" <td>841.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Crime|Drama|Mystery|Thriller</td>\n", | |
" <td>...</td>\n", | |
" <td>359.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>TV-14</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>593.0</td>\n", | |
" <td>7.5</td>\n", | |
" <td>16.00</td>\n", | |
" <td>32000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5040</th>\n", | |
" <td>Color</td>\n", | |
" <td>Benjamin Roberds</td>\n", | |
" <td>13.0</td>\n", | |
" <td>76.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Maxwell Moody</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Drama|Horror|Thriller</td>\n", | |
" <td>...</td>\n", | |
" <td>3.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1400.0</td>\n", | |
" <td>2013.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>6.3</td>\n", | |
" <td>NaN</td>\n", | |
" <td>16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5041</th>\n", | |
" <td>Color</td>\n", | |
" <td>Daniel Hsia</td>\n", | |
" <td>14.0</td>\n", | |
" <td>100.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>489.0</td>\n", | |
" <td>Daniel Henney</td>\n", | |
" <td>946.0</td>\n", | |
" <td>10443.0</td>\n", | |
" <td>Comedy|Drama|Romance</td>\n", | |
" <td>...</td>\n", | |
" <td>9.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2012.0</td>\n", | |
" <td>719.0</td>\n", | |
" <td>6.3</td>\n", | |
" <td>2.35</td>\n", | |
" <td>660</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5042</th>\n", | |
" <td>Color</td>\n", | |
" <td>Jon Gunn</td>\n", | |
" <td>43.0</td>\n", | |
" <td>90.0</td>\n", | |
" <td>16.0</td>\n", | |
" <td>16.0</td>\n", | |
" <td>Brian Herzlinger</td>\n", | |
" <td>86.0</td>\n", | |
" <td>85222.0</td>\n", | |
" <td>Documentary</td>\n", | |
" <td>...</td>\n", | |
" <td>84.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG</td>\n", | |
" <td>1100.0</td>\n", | |
" <td>2004.0</td>\n", | |
" <td>23.0</td>\n", | |
" <td>6.6</td>\n", | |
" <td>1.85</td>\n", | |
" <td>456</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5043 rows × 28 columns</p>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 125, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 125 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:30:12.256262Z", | |
"start_time": "2024-10-08T17:30:12.249681Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['color'].value_counts(dropna=False)", | |
"id": "ea3ec3b93f5aa762", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"color\n", | |
"Color 4815\n", | |
" Black and White 209\n", | |
"NaN 19\n", | |
"Name: count, dtype: int64" | |
] | |
}, | |
"execution_count": 135, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 135 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:29:50.835509Z", | |
"start_time": "2024-10-08T17:29:50.831686Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "print(df['color'].value_counts(dropna=False))", | |
"id": "323bb04f0a7b2451", | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"color\n", | |
"Color 4815\n", | |
" Black and White 209\n", | |
"NaN 19\n", | |
"Name: count, dtype: int64\n" | |
] | |
} | |
], | |
"execution_count": 134 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:26:49.001857Z", | |
"start_time": "2024-10-08T17:26:48.985140Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.query('color == \"Black and White\"')", | |
"id": "c9be8c52f370dce", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Empty DataFrame\n", | |
"Columns: [color, director_name, num_critic_for_reviews, duration, director_facebook_likes, actor_3_facebook_likes, actor_2_name, actor_1_facebook_likes, gross, genres, actor_1_name, movie_title, num_voted_users, cast_total_facebook_likes, actor_3_name, facenumber_in_poster, plot_keywords, movie_imdb_link, num_user_for_reviews, language, country, content_rating, budget, title_year, actor_2_facebook_likes, imdb_score, aspect_ratio, movie_facebook_likes]\n", | |
"Index: []\n", | |
"\n", | |
"[0 rows x 28 columns]" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>director_name</th>\n", | |
" <th>num_critic_for_reviews</th>\n", | |
" <th>duration</th>\n", | |
" <th>director_facebook_likes</th>\n", | |
" <th>actor_3_facebook_likes</th>\n", | |
" <th>actor_2_name</th>\n", | |
" <th>actor_1_facebook_likes</th>\n", | |
" <th>gross</th>\n", | |
" <th>genres</th>\n", | |
" <th>...</th>\n", | |
" <th>num_user_for_reviews</th>\n", | |
" <th>language</th>\n", | |
" <th>country</th>\n", | |
" <th>content_rating</th>\n", | |
" <th>budget</th>\n", | |
" <th>title_year</th>\n", | |
" <th>actor_2_facebook_likes</th>\n", | |
" <th>imdb_score</th>\n", | |
" <th>aspect_ratio</th>\n", | |
" <th>movie_facebook_likes</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>0 rows × 28 columns</p>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 129, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 129 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:30:39.485659Z", | |
"start_time": "2024-10-08T17:30:39.457622Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df[df['color'] == ' Black and White']", | |
"id": "a730b1e40e507c2e", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" color director_name num_critic_for_reviews duration \\\n", | |
"111 Black and White Michael Bay 191.0 184.0 \n", | |
"149 Black and White Lee Tamahori 264.0 133.0 \n", | |
"257 Black and White Martin Scorsese 267.0 170.0 \n", | |
"272 Black and White Michael Mann 174.0 165.0 \n", | |
"286 Black and White Martin Campbell 400.0 144.0 \n", | |
"... ... ... ... ... \n", | |
"5005 Black and White Andrew Bujalski 52.0 109.0 \n", | |
"5008 Black and White Kevin Smith 136.0 102.0 \n", | |
"5015 Black and White Richard Linklater 61.0 100.0 \n", | |
"5022 Black and White Jim Chuchu 6.0 60.0 \n", | |
"5028 Black and White Ivan Kavanagh 12.0 83.0 \n", | |
"\n", | |
" director_facebook_likes actor_3_facebook_likes actor_2_name \\\n", | |
"111 0.0 691.0 Jaime King \n", | |
"149 93.0 746.0 Colin Salmon \n", | |
"257 17000.0 827.0 Adam Scott \n", | |
"272 0.0 780.0 Jada Pinkett Smith \n", | |
"286 258.0 834.0 Tobias Menzies \n", | |
"... ... ... ... \n", | |
"5005 26.0 3.0 Kate Dollenmayer \n", | |
"5008 0.0 216.0 Brian O'Halloran \n", | |
"5015 0.0 0.0 Richard Linklater \n", | |
"5022 0.0 4.0 Olwenya Maina \n", | |
"5028 18.0 0.0 Michael Parle \n", | |
"\n", | |
" actor_1_facebook_likes gross genres \\\n", | |
"111 3000.0 198539855.0 Action|Drama|History|Romance|War \n", | |
"149 769.0 160201106.0 Action|Adventure|Thriller \n", | |
"257 29000.0 102608827.0 Biography|Drama \n", | |
"272 10000.0 58183966.0 Biography|Drama|Sport \n", | |
"286 6000.0 167007184.0 Action|Adventure|Thriller \n", | |
"... ... ... ... \n", | |
"5005 26.0 NaN Comedy \n", | |
"5008 898.0 3151130.0 Comedy \n", | |
"5015 5.0 1227508.0 Comedy|Drama \n", | |
"5022 147.0 NaN Drama \n", | |
"5028 10.0 NaN Horror \n", | |
"\n", | |
" ... num_user_for_reviews language country content_rating budget \\\n", | |
"111 ... 1999.0 English USA PG-13 140000000.0 \n", | |
"149 ... 1185.0 English UK PG-13 142000000.0 \n", | |
"257 ... 799.0 English USA PG-13 110000000.0 \n", | |
"272 ... 386.0 English USA R 107000000.0 \n", | |
"286 ... 2301.0 English UK PG-13 150000000.0 \n", | |
"... ... ... ... ... ... ... \n", | |
"5005 ... 23.0 English USA R NaN \n", | |
"5008 ... 615.0 English USA R 230000.0 \n", | |
"5015 ... 80.0 English USA R 23000.0 \n", | |
"5022 ... 1.0 Swahili Kenya NaN 15000.0 \n", | |
"5028 ... 1.0 English Ireland NaN 10000.0 \n", | |
"\n", | |
" title_year actor_2_facebook_likes imdb_score aspect_ratio \\\n", | |
"111 2001.0 961.0 6.1 2.35 \n", | |
"149 2002.0 766.0 6.1 2.35 \n", | |
"257 2004.0 3000.0 7.5 2.35 \n", | |
"272 2001.0 851.0 6.8 2.35 \n", | |
"286 2006.0 1000.0 8.0 2.35 \n", | |
"... ... ... ... ... \n", | |
"5005 2005.0 6.0 6.9 1.66 \n", | |
"5008 1994.0 657.0 7.8 1.37 \n", | |
"5015 1991.0 0.0 7.1 1.37 \n", | |
"5022 2014.0 19.0 7.4 NaN \n", | |
"5028 2007.0 5.0 6.7 1.33 \n", | |
"\n", | |
" movie_facebook_likes \n", | |
"111 0 \n", | |
"149 0 \n", | |
"257 0 \n", | |
"272 0 \n", | |
"286 0 \n", | |
"... ... \n", | |
"5005 91 \n", | |
"5008 0 \n", | |
"5015 2000 \n", | |
"5022 45 \n", | |
"5028 105 \n", | |
"\n", | |
"[209 rows x 28 columns]" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>director_name</th>\n", | |
" <th>num_critic_for_reviews</th>\n", | |
" <th>duration</th>\n", | |
" <th>director_facebook_likes</th>\n", | |
" <th>actor_3_facebook_likes</th>\n", | |
" <th>actor_2_name</th>\n", | |
" <th>actor_1_facebook_likes</th>\n", | |
" <th>gross</th>\n", | |
" <th>genres</th>\n", | |
" <th>...</th>\n", | |
" <th>num_user_for_reviews</th>\n", | |
" <th>language</th>\n", | |
" <th>country</th>\n", | |
" <th>content_rating</th>\n", | |
" <th>budget</th>\n", | |
" <th>title_year</th>\n", | |
" <th>actor_2_facebook_likes</th>\n", | |
" <th>imdb_score</th>\n", | |
" <th>aspect_ratio</th>\n", | |
" <th>movie_facebook_likes</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>111</th>\n", | |
" <td>Black and White</td>\n", | |
" <td>Michael Bay</td>\n", | |
" <td>191.0</td>\n", | |
" <td>184.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>691.0</td>\n", | |
" <td>Jaime King</td>\n", | |
" <td>3000.0</td>\n", | |
" <td>198539855.0</td>\n", | |
" <td>Action|Drama|History|Romance|War</td>\n", | |
" <td>...</td>\n", | |
" <td>1999.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>140000000.0</td>\n", | |
" <td>2001.0</td>\n", | |
" <td>961.0</td>\n", | |
" <td>6.1</td>\n", | |
" <td>2.35</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>149</th>\n", | |
" <td>Black and White</td>\n", | |
" <td>Lee Tamahori</td>\n", | |
" <td>264.0</td>\n", | |
" <td>133.0</td>\n", | |
" <td>93.0</td>\n", | |
" <td>746.0</td>\n", | |
" <td>Colin Salmon</td>\n", | |
" <td>769.0</td>\n", | |
" <td>160201106.0</td>\n", | |
" <td>Action|Adventure|Thriller</td>\n", | |
" <td>...</td>\n", | |
" <td>1185.0</td>\n", | |
" <td>English</td>\n", | |
" <td>UK</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>142000000.0</td>\n", | |
" <td>2002.0</td>\n", | |
" <td>766.0</td>\n", | |
" <td>6.1</td>\n", | |
" <td>2.35</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>257</th>\n", | |
" <td>Black and White</td>\n", | |
" <td>Martin Scorsese</td>\n", | |
" <td>267.0</td>\n", | |
" <td>170.0</td>\n", | |
" <td>17000.0</td>\n", | |
" <td>827.0</td>\n", | |
" <td>Adam Scott</td>\n", | |
" <td>29000.0</td>\n", | |
" <td>102608827.0</td>\n", | |
" <td>Biography|Drama</td>\n", | |
" <td>...</td>\n", | |
" <td>799.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>110000000.0</td>\n", | |
" <td>2004.0</td>\n", | |
" <td>3000.0</td>\n", | |
" <td>7.5</td>\n", | |
" <td>2.35</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>272</th>\n", | |
" <td>Black and White</td>\n", | |
" <td>Michael Mann</td>\n", | |
" <td>174.0</td>\n", | |
" <td>165.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>780.0</td>\n", | |
" <td>Jada Pinkett Smith</td>\n", | |
" <td>10000.0</td>\n", | |
" <td>58183966.0</td>\n", | |
" <td>Biography|Drama|Sport</td>\n", | |
" <td>...</td>\n", | |
" <td>386.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>R</td>\n", | |
" <td>107000000.0</td>\n", | |
" <td>2001.0</td>\n", | |
" <td>851.0</td>\n", | |
" <td>6.8</td>\n", | |
" <td>2.35</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>286</th>\n", | |
" <td>Black and White</td>\n", | |
" <td>Martin Campbell</td>\n", | |
" <td>400.0</td>\n", | |
" <td>144.0</td>\n", | |
" <td>258.0</td>\n", | |
" <td>834.0</td>\n", | |
" <td>Tobias Menzies</td>\n", | |
" <td>6000.0</td>\n", | |
" <td>167007184.0</td>\n", | |
" <td>Action|Adventure|Thriller</td>\n", | |
" <td>...</td>\n", | |
" <td>2301.0</td>\n", | |
" <td>English</td>\n", | |
" <td>UK</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>150000000.0</td>\n", | |
" <td>2006.0</td>\n", | |
" <td>1000.0</td>\n", | |
" <td>8.0</td>\n", | |
" <td>2.35</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5005</th>\n", | |
" <td>Black and White</td>\n", | |
" <td>Andrew Bujalski</td>\n", | |
" <td>52.0</td>\n", | |
" <td>109.0</td>\n", | |
" <td>26.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>Kate Dollenmayer</td>\n", | |
" <td>26.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Comedy</td>\n", | |
" <td>...</td>\n", | |
" <td>23.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>R</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2005.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>6.9</td>\n", | |
" <td>1.66</td>\n", | |
" <td>91</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5008</th>\n", | |
" <td>Black and White</td>\n", | |
" <td>Kevin Smith</td>\n", | |
" <td>136.0</td>\n", | |
" <td>102.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>216.0</td>\n", | |
" <td>Brian O'Halloran</td>\n", | |
" <td>898.0</td>\n", | |
" <td>3151130.0</td>\n", | |
" <td>Comedy</td>\n", | |
" <td>...</td>\n", | |
" <td>615.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>R</td>\n", | |
" <td>230000.0</td>\n", | |
" <td>1994.0</td>\n", | |
" <td>657.0</td>\n", | |
" <td>7.8</td>\n", | |
" <td>1.37</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5015</th>\n", | |
" <td>Black and White</td>\n", | |
" <td>Richard Linklater</td>\n", | |
" <td>61.0</td>\n", | |
" <td>100.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Richard Linklater</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1227508.0</td>\n", | |
" <td>Comedy|Drama</td>\n", | |
" <td>...</td>\n", | |
" <td>80.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>R</td>\n", | |
" <td>23000.0</td>\n", | |
" <td>1991.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>7.1</td>\n", | |
" <td>1.37</td>\n", | |
" <td>2000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5022</th>\n", | |
" <td>Black and White</td>\n", | |
" <td>Jim Chuchu</td>\n", | |
" <td>6.0</td>\n", | |
" <td>60.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>4.0</td>\n", | |
" <td>Olwenya Maina</td>\n", | |
" <td>147.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Drama</td>\n", | |
" <td>...</td>\n", | |
" <td>1.0</td>\n", | |
" <td>Swahili</td>\n", | |
" <td>Kenya</td>\n", | |
" <td>NaN</td>\n", | |
" <td>15000.0</td>\n", | |
" <td>2014.0</td>\n", | |
" <td>19.0</td>\n", | |
" <td>7.4</td>\n", | |
" <td>NaN</td>\n", | |
" <td>45</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5028</th>\n", | |
" <td>Black and White</td>\n", | |
" <td>Ivan Kavanagh</td>\n", | |
" <td>12.0</td>\n", | |
" <td>83.0</td>\n", | |
" <td>18.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Michael Parle</td>\n", | |
" <td>10.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Horror</td>\n", | |
" <td>...</td>\n", | |
" <td>1.0</td>\n", | |
" <td>English</td>\n", | |
" <td>Ireland</td>\n", | |
" <td>NaN</td>\n", | |
" <td>10000.0</td>\n", | |
" <td>2007.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>6.7</td>\n", | |
" <td>1.33</td>\n", | |
" <td>105</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>209 rows × 28 columns</p>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 136, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 136 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:34:00.225709Z", | |
"start_time": "2024-10-08T17:34:00.219871Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "print(df['color'].str.strip().value_counts())", | |
"id": "eb7d569b0a16a83d", | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"color\n", | |
"Color 4815\n", | |
"Black and White 209\n", | |
"Name: count, dtype: int64\n" | |
] | |
} | |
], | |
"execution_count": 141 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:34:23.192776Z", | |
"start_time": "2024-10-08T17:34:23.187459Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['color'] = df['color'].str.strip()", | |
"id": "6c511b4cd832c11a", | |
"outputs": [], | |
"execution_count": 142 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:34:38.026800Z", | |
"start_time": "2024-10-08T17:34:38.022366Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "print(df['color'].value_counts())", | |
"id": "421370c14d164abb", | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"color\n", | |
"Color 4815\n", | |
"Black and White 209\n", | |
"Name: count, dtype: int64\n" | |
] | |
} | |
], | |
"execution_count": 144 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:38:26.651131Z", | |
"start_time": "2024-10-08T17:38:25.989420Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"df = pd.read_csv(\"https://github.com/anishshah23/IMDb-5000-Data-analysis/raw/refs/heads/master/movie_metadata.csv\")\n", | |
"string_columns = df.select_dtypes(include='object').columns\n", | |
"for column in string_columns:\n", | |
" df[column] = df[column].str.strip()" | |
], | |
"id": "ecfe7507bf6f2c3f", | |
"outputs": [], | |
"execution_count": 150 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:40:28.350100Z", | |
"start_time": "2024-10-08T17:40:28.327395Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.query('color == \"Color\"')['duration'].mean()", | |
"id": "86adde931d216bc2", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"107.04290772755675" | |
] | |
}, | |
"execution_count": 152, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 152 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:40:35.855184Z", | |
"start_time": "2024-10-08T17:40:35.840405Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.query('color == \"Black and White\"')['duration'].mean()", | |
"id": "f61eaecf5e26e8f9", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"112.2535885167464" | |
] | |
}, | |
"execution_count": 153, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 153 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:44:17.611370Z", | |
"start_time": "2024-10-08T17:44:17.602090Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.groupby('color', dropna=False)['duration'].mean()", | |
"id": "4cd34e3be9e028a3", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"color\n", | |
"Black and White 112.253589\n", | |
"Color 107.042908\n", | |
"NaN 90.722222\n", | |
"Name: duration, dtype: float64" | |
] | |
}, | |
"execution_count": 158, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 158 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:44:43.080513Z", | |
"start_time": "2024-10-08T17:44:43.074013Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "np.array([3, 4, np.nan, 2]).mean()", | |
"id": "ffb46e3dc3c1c416", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"nan" | |
] | |
}, | |
"execution_count": 160, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 160 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:45:59.341021Z", | |
"start_time": "2024-10-08T17:45:59.334140Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "np.nanmean(np.array([3, 4, np.nan, 2]))", | |
"id": "4e01aeed52dd30c", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3.0" | |
] | |
}, | |
"execution_count": 165, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 165 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:45:05.541991Z", | |
"start_time": "2024-10-08T17:45:05.535455Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "pd.Series([3, 4, np.nan, 2]).mean()", | |
"id": "f2fe112c353d1bb7", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3.0" | |
] | |
}, | |
"execution_count": 161, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 161 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:46:33.365827Z", | |
"start_time": "2024-10-08T17:46:33.358925Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "pd.Series([3, 4, np.nan, 2]).fillna(0).mean()", | |
"id": "377b381a42b4739e", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2.25" | |
] | |
}, | |
"execution_count": 167, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 167 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:48:48.642412Z", | |
"start_time": "2024-10-08T17:48:48.633402Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['director_name'].value_counts()['Tim Burton']", | |
"id": "31a7019b1e784ab8", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"16" | |
] | |
}, | |
"execution_count": 171, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 171 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:49:17.944554Z", | |
"start_time": "2024-10-08T17:49:17.926433Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.query('director_name == \"Tim Burton\"').shape", | |
"id": "a8307c748ac3c6f9", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(16, 28)" | |
] | |
}, | |
"execution_count": 173, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 173 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:51:15.450887Z", | |
"start_time": "2024-10-08T17:51:15.439895Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['director_name'].value_counts().sort_values(ascending=False)", | |
"id": "f0043f5af99542d9", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"director_name\n", | |
"Steven Spielberg 26\n", | |
"Woody Allen 22\n", | |
"Clint Eastwood 20\n", | |
"Martin Scorsese 20\n", | |
"Ridley Scott 17\n", | |
" ..\n", | |
"Ryan Smith 1\n", | |
"Travis Romero 1\n", | |
"Andrew Haigh 1\n", | |
"Cary Bell 1\n", | |
"Daniel Hsia 1\n", | |
"Name: count, Length: 2398, dtype: int64" | |
] | |
}, | |
"execution_count": 178, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 178 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:51:38.469367Z", | |
"start_time": "2024-10-08T17:51:38.455093Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['director_name'].value_counts().sort_index()", | |
"id": "9d53e8ae34705305", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"director_name\n", | |
"A. Raven Cruz 1\n", | |
"Aaron Hann 1\n", | |
"Aaron Schneider 1\n", | |
"Aaron Seltzer 1\n", | |
"Abel Ferrara 1\n", | |
" ..\n", | |
"Zoran Lisinac 1\n", | |
"Álex de la Iglesia 1\n", | |
"Émile Gaudreault 1\n", | |
"Éric Tessier 1\n", | |
"Étienne Faure 1\n", | |
"Name: count, Length: 2398, dtype: int64" | |
] | |
}, | |
"execution_count": 180, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 180 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T17:55:37.636735Z", | |
"start_time": "2024-10-08T17:55:37.617012Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df['director_name'].str.split(expand=True)", | |
"id": "6258364f5dec05e", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" 0 1 2 3\n", | |
"0 James Cameron None None\n", | |
"1 Gore Verbinski None None\n", | |
"2 Sam Mendes None None\n", | |
"3 Christopher Nolan None None\n", | |
"4 Doug Walker None None\n", | |
"... ... ... ... ...\n", | |
"5038 Scott Smith None None\n", | |
"5039 NaN NaN NaN NaN\n", | |
"5040 Benjamin Roberds None None\n", | |
"5041 Daniel Hsia None None\n", | |
"5042 Jon Gunn None None\n", | |
"\n", | |
"[5043 rows x 4 columns]" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>James</td>\n", | |
" <td>Cameron</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Gore</td>\n", | |
" <td>Verbinski</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Sam</td>\n", | |
" <td>Mendes</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Christopher</td>\n", | |
" <td>Nolan</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Doug</td>\n", | |
" <td>Walker</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5038</th>\n", | |
" <td>Scott</td>\n", | |
" <td>Smith</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5039</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5040</th>\n", | |
" <td>Benjamin</td>\n", | |
" <td>Roberds</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5041</th>\n", | |
" <td>Daniel</td>\n", | |
" <td>Hsia</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5042</th>\n", | |
" <td>Jon</td>\n", | |
" <td>Gunn</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5043 rows × 4 columns</p>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 189, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 189 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:02:56.097304Z", | |
"start_time": "2024-10-08T18:02:56.078209Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Number of movies per director, sorted by the director's last name.\n", | |
"# Columns: director_name, count, director_last_name.\n", | |
"df_director_counts = df['director_name'].value_counts().to_frame().reset_index()\n", | |
"# The last name is the FINAL whitespace-separated token of the full name. Taking the second token\n", | |
"# instead returns a middle initial or nickname for names such as 'George A. Romero', and nothing\n", | |
"# at all for single-word names such as 'Pitof' (which now act as their own last name).\n", | |
"df_director_counts['director_last_name'] = df_director_counts['director_name'].str.split().str[-1]\n", | |
"df_director_counts = df_director_counts.sort_values('director_last_name')" | |
], | |
"id": "7117e8cf88134f06", | |
"outputs": [], | |
"execution_count": 208 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:02:57.906129Z", | |
"start_time": "2024-10-08T18:02:57.892806Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df_director_counts", | |
"id": "b70bec6c9c2fc777", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" director_name count director_last_name\n", | |
"1239 John 'Bud' Cardos 1 'Bud'\n", | |
"1727 Brian A Miller 1 A\n", | |
"1790 William A. Fraker 1 A.\n", | |
"42 George A. Romero 9 A.\n", | |
"1282 Marius A. Markevicius 1 A.\n", | |
"... ... ... ...\n", | |
"1374 Valentine 1 None\n", | |
"1469 Remo 1 None\n", | |
"1868 Pitof 1 None\n", | |
"2089 Maïwenn 1 None\n", | |
"2276 RZA 1 None\n", | |
"\n", | |
"[2398 rows x 3 columns]" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>director_name</th>\n", | |
" <th>count</th>\n", | |
" <th>director_last_name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1239</th>\n", | |
" <td>John 'Bud' Cardos</td>\n", | |
" <td>1</td>\n", | |
" <td>'Bud'</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1727</th>\n", | |
" <td>Brian A Miller</td>\n", | |
" <td>1</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1790</th>\n", | |
" <td>William A. Fraker</td>\n", | |
" <td>1</td>\n", | |
" <td>A.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>42</th>\n", | |
" <td>George A. Romero</td>\n", | |
" <td>9</td>\n", | |
" <td>A.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1282</th>\n", | |
" <td>Marius A. Markevicius</td>\n", | |
" <td>1</td>\n", | |
" <td>A.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1374</th>\n", | |
" <td>Valentine</td>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1469</th>\n", | |
" <td>Remo</td>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1868</th>\n", | |
" <td>Pitof</td>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2089</th>\n", | |
" <td>Maïwenn</td>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2276</th>\n", | |
" <td>RZA</td>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>2398 rows × 3 columns</p>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 209, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 209 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:00:47.208132Z", | |
"start_time": "2024-10-08T18:00:47.191247Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df_director_counts['director_name'].str.split(expand=True).iloc[:, 1]", | |
"id": "4c18507b580d6633", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 Spielberg\n", | |
"1 Allen\n", | |
"2 Eastwood\n", | |
"3 Scorsese\n", | |
"4 Scott\n", | |
" ... \n", | |
"2393 NaN\n", | |
"2394 NaN\n", | |
"2395 NaN\n", | |
"2396 NaN\n", | |
"2397 NaN\n", | |
"Name: 1, Length: 4796, dtype: object" | |
] | |
}, | |
"execution_count": 202, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 202 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:00:15.853028Z", | |
"start_time": "2024-10-08T18:00:15.839475Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df_director_counts", | |
"id": "82be3b7b650df7b4", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" director_name count 1\n", | |
"0 Steven Spielberg 26.0 NaN\n", | |
"1 Woody Allen 22.0 NaN\n", | |
"2 Clint Eastwood 20.0 NaN\n", | |
"3 Martin Scorsese 20.0 NaN\n", | |
"4 Ridley Scott 17.0 NaN\n", | |
"... ... ... ...\n", | |
"2393 NaN NaN Crowley\n", | |
"2394 NaN NaN Pritts\n", | |
"2395 NaN NaN S.\n", | |
"2396 NaN NaN Cutler\n", | |
"2397 NaN NaN Hsia\n", | |
"\n", | |
"[4796 rows x 3 columns]" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>director_name</th>\n", | |
" <th>count</th>\n", | |
" <th>1</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Steven Spielberg</td>\n", | |
" <td>26.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Woody Allen</td>\n", | |
" <td>22.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Clint Eastwood</td>\n", | |
" <td>20.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Martin Scorsese</td>\n", | |
" <td>20.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Ridley Scott</td>\n", | |
" <td>17.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2393</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Crowley</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2394</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Pritts</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2395</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>S.</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2396</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Cutler</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2397</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Hsia</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>4796 rows × 3 columns</p>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 201, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 201 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:06:33.032674Z", | |
"start_time": "2024-10-08T18:06:33.015512Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.groupby('country')['imdb_score'].mean().sort_values(ascending=False)", | |
"id": "2bb56aebab8a4111", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"country\n", | |
"Kyrgyzstan 8.7\n", | |
"Libya 8.4\n", | |
"United Arab Emirates 8.2\n", | |
"Egypt 8.1\n", | |
"Soviet Union 8.1\n", | |
" ... \n", | |
"Georgia 5.6\n", | |
"Peru 5.4\n", | |
"Aruba 4.8\n", | |
"Bahamas 4.4\n", | |
"New Line 4.4\n", | |
"Name: imdb_score, Length: 65, dtype: float64" | |
] | |
}, | |
"execution_count": 214, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 214 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:11:54.143116Z", | |
"start_time": "2024-10-08T18:11:54.125732Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"(df.groupby('country')['imdb_score']\n", | |
" .agg(['mean', 'count'])\n", | |
" .sort_values('mean', ascending=False)\n", | |
" .query('count > 20'))" | |
], | |
"id": "8907b56f4d2de291", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" mean count\n", | |
"country \n", | |
"Japan 6.952174 23\n", | |
"Italy 6.873913 23\n", | |
"Spain 6.824242 33\n", | |
"UK 6.818304 448\n", | |
"France 6.678571 154\n", | |
"China 6.623333 30\n", | |
"India 6.532353 34\n", | |
"Australia 6.514545 55\n", | |
"USA 6.367428 3807\n", | |
"Germany 6.340206 97\n", | |
"Canada 6.161905 126" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>mean</th>\n", | |
" <th>count</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>country</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Japan</th>\n", | |
" <td>6.952174</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Italy</th>\n", | |
" <td>6.873913</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Spain</th>\n", | |
" <td>6.824242</td>\n", | |
" <td>33</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>UK</th>\n", | |
" <td>6.818304</td>\n", | |
" <td>448</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>France</th>\n", | |
" <td>6.678571</td>\n", | |
" <td>154</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>China</th>\n", | |
" <td>6.623333</td>\n", | |
" <td>30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>India</th>\n", | |
" <td>6.532353</td>\n", | |
" <td>34</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Australia</th>\n", | |
" <td>6.514545</td>\n", | |
" <td>55</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>USA</th>\n", | |
" <td>6.367428</td>\n", | |
" <td>3807</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Germany</th>\n", | |
" <td>6.340206</td>\n", | |
" <td>97</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Canada</th>\n", | |
" <td>6.161905</td>\n", | |
" <td>126</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 218, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 218 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:14:33.264465Z", | |
"start_time": "2024-10-08T18:14:33.260860Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "# For each director, find their best movie (the one with the highest imdb_score).", | |
"id": "371f40d89ab39dbc", | |
"outputs": [], | |
"execution_count": 220 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:16:10.047410Z", | |
"start_time": "2024-10-08T18:16:10.039605Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Highest imdb_score per director as a flat two-column frame (director_name, imdb_score).\n", | |
"# as_index=False keeps the grouping key as a regular column, so no to_frame()/reset_index() is needed.\n", | |
"director_best_scores = df.groupby('director_name', as_index=False)['imdb_score'].max()" | |
], | |
"id": "e297620e06a498df", | |
"outputs": [], | |
"execution_count": 225 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:16:23.681135Z", | |
"start_time": "2024-10-08T18:16:23.611608Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"id": "631121c47dd4f084", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" color director_name num_critic_for_reviews duration \\\n", | |
"0 Color James Cameron 723.0 178.0 \n", | |
"1 Color Gore Verbinski 302.0 169.0 \n", | |
"2 Color Sam Mendes 602.0 148.0 \n", | |
"3 Color Christopher Nolan 813.0 164.0 \n", | |
"4 NaN Doug Walker NaN NaN \n", | |
"... ... ... ... ... \n", | |
"5038 Color Scott Smith 1.0 87.0 \n", | |
"5039 Color NaN 43.0 43.0 \n", | |
"5040 Color Benjamin Roberds 13.0 76.0 \n", | |
"5041 Color Daniel Hsia 14.0 100.0 \n", | |
"5042 Color Jon Gunn 43.0 90.0 \n", | |
"\n", | |
" director_facebook_likes actor_3_facebook_likes actor_2_name \\\n", | |
"0 0.0 855.0 Joel David Moore \n", | |
"1 563.0 1000.0 Orlando Bloom \n", | |
"2 0.0 161.0 Rory Kinnear \n", | |
"3 22000.0 23000.0 Christian Bale \n", | |
"4 131.0 NaN Rob Walker \n", | |
"... ... ... ... \n", | |
"5038 2.0 318.0 Daphne Zuniga \n", | |
"5039 NaN 319.0 Valorie Curry \n", | |
"5040 0.0 0.0 Maxwell Moody \n", | |
"5041 0.0 489.0 Daniel Henney \n", | |
"5042 16.0 16.0 Brian Herzlinger \n", | |
"\n", | |
" actor_1_facebook_likes gross genres \\\n", | |
"0 1000.0 760505847.0 Action|Adventure|Fantasy|Sci-Fi \n", | |
"1 40000.0 309404152.0 Action|Adventure|Fantasy \n", | |
"2 11000.0 200074175.0 Action|Adventure|Thriller \n", | |
"3 27000.0 448130642.0 Action|Thriller \n", | |
"4 131.0 NaN Documentary \n", | |
"... ... ... ... \n", | |
"5038 637.0 NaN Comedy|Drama \n", | |
"5039 841.0 NaN Crime|Drama|Mystery|Thriller \n", | |
"5040 0.0 NaN Drama|Horror|Thriller \n", | |
"5041 946.0 10443.0 Comedy|Drama|Romance \n", | |
"5042 86.0 85222.0 Documentary \n", | |
"\n", | |
" ... num_user_for_reviews language country content_rating budget \\\n", | |
"0 ... 3054.0 English USA PG-13 237000000.0 \n", | |
"1 ... 1238.0 English USA PG-13 300000000.0 \n", | |
"2 ... 994.0 English UK PG-13 245000000.0 \n", | |
"3 ... 2701.0 English USA PG-13 250000000.0 \n", | |
"4 ... NaN NaN NaN NaN NaN \n", | |
"... ... ... ... ... ... ... \n", | |
"5038 ... 6.0 English Canada NaN NaN \n", | |
"5039 ... 359.0 English USA TV-14 NaN \n", | |
"5040 ... 3.0 English USA NaN 1400.0 \n", | |
"5041 ... 9.0 English USA PG-13 NaN \n", | |
"5042 ... 84.0 English USA PG 1100.0 \n", | |
"\n", | |
" title_year actor_2_facebook_likes imdb_score aspect_ratio \\\n", | |
"0 2009.0 936.0 7.9 1.78 \n", | |
"1 2007.0 5000.0 7.1 2.35 \n", | |
"2 2015.0 393.0 6.8 2.35 \n", | |
"3 2012.0 23000.0 8.5 2.35 \n", | |
"4 NaN 12.0 7.1 NaN \n", | |
"... ... ... ... ... \n", | |
"5038 2013.0 470.0 7.7 NaN \n", | |
"5039 NaN 593.0 7.5 16.00 \n", | |
"5040 2013.0 0.0 6.3 NaN \n", | |
"5041 2012.0 719.0 6.3 2.35 \n", | |
"5042 2004.0 23.0 6.6 1.85 \n", | |
"\n", | |
" movie_facebook_likes \n", | |
"0 33000 \n", | |
"1 0 \n", | |
"2 85000 \n", | |
"3 164000 \n", | |
"4 0 \n", | |
"... ... \n", | |
"5038 84 \n", | |
"5039 32000 \n", | |
"5040 16 \n", | |
"5041 660 \n", | |
"5042 456 \n", | |
"\n", | |
"[5043 rows x 28 columns]" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>director_name</th>\n", | |
" <th>num_critic_for_reviews</th>\n", | |
" <th>duration</th>\n", | |
" <th>director_facebook_likes</th>\n", | |
" <th>actor_3_facebook_likes</th>\n", | |
" <th>actor_2_name</th>\n", | |
" <th>actor_1_facebook_likes</th>\n", | |
" <th>gross</th>\n", | |
" <th>genres</th>\n", | |
" <th>...</th>\n", | |
" <th>num_user_for_reviews</th>\n", | |
" <th>language</th>\n", | |
" <th>country</th>\n", | |
" <th>content_rating</th>\n", | |
" <th>budget</th>\n", | |
" <th>title_year</th>\n", | |
" <th>actor_2_facebook_likes</th>\n", | |
" <th>imdb_score</th>\n", | |
" <th>aspect_ratio</th>\n", | |
" <th>movie_facebook_likes</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Color</td>\n", | |
" <td>James Cameron</td>\n", | |
" <td>723.0</td>\n", | |
" <td>178.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>855.0</td>\n", | |
" <td>Joel David Moore</td>\n", | |
" <td>1000.0</td>\n", | |
" <td>760505847.0</td>\n", | |
" <td>Action|Adventure|Fantasy|Sci-Fi</td>\n", | |
" <td>...</td>\n", | |
" <td>3054.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>237000000.0</td>\n", | |
" <td>2009.0</td>\n", | |
" <td>936.0</td>\n", | |
" <td>7.9</td>\n", | |
" <td>1.78</td>\n", | |
" <td>33000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Color</td>\n", | |
" <td>Gore Verbinski</td>\n", | |
" <td>302.0</td>\n", | |
" <td>169.0</td>\n", | |
" <td>563.0</td>\n", | |
" <td>1000.0</td>\n", | |
" <td>Orlando Bloom</td>\n", | |
" <td>40000.0</td>\n", | |
" <td>309404152.0</td>\n", | |
" <td>Action|Adventure|Fantasy</td>\n", | |
" <td>...</td>\n", | |
" <td>1238.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>300000000.0</td>\n", | |
" <td>2007.0</td>\n", | |
" <td>5000.0</td>\n", | |
" <td>7.1</td>\n", | |
" <td>2.35</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Color</td>\n", | |
" <td>Sam Mendes</td>\n", | |
" <td>602.0</td>\n", | |
" <td>148.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>161.0</td>\n", | |
" <td>Rory Kinnear</td>\n", | |
" <td>11000.0</td>\n", | |
" <td>200074175.0</td>\n", | |
" <td>Action|Adventure|Thriller</td>\n", | |
" <td>...</td>\n", | |
" <td>994.0</td>\n", | |
" <td>English</td>\n", | |
" <td>UK</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>245000000.0</td>\n", | |
" <td>2015.0</td>\n", | |
" <td>393.0</td>\n", | |
" <td>6.8</td>\n", | |
" <td>2.35</td>\n", | |
" <td>85000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Color</td>\n", | |
" <td>Christopher Nolan</td>\n", | |
" <td>813.0</td>\n", | |
" <td>164.0</td>\n", | |
" <td>22000.0</td>\n", | |
" <td>23000.0</td>\n", | |
" <td>Christian Bale</td>\n", | |
" <td>27000.0</td>\n", | |
" <td>448130642.0</td>\n", | |
" <td>Action|Thriller</td>\n", | |
" <td>...</td>\n", | |
" <td>2701.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>250000000.0</td>\n", | |
" <td>2012.0</td>\n", | |
" <td>23000.0</td>\n", | |
" <td>8.5</td>\n", | |
" <td>2.35</td>\n", | |
" <td>164000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NaN</td>\n", | |
" <td>Doug Walker</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>131.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Rob Walker</td>\n", | |
" <td>131.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Documentary</td>\n", | |
" <td>...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>12.0</td>\n", | |
" <td>7.1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5038</th>\n", | |
" <td>Color</td>\n", | |
" <td>Scott Smith</td>\n", | |
" <td>1.0</td>\n", | |
" <td>87.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>318.0</td>\n", | |
" <td>Daphne Zuniga</td>\n", | |
" <td>637.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Comedy|Drama</td>\n", | |
" <td>...</td>\n", | |
" <td>6.0</td>\n", | |
" <td>English</td>\n", | |
" <td>Canada</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2013.0</td>\n", | |
" <td>470.0</td>\n", | |
" <td>7.7</td>\n", | |
" <td>NaN</td>\n", | |
" <td>84</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5039</th>\n", | |
" <td>Color</td>\n", | |
" <td>NaN</td>\n", | |
" <td>43.0</td>\n", | |
" <td>43.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>319.0</td>\n", | |
" <td>Valorie Curry</td>\n", | |
" <td>841.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Crime|Drama|Mystery|Thriller</td>\n", | |
" <td>...</td>\n", | |
" <td>359.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>TV-14</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>593.0</td>\n", | |
" <td>7.5</td>\n", | |
" <td>16.00</td>\n", | |
" <td>32000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5040</th>\n", | |
" <td>Color</td>\n", | |
" <td>Benjamin Roberds</td>\n", | |
" <td>13.0</td>\n", | |
" <td>76.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Maxwell Moody</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Drama|Horror|Thriller</td>\n", | |
" <td>...</td>\n", | |
" <td>3.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1400.0</td>\n", | |
" <td>2013.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>6.3</td>\n", | |
" <td>NaN</td>\n", | |
" <td>16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5041</th>\n", | |
" <td>Color</td>\n", | |
" <td>Daniel Hsia</td>\n", | |
" <td>14.0</td>\n", | |
" <td>100.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>489.0</td>\n", | |
" <td>Daniel Henney</td>\n", | |
" <td>946.0</td>\n", | |
" <td>10443.0</td>\n", | |
" <td>Comedy|Drama|Romance</td>\n", | |
" <td>...</td>\n", | |
" <td>9.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG-13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2012.0</td>\n", | |
" <td>719.0</td>\n", | |
" <td>6.3</td>\n", | |
" <td>2.35</td>\n", | |
" <td>660</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5042</th>\n", | |
" <td>Color</td>\n", | |
" <td>Jon Gunn</td>\n", | |
" <td>43.0</td>\n", | |
" <td>90.0</td>\n", | |
" <td>16.0</td>\n", | |
" <td>16.0</td>\n", | |
" <td>Brian Herzlinger</td>\n", | |
" <td>86.0</td>\n", | |
" <td>85222.0</td>\n", | |
" <td>Documentary</td>\n", | |
" <td>...</td>\n", | |
" <td>84.0</td>\n", | |
" <td>English</td>\n", | |
" <td>USA</td>\n", | |
" <td>PG</td>\n", | |
" <td>1100.0</td>\n", | |
" <td>2004.0</td>\n", | |
" <td>23.0</td>\n", | |
" <td>6.6</td>\n", | |
" <td>1.85</td>\n", | |
" <td>456</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5043 rows × 28 columns</p>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 228, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 228 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:16:16.174205Z", | |
"start_time": "2024-10-08T18:16:16.157905Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "director_best_scores", | |
"id": "8c29e9943824fee9", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" director_name imdb_score\n", | |
"0 A. Raven Cruz 1.9\n", | |
"1 Aaron Hann 6.0\n", | |
"2 Aaron Schneider 7.1\n", | |
"3 Aaron Seltzer 2.7\n", | |
"4 Abel Ferrara 6.6\n", | |
"... ... ...\n", | |
"2393 Zoran Lisinac 7.1\n", | |
"2394 Álex de la Iglesia 6.1\n", | |
"2395 Émile Gaudreault 6.7\n", | |
"2396 Éric Tessier 6.6\n", | |
"2397 Étienne Faure 4.3\n", | |
"\n", | |
"[2398 rows x 2 columns]" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>director_name</th>\n", | |
" <th>imdb_score</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>A. Raven Cruz</td>\n", | |
" <td>1.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Aaron Hann</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Aaron Schneider</td>\n", | |
" <td>7.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Aaron Seltzer</td>\n", | |
" <td>2.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Abel Ferrara</td>\n", | |
" <td>6.6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2393</th>\n", | |
" <td>Zoran Lisinac</td>\n", | |
" <td>7.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2394</th>\n", | |
" <td>Álex de la Iglesia</td>\n", | |
" <td>6.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2395</th>\n", | |
" <td>Émile Gaudreault</td>\n", | |
" <td>6.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2396</th>\n", | |
" <td>Éric Tessier</td>\n", | |
" <td>6.6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2397</th>\n", | |
" <td>Étienne Faure</td>\n", | |
" <td>4.3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>2398 rows × 2 columns</p>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 227, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 227 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:18:58.437625Z", | |
"start_time": "2024-10-08T18:18:58.432489Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df.columns", | |
"id": "3563e71484d47b51", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',\n", | |
" 'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',\n", | |
" 'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',\n", | |
" 'movie_title', 'num_voted_users', 'cast_total_facebook_likes',\n", | |
" 'actor_3_name', 'facenumber_in_poster', 'plot_keywords',\n", | |
" 'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',\n", | |
" 'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',\n", | |
" 'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],\n", | |
" dtype='object')" | |
] | |
}, | |
"execution_count": 231, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 231 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:24:02.304914Z", | |
"start_time": "2024-10-08T18:24:02.272953Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "df[['director_name', 'movie_title', 'title_year', 'imdb_score']].merge(director_best_scores.rename(columns={'imdb_score': 'best_imdb_score'}), on='director_name').query('imdb_score == best_imdb_score').sort_values('imdb_score', ascending=False)", | |
"id": "e6506afc9cec068", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" director_name movie_title title_year imdb_score \\\n", | |
"2712 John Blanchard Towering Inferno NaN 9.5 \n", | |
"1902 Frank Darabont The Shawshank Redemption 1994.0 9.3 \n", | |
"3401 Francis Ford Coppola The Godfather 1972.0 9.2 \n", | |
"4315 John Stockwell Kickboxer: Vengeance 2016.0 9.1 \n", | |
"66 Christopher Nolan The Dark Knight 2008.0 9.0 \n", | |
"... ... ... ... ... \n", | |
"4669 Georgia Hilton Subconscious 2015.0 2.2 \n", | |
"3278 Vondie Curtis-Hall Glitter 2001.0 2.1 \n", | |
"1698 Frédéric Auburtin United Passions 2014.0 2.0 \n", | |
"4507 A. Raven Cruz The Helix... Loaded 2005.0 1.9 \n", | |
"1115 Lawrence Kasanoff Foodfight! 2012.0 1.7 \n", | |
"\n", | |
" best_imdb_score \n", | |
"2712 9.5 \n", | |
"1902 9.3 \n", | |
"3401 9.2 \n", | |
"4315 9.1 \n", | |
"66 9.0 \n", | |
"... ... \n", | |
"4669 2.2 \n", | |
"3278 2.1 \n", | |
"1698 2.0 \n", | |
"4507 1.9 \n", | |
"1115 1.7 \n", | |
"\n", | |
"[2508 rows x 5 columns]" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>director_name</th>\n", | |
" <th>movie_title</th>\n", | |
" <th>title_year</th>\n", | |
" <th>imdb_score</th>\n", | |
" <th>best_imdb_score</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2712</th>\n", | |
" <td>John Blanchard</td>\n", | |
" <td>Towering Inferno</td>\n", | |
" <td>NaN</td>\n", | |
" <td>9.5</td>\n", | |
" <td>9.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1902</th>\n", | |
" <td>Frank Darabont</td>\n", | |
" <td>The Shawshank Redemption</td>\n", | |
" <td>1994.0</td>\n", | |
" <td>9.3</td>\n", | |
" <td>9.3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3401</th>\n", | |
" <td>Francis Ford Coppola</td>\n", | |
" <td>The Godfather</td>\n", | |
" <td>1972.0</td>\n", | |
" <td>9.2</td>\n", | |
" <td>9.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4315</th>\n", | |
" <td>John Stockwell</td>\n", | |
" <td>Kickboxer: Vengeance</td>\n", | |
" <td>2016.0</td>\n", | |
" <td>9.1</td>\n", | |
" <td>9.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>66</th>\n", | |
" <td>Christopher Nolan</td>\n", | |
" <td>The Dark Knight</td>\n", | |
" <td>2008.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>9.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4669</th>\n", | |
" <td>Georgia Hilton</td>\n", | |
" <td>Subconscious</td>\n", | |
" <td>2015.0</td>\n", | |
" <td>2.2</td>\n", | |
" <td>2.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3278</th>\n", | |
" <td>Vondie Curtis-Hall</td>\n", | |
" <td>Glitter</td>\n", | |
" <td>2001.0</td>\n", | |
" <td>2.1</td>\n", | |
" <td>2.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1698</th>\n", | |
" <td>Frédéric Auburtin</td>\n", | |
" <td>United Passions</td>\n", | |
" <td>2014.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4507</th>\n", | |
" <td>A. Raven Cruz</td>\n", | |
" <td>The Helix... Loaded</td>\n", | |
" <td>2005.0</td>\n", | |
" <td>1.9</td>\n", | |
" <td>1.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1115</th>\n", | |
" <td>Lawrence Kasanoff</td>\n", | |
" <td>Foodfight!</td>\n", | |
" <td>2012.0</td>\n", | |
" <td>1.7</td>\n", | |
" <td>1.7</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>2508 rows × 5 columns</p>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 240, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 240 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:28:36.703177Z", | |
"start_time": "2024-10-08T18:28:36.696752Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"left = pd.DataFrame({'x': ['a', 'b'], 'y': [1, 2]})\n", | |
"right = pd.DataFrame({'x': ['a', 'w', 'a'], 'u': [1, 2, 3]})\n" | |
], | |
"id": "1648d10f2911d705", | |
"outputs": [], | |
"execution_count": 244 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:29:48.439366Z", | |
"start_time": "2024-10-08T18:29:48.430714Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "left", | |
"id": "9c5b84bba3423050", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" x y\n", | |
"0 a 1\n", | |
"1 b 2" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>a</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>b</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 247, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 247 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:29:55.862351Z", | |
"start_time": "2024-10-08T18:29:55.853999Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "right", | |
"id": "29d6d8ee95ba5bda", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" x u\n", | |
"0 a 1\n", | |
"1 w 2\n", | |
"2 a 3" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>x</th>\n", | |
" <th>u</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>a</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>w</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>a</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 248, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 248 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:29:28.137673Z", | |
"start_time": "2024-10-08T18:29:28.126103Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "left.merge(right, on='x', how='left')", | |
"id": "efb81d66199375f7", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" x y u\n", | |
"0 a 1 1.0\n", | |
"1 a 1 3.0\n", | |
"2 b 2 NaN" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>u</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>a</td>\n", | |
" <td>1</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>a</td>\n", | |
" <td>1</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>b</td>\n", | |
" <td>2</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 246, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 246 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:30:20.356012Z", | |
"start_time": "2024-10-08T18:30:20.344889Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "left.merge(right, on='x', how='right')", | |
"id": "b5eebe4ccd140b17", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" x y u\n", | |
"0 a 1.0 1\n", | |
"1 w NaN 2\n", | |
"2 a 1.0 3" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>u</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>a</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>w</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>a</td>\n", | |
" <td>1.0</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 249, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 249 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:30:31.261375Z", | |
"start_time": "2024-10-08T18:30:31.240997Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "left.merge(right, on='x', how='outer')", | |
"id": "4139b8db4a1d6c93", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" x y u\n", | |
"0 a 1.0 1.0\n", | |
"1 a 1.0 3.0\n", | |
"2 b 2.0 NaN\n", | |
"3 w NaN 2.0" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>u</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>a</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>a</td>\n", | |
" <td>1.0</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>b</td>\n", | |
" <td>2.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>w</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 250, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 250 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-08T18:30:50.012700Z", | |
"start_time": "2024-10-08T18:30:49.990956Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": "left.merge(right, how='cross')", | |
"id": "2e2bf71f33c3ffb9", | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
" x_x y x_y u\n", | |
"0 a 1 a 1\n", | |
"1 a 1 w 2\n", | |
"2 a 1 a 3\n", | |
"3 b 2 a 1\n", | |
"4 b 2 w 2\n", | |
"5 b 2 a 3" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>x_x</th>\n", | |
" <th>y</th>\n", | |
" <th>x_y</th>\n", | |
" <th>u</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>a</td>\n", | |
" <td>1</td>\n", | |
" <td>a</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>a</td>\n", | |
" <td>1</td>\n", | |
" <td>w</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>a</td>\n", | |
" <td>1</td>\n", | |
" <td>a</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>b</td>\n", | |
" <td>2</td>\n", | |
" <td>a</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>b</td>\n", | |
" <td>2</td>\n", | |
" <td>w</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>b</td>\n", | |
" <td>2</td>\n", | |
" <td>a</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"execution_count": 252, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 252 | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment