Skip to content

Instantly share code, notes, and snippets.

@datavudeja
Forked from animi4695/pandas.ipynb
Created August 15, 2025 10:50
Show Gist options
  • Save datavudeja/20de87878a156b6546f938784a22212b to your computer and use it in GitHub Desktop.
Save datavudeja/20de87878a156b6546f938784a22212b to your computer and use it in GitHub Desktop.
Pandas cheatsheet with examples
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"kernelspec": {
"name": "python",
"display_name": "Python (Pyodide)",
"language": "python"
},
"language_info": {
"codemirror_mode": {
"name": "python",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8"
}
},
"nbformat_minor": 4,
"nbformat": 4,
"cells": [
{
"cell_type": "code",
"source": "import pandas as pd\nimport numpy as np",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 1
},
{
"cell_type": "markdown",
"source": "### Series\n",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "markdown",
"source": "#### 2.1 Series Creation",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "x = pd.Series([5, 10, 15, 20, 25, 30, 35])",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 4
},
{
"cell_type": "code",
"source": "print(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 5\n1 10\n2 15\n3 20\n4 25\n5 30\n6 35\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 5
},
{
"cell_type": "code",
"source": "type(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 6,
"output_type": "execute_result",
"data": {
"text/plain": "pandas.core.series.Series"
},
"metadata": {}
}
],
"execution_count": 6
},
{
"cell_type": "code",
"source": "x.head()",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 7,
"output_type": "execute_result",
"data": {
"text/plain": "0 5\n1 10\n2 15\n3 20\n4 25\ndtype: int64"
},
"metadata": {}
}
],
"execution_count": 7
},
{
"cell_type": "code",
"source": "x.tail()",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 8,
"output_type": "execute_result",
"data": {
"text/plain": "2 15\n3 20\n4 25\n5 30\n6 35\ndtype: int64"
},
"metadata": {}
}
],
"execution_count": 8
},
{
"cell_type": "code",
"source": "data = {'a' : 0.,'b' : 1., 'c' : 2.,'d': 3,}\n\ny = pd.Series(data)\n\nprint(y)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "a 0.0\nb 1.0\nc 2.0\nd 3.0\ndtype: float64\n",
"output_type": "stream"
}
],
"execution_count": 11
},
{
"cell_type": "code",
"source": "z = np.array([10, 20, 30 , 40], dtype = 'int64')\n\npd.Series(z)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 16,
"output_type": "execute_result",
"data": {
"text/plain": "0 10\n1 20\n2 30\n3 40\ndtype: int64"
},
"metadata": {}
}
],
"execution_count": 16
},
{
"cell_type": "code",
"source": "pd.Series(np.linspace(start = 1, stop = 2, num = 10))",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 17,
"output_type": "execute_result",
"data": {
"text/plain": "0 1.000000\n1 1.111111\n2 1.222222\n3 1.333333\n4 1.444444\n5 1.555556\n6 1.666667\n7 1.777778\n8 1.888889\n9 2.000000\ndtype: float64"
},
"metadata": {}
}
],
"execution_count": 17
},
{
"cell_type": "markdown",
"source": "#### 2.2 Basic Indexing",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "x = pd.Series([5, 10, 15, 20, 25])\nx.index = [3, 1, 4, 0, 2]\n\nprint(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "3 5\n1 10\n4 15\n0 20\n2 25\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 21
},
{
"cell_type": "code",
"source": "x[0]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 22,
"output_type": "execute_result",
"data": {
"text/plain": "20"
},
"metadata": {}
}
],
"execution_count": 22
},
{
"cell_type": "code",
"source": "x.loc[0]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 25,
"output_type": "execute_result",
"data": {
"text/plain": "20"
},
"metadata": {}
}
],
"execution_count": 25
},
{
"cell_type": "code",
"source": "x.iloc[0]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 29,
"output_type": "execute_result",
"data": {
"text/plain": "5"
},
"metadata": {}
}
],
"execution_count": 29
},
{
"cell_type": "code",
"source": "x.iloc[1:4:2]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 30,
"output_type": "execute_result",
"data": {
"text/plain": "1 10\n0 20\ndtype: int64"
},
"metadata": {}
}
],
"execution_count": 30
},
{
"cell_type": "markdown",
"source": "###### Range index vs int64 Index",
"metadata": {}
},
{
"cell_type": "code",
"source": "x = pd.Series(np.random.normal(size = 10**7))\nprint(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 -0.095360\n1 -0.531923\n2 0.989790\n3 -0.515691\n4 0.229343\n ... \n9999995 -0.308055\n9999996 0.685634\n9999997 1.574605\n9999998 -0.551347\n9999999 -0.376001\nLength: 10000000, dtype: float64\n",
"output_type": "stream"
}
],
"execution_count": 33
},
{
"cell_type": "code",
"source": "x.index",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 39,
"output_type": "execute_result",
"data": {
"text/plain": "RangeIndex(start=0, stop=10000000, step=1)"
},
"metadata": {}
}
],
"execution_count": 39
},
{
"cell_type": "code",
"source": "y = pd.Series(np.random.normal(size = 10**7), index = np.arange(10**7))\nprint(y)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 -0.048644\n1 1.150449\n2 1.340439\n3 1.286724\n4 -1.398852\n ... \n9999995 -1.448750\n9999996 0.604911\n9999997 0.025833\n9999998 -0.064581\n9999999 -0.112200\nLength: 10000000, dtype: float64\n",
"output_type": "stream"
}
],
"execution_count": 36
},
{
"cell_type": "code",
"source": "y.index",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 38,
"output_type": "execute_result",
"data": {
"text/plain": "Int64Index([ 0, 1, 2, 3, 4, 5, 6,\n 7, 8, 9,\n ...\n 9999990, 9999991, 9999992, 9999993, 9999994, 9999995, 9999996,\n 9999997, 9999998, 9999999],\n dtype='int64', length=10000000)"
},
"metadata": {}
}
],
"execution_count": 38
},
{
"cell_type": "markdown",
"source": "int64 index consumes more memory than range index. it literally stores all the 10^7 indices",
"metadata": {}
},
{
"cell_type": "code",
"source": "import sys",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 40
},
{
"cell_type": "code",
"source": "sys.getsizeof(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 41,
"output_type": "execute_result",
"data": {
"text/plain": "80000088"
},
"metadata": {}
}
],
"execution_count": 41
},
{
"cell_type": "code",
"source": "sys.getsizeof(y)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 43,
"output_type": "execute_result",
"data": {
"text/plain": "160000016"
},
"metadata": {}
}
],
"execution_count": 43
},
{
"cell_type": "markdown",
"source": "###### Overwriting data in a series",
"metadata": {}
},
{
"cell_type": "code",
"source": "foo = pd.Series([10, 20, 30, 40, 50], index = ['a', 'b', 'c','d','e'])\nprint(foo)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "a 10\nb 20\nc 30\nd 40\ne 50\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 44
},
{
"cell_type": "code",
"source": "foo.iloc[1] = 200\nprint(foo)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "a 10\nb 200\nc 30\nd 40\ne 50\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 46
},
{
"cell_type": "code",
"source": "foo.iloc[[0,1,2]] = 99\nprint(foo)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "a 99\nb 99\nc 99\nd 40\ne 50\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 48
},
{
"cell_type": "code",
"source": "foo.iloc[:3] = 999\nprint(foo)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "a 999\nb 999\nc 999\nd 40\ne 50\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 50
},
{
"cell_type": "code",
"source": "x = pd.Series([10, 20, 30, 40])\ny = pd.Series([1, 11, 111, 1111], index = [7,3,2,0])\n\nprint(y)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "7 1\n3 11\n2 111\n0 1111\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 61
},
{
"cell_type": "code",
"source": "print(x)\n",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 10\n1 20\n2 30\n3 40\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 62
},
{
"cell_type": "code",
"source": "x.loc[[0,1]] = y",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 53
},
{
"cell_type": "code",
"source": "print(x) # looks for labels 0, 1 in x and try to replace the same with 0, 1 from y. as there is no 1 in y, it changes to NaN",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1111.0\n1 NaN\n2 30.0\n3 40.0\ndtype: float64\n",
"output_type": "stream"
}
],
"execution_count": 55
},
{
"cell_type": "code",
"source": "x.iloc[:2] = y.to_numpy()[:2]\nprint(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1\n1 11\n2 30\n3 40\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 63
},
{
"cell_type": "markdown",
"source": "#### 2.3 Series Basic Operations",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "x = pd.Series([1,2,3,4])\nprint(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1\n1 2\n2 3\n3 4\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 66
},
{
"cell_type": "code",
"source": "x + 1",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 67,
"output_type": "execute_result",
"data": {
"text/plain": "0 2\n1 3\n2 4\n3 5\ndtype: int64"
},
"metadata": {}
}
],
"execution_count": 67
},
{
"cell_type": "code",
"source": "x + pd.Series(1) # only adds at index 0",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 68,
"output_type": "execute_result",
"data": {
"text/plain": "0 2.0\n1 NaN\n2 NaN\n3 NaN\ndtype: float64"
},
"metadata": {}
}
],
"execution_count": 68
},
{
"cell_type": "markdown",
"source": "what is we don't want NaNs?",
"metadata": {}
},
{
"cell_type": "code",
"source": "x = pd.Series([1, 2,3,4])\ny = pd.Series([10, 20], index = [2,1])\n\nprint(x)\nprint(y)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1\n1 2\n2 3\n3 4\ndtype: int64\n2 10\n1 20\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 69
},
{
"cell_type": "code",
"source": "x.loc[y.index] += y\nprint(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1\n1 22\n2 13\n3 4\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 71
},
{
"cell_type": "markdown",
"source": "#### 2.4 Boolean Indexing",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "foo = pd.Series([20, 50, 11, 45, 17, 31])\nprint(foo)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 20\n1 50\n2 11\n3 45\n4 17\n5 31\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 73
},
{
"cell_type": "code",
"source": "foo < 20",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 74,
"output_type": "execute_result",
"data": {
"text/plain": "0 False\n1 False\n2 True\n3 False\n4 True\n5 False\ndtype: bool"
},
"metadata": {}
}
],
"execution_count": 74
},
{
"cell_type": "code",
"source": "mask = foo < 20\nfoo.loc[mask] #gets only values below 20. you can write foo.loc[foo < 20] too.",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 75,
"output_type": "execute_result",
"data": {
"text/plain": "2 11\n4 17\ndtype: int64"
},
"metadata": {}
}
],
"execution_count": 75
},
{
"cell_type": "code",
"source": "foo.index = [0,1,2,3,5,4] # swap last 2 indices\nprint(foo)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 20\n1 50\n2 11\n3 45\n5 17\n4 31\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 77
},
{
"cell_type": "code",
"source": "foo.loc[mask] # tries to match indicex of foo and mask",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 78,
"output_type": "execute_result",
"data": {
"text/plain": "2 11\n4 31\ndtype: int64"
},
"metadata": {}
}
],
"execution_count": 78
},
{
"cell_type": "code",
"source": "foo.loc[mask.to_numpy()]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 80,
"output_type": "execute_result",
"data": {
"text/plain": "2 11\n5 17\ndtype: int64"
},
"metadata": {}
}
],
"execution_count": 80
},
{
"cell_type": "markdown",
"source": "combining boolean series - element wise and element wise or negation",
"metadata": {}
},
{
"cell_type": "code",
"source": "ages = pd.Series(\n data = [42, 43, 14, 18, 1],\n index = ['peter', 'lois', 'chris', 'meg', 'stewie']\n)\nprint(ages)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "peter 42\nlois 43\nchris 14\nmeg 18\nstewie 1\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 81
},
{
"cell_type": "code",
"source": "genders = pd.Series(\n data = ['female', 'female', 'male', 'male', 'male'],\n index = ['lois', 'meg', 'chris', 'peter', 'stewie'],\n dtype = 'string'\n)\nprint(genders)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "lois female\nmeg female\nchris male\npeter male\nstewie male\ndtype: string\n",
"output_type": "stream"
}
],
"execution_count": 83
},
{
"cell_type": "code",
"source": "# Even though their indices are in diff order. we can still answer questions like\n# who's a male younger than 18?\n\nmask = (genders == 'male') & (ages < 18)\nmask.loc[mask]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 85,
"output_type": "execute_result",
"data": {
"text/plain": "chris True\nstewie True\ndtype: bool"
},
"metadata": {}
}
],
"execution_count": 85
},
{
"cell_type": "code",
"source": "mask",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 86,
"output_type": "execute_result",
"data": {
"text/plain": "chris True\nlois False\nmeg False\npeter False\nstewie True\ndtype: bool"
},
"metadata": {}
}
],
"execution_count": 86
},
{
"cell_type": "code",
"source": "~mask",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 87,
"output_type": "execute_result",
"data": {
"text/plain": "chris False\nlois True\nmeg True\npeter True\nstewie False\ndtype: bool"
},
"metadata": {}
}
],
"execution_count": 87
},
{
"cell_type": "markdown",
"source": "#### 2.5 Series Missing Values",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "# history of Nan\n\nroux = pd.Series([1,2,3])\nprint(roux)\nroux.iloc[1] = np.nan\nprint(roux) # converts the whole series to floating point number",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1\n1 2\n2 3\ndtype: int64\n0 1.0\n1 NaN\n2 3.0\ndtype: float64\n",
"output_type": "stream"
}
],
"execution_count": 92
},
{
"cell_type": "code",
"source": "roux = pd.Series([1,2,3], dtype = 'Int64')\nprint(roux)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1\n1 2\n2 3\ndtype: Int64\n",
"output_type": "stream"
}
],
"execution_count": 93
},
{
"cell_type": "code",
"source": "roux.iloc[1] = np.nan # or pd.Na or None\nprint(roux)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1\n1 <NA>\n2 3\ndtype: Int64\n",
"output_type": "stream"
}
],
"execution_count": 95
},
{
"cell_type": "code",
"source": "# fill na\nx = pd.Series([1, pd.NA, 3, pd.NA], dtype = 'Int64')\nprint(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1\n1 <NA>\n2 3\n3 <NA>\ndtype: Int64\n",
"output_type": "stream"
}
],
"execution_count": 96
},
{
"cell_type": "code",
"source": "x.fillna(-1)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 97,
"output_type": "execute_result",
"data": {
"text/plain": "0 1\n1 -1\n2 3\n3 -1\ndtype: Int64"
},
"metadata": {}
}
],
"execution_count": 97
},
{
"cell_type": "code",
"source": "x # notice the na values hasn't changed",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 98,
"output_type": "execute_result",
"data": {
"text/plain": "0 1\n1 <NA>\n2 3\n3 <NA>\ndtype: Int64"
},
"metadata": {}
}
],
"execution_count": 98
},
{
"cell_type": "code",
"source": "# to change it. use inplace=True\nx.fillna(-1, inplace=True)\nprint(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1\n1 -1\n2 3\n3 -1\ndtype: Int64\n",
"output_type": "stream"
}
],
"execution_count": 99
},
{
"cell_type": "markdown",
"source": "#### 2.6 Vectorization",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "x = pd.Series(np.random.uniform(low = 1, high = 2, size = 10**6))\nprint(x)\n",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 1.410044\n1 1.161561\n2 1.107593\n3 1.293172\n4 1.155268\n ... \n999995 1.682250\n999996 1.589057\n999997 1.310423\n999998 1.540775\n999999 1.796745\nLength: 1000000, dtype: float64\n",
"output_type": "stream"
}
],
"execution_count": 101
},
{
"cell_type": "code",
"source": "def average(x):\n avg = 0.0\n for i in range(x.size):\n avg += x.iloc[i]/x.size\n return avg",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 104
},
{
"cell_type": "code",
"source": "average(x) # very slow",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 105,
"output_type": "execute_result",
"data": {
"text/plain": "1.4999582043534072"
},
"metadata": {}
}
],
"execution_count": 105
},
{
"cell_type": "code",
"source": "np.mean(x) # very fast\n\n# avoid using loops.",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 106,
"output_type": "execute_result",
"data": {
"text/plain": "1.4999582043534536"
},
"metadata": {}
}
],
"execution_count": 106
},
{
"cell_type": "markdown",
"source": "#### 2.7 apply()",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "foo = pd.Series([3,9,2,2,8,7])\nprint(foo)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 3\n1 9\n2 2\n3 2\n4 8\n5 7\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 108
},
{
"cell_type": "code",
"source": "def my_func(x, a = 1, b = 3):\n return x - a if x % 2 == 0 else x + b",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 112
},
{
"cell_type": "code",
"source": "foo.apply(my_func, a = 2, b = 4)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 114,
"output_type": "execute_result",
"data": {
"text/plain": "0 7\n1 13\n2 0\n3 0\n4 6\n5 11\ndtype: int64"
},
"metadata": {}
}
],
"execution_count": 114
},
{
"cell_type": "code",
"source": "# apply() is not vectorized\n\nbigfoo = pd.Series(np.random.randint(low = 0, high = 9, size = 10**7))\nprint(bigfoo)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 0\n1 7\n2 2\n3 2\n4 8\n ..\n9999995 5\n9999996 1\n9999997 6\n9999998 4\n9999999 5\nLength: 10000000, dtype: int32\n",
"output_type": "stream"
}
],
"execution_count": 116
},
{
"cell_type": "code",
"source": "%%timeit\ny1 = bigfoo.apply(my_func)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "4.96 s ± 771 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
"output_type": "stream"
}
],
"execution_count": 117
},
{
"cell_type": "code",
"source": "%%timeit\na = bigfoo.to_numpy()\ny2 = pd.Series(np.where(a % 2 == 0, a - 1, a + 3))",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "90.2 ms ± 29.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
"output_type": "stream"
}
],
"execution_count": 118
},
{
"cell_type": "code",
"source": "# apply method is for convienience and clarity. not speed",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 119
},
{
"cell_type": "markdown",
"source": "#### 2.8 View vs Copy",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "x = pd.Series(\n data = [2,3,5,7,11],\n index = [2,11,12, 30, 30]\n)\nprint(x)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "2 2\n11 3\n12 5\n30 7\n30 11\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 120
},
{
"cell_type": "code",
"source": "y = x\ny.iloc[0] = 99\nprint(y)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "2 99\n11 3\n12 5\n30 7\n30 11\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 121
},
{
"cell_type": "code",
"source": "print(x) # also modifies x",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "2 99\n11 3\n12 5\n30 7\n30 11\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 123
},
{
"cell_type": "code",
"source": "y = x.copy()\ny.iloc[0] = 999\nprint(y)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "2 999\n11 3\n12 5\n30 7\n30 11\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 124
},
{
"cell_type": "code",
"source": "print(x) # now x is not changed",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "2 99\n11 3\n12 5\n30 7\n30 11\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 125
},
{
"cell_type": "markdown",
"source": "#### 2.9 Challenge : Baby Names",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "# you and your spouse have decide to let the internet name your next child. You've asked the great people of the web to \n# submit their favorite names and you've compiled their submissions\n# into a series called babynames. Determine how many people have voted for the following names: 'Chad', 'Ruger', and 'Zeltron'",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 126
},
{
"cell_type": "code",
"source": "babynames = pd.Series(['Jathonathon', 'Zeltron', 'Ruger', 'Phreddy', 'Ruger', 'Chad', 'Chad' 'Ruger', 'Ryan', 'Ruger', 'Chad', 'Ryan', 'Phreddy', 'Phreddy', 'Phreddy', 'Mister', 'Zeltron'\n , 'Ryan', 'Ruger', 'Ruger', 'Jathonathon', 'Jathonathon', 'Ruger', 'Chad', 'Zeltron'], dtype = 'string')",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 127
},
{
"cell_type": "code",
"source": "# solution\n\nbabynames.value_counts().loc[['Chad', 'Ruger', 'Zeltron']] # values_counts - returns a series containing counts of unique values.",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 129,
"output_type": "execute_result",
"data": {
"text/plain": "Chad 3\nRuger 6\nZeltron 3\ndtype: Int64"
},
"metadata": {}
}
],
"execution_count": 129
},
{
"cell_type": "markdown",
"source": "#### 2.10 Challenge: Bess Knees",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "# Given, two series, bees and knees, if the ith value of bees is NaN, double the ith value inside knees.",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 130
},
{
"cell_type": "code",
"source": "bees = pd.Series([True, True, False, np.nan, True, False, True, np.nan])\nprint(bees)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "0 True\n1 True\n2 False\n3 NaN\n4 True\n5 False\n6 True\n7 NaN\ndtype: object\n",
"output_type": "stream"
}
],
"execution_count": 132
},
{
"cell_type": "code",
"source": "knees = pd.Series([5,2,9,1,3,10,5,2], index = [7,0,2,6,3,5,1,4])\nprint(knees)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "7 5\n0 2\n2 9\n6 1\n3 3\n5 10\n1 5\n4 2\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 137
},
{
"cell_type": "code",
"source": "# solution\n\n# to_numpy is important else the index wouldn't have matched.\n# knees.loc[pd.isna(bees)] will indices from bees\n\nknees.loc[pd.isna(bees).to_numpy()] *= 2\nprint(knees)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "7 5\n0 2\n2 9\n6 2\n3 3\n5 10\n1 5\n4 4\ndtype: int64\n",
"output_type": "stream"
}
],
"execution_count": 136
},
{
"cell_type": "markdown",
"source": "#### 2.11 Challenge: Car Shopping",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "# After accidentally leaving an ice chest of fish and shrimp in your car for a week while you were on vacation, you're now in the market for a new vehicle.\n# Your insurance didn't cover the loss, so you want to make sure you get a good deal on your new car. Given a series of car asking_prices and another series of car fair_prices,\n# determine which cars for sale are a good deal. In other words, identify cars whose asking price is less than their fair price.\n\n# The result should be a list of integer indices corresponding to the good deals in asking_prices.",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 141
},
{
"cell_type": "code",
"source": "asking_prices = pd.Series([5000, 7600, 9000, 8500, 7000], index =['civic', 'civic', 'camry', 'mustang', 'mustang'])\nfair_prices = pd.Series([5500, 7500, 7500], index = ['civic', 'mustang', 'camry'])",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 152
},
{
"cell_type": "code",
"source": "# solution\n\n# we can restructure this and solve it with Dataframes. we haven't learnt dataframes yet.\n\nall_fair_prices = fair_prices.loc[asking_prices.index]\noff_market_prices = asking_prices - all_fair_prices\nbelow_fair_prices = (off_market_prices < 0).reset_index(drop=True)\nbelow_fair_prices.loc[below_fair_prices].index.to_list()\n",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 156,
"output_type": "execute_result",
"data": {
"text/plain": "[0, 4]"
},
"metadata": {}
}
],
"execution_count": 156
},
{
"cell_type": "markdown",
"source": "#### 2.12 Challenge: Price Gouging",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "# Track the price of ground beef everyday for 10 days. You've compiled the data into a series called beef_prices, whose index represents the day of each recording. Determin which \n# day had the biggest price increase from the prior day.",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 157
},
{
"cell_type": "code",
"source": "generator = np.random.default_rng(123)\nbeef_prices = pd.Series(\n data = np.round(generator.uniform(low = 3, high = 5, size = 10), 2),\n index = generator.choice(10, size = 10, replace = False)\n)\nprint(beef_prices)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "4 4.36\n8 3.11\n2 3.44\n0 3.37\n6 3.35\n9 4.62\n3 4.85\n5 3.55\n1 4.64\n7 4.78\ndtype: float64\n",
"output_type": "stream"
}
],
"execution_count": 159
},
{
"cell_type": "code",
"source": "# solution\n\nbeef_prices.sort_index(inplace = True)\nbeef_prices_prev = beef_prices.shift(periods = 1)\ndaily_changes = beef_prices - beef_prices_prev\ndaily_changes.idxmax() # returns row label of the maximum value",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 164,
"output_type": "execute_result",
"data": {
"text/plain": "9"
},
"metadata": {}
}
],
"execution_count": 164
},
{
"cell_type": "markdown",
"source": "#### 2.13 Challenge: Fair Teams",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "# 6 coaches and 20 players have signed up. Your job is to randomly and fairly determine the teams, assigning players to coaches. Keep in mind that some teams \n# will have three players and some teams will have four players. given a Series of coaches and Series of players, create a Series of random coach-to-player mappings.\n\n# The resulting Series should have coach names in its index and corresponding player names in its values.",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 165
},
{
"cell_type": "code",
"source": "coaches = pd.Series(['Aaron', 'Donald', 'Joshua', 'Peter', 'Scott', 'Stephen'], dtype = 'string')\nplayers = pd.Series(['Asher', 'Connor', 'Elizabeth', 'Emily', 'Ethan', 'Hannah', 'Isabella', 'Isaiah', 'James', 'Joshua', 'Julian', 'Layla', 'Leo', \n 'Madison', 'Mia', 'Oliver', 'Ryan', 'Scarlat', 'William', 'Wyatt'], dtype = 'string')",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 166
},
{
"cell_type": "code",
"source": "coaches = coaches.sample(frac=1, random_state=2357) # returns a random sample of items from an axis of object\nplayers = players.sample(frac=1, random_state=7532)\n\nrepeats = np.ceil(len(players)/len(coaches)).astype('int64') # num of times coaches list is repeated beside player list. ceil(20/6) \ncoaches_repeated = pd.concat([coaches] * repeats).head(len(players)) # coaches list to match the length of players. head(players) is used to pick only 20 \n\nresult = players.copy()\nresult.index = pd.Index(coaches_repeated, name ='coach')\nprint(result)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "1 Donald\n3 Peter\n5 Stephen\n0 Aaron\n2 Joshua\n4 Scott\ndtype: string\n12 Leo\n13 Madison\n2 Elizabeth\n10 Julian\n8 James\n11 Layla\n6 Isabella\n7 Isaiah\n15 Oliver\n14 Mia\n17 Scarlat\n3 Emily\n5 Hannah\n1 Connor\n9 Joshua\n4 Ethan\n0 Asher\n18 William\n16 Ryan\n19 Wyatt\ndtype: string\ncoach\nDonald Leo\nPeter Madison\nStephen Elizabeth\nAaron Julian\nJoshua James\nScott Layla\nDonald Isabella\nPeter Isaiah\nStephen Oliver\nAaron Mia\nJoshua Scarlat\nScott Emily\nDonald Hannah\nPeter Connor\nStephen Joshua\nAaron Ethan\nJoshua Asher\nScott William\nDonald Ryan\nPeter Wyatt\ndtype: string\n",
"output_type": "stream"
}
],
"execution_count": 168
},
{
"cell_type": "markdown",
"source": "### DataFrame",
"metadata": {}
},
{
"cell_type": "markdown",
"source": "#### 3.1 DataFrame Creation",
"metadata": {}
},
{
"cell_type": "code",
"source": "# Dataframe is a table of data with a row index. row index is an unlabelled column on the left.\n\ndf = pd.DataFrame({'name' : ['Bob', 'Sue', 'Mary'], 'age' : [39, 57, 28]})\n\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " name age\n0 Bob 39\n1 Sue 57\n2 Mary 28\n",
"output_type": "stream"
}
],
"execution_count": 3
},
{
"cell_type": "code",
"source": "df = pd.DataFrame(\n [\n ['Bob', 39],\n ['Sue', 57],\n ['Mary', 28]\n ], columns =['name', 'age']\n)\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " name age\n0 Bob 39\n1 Sue 57\n2 Mary 28\n",
"output_type": "stream"
}
],
"execution_count": 5
},
{
"cell_type": "code",
"source": "df.info()",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 3 entries, 0 to 2\nData columns (total 2 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 name 3 non-null object\n 1 age 3 non-null int64 \ndtypes: int64(1), object(1)\nmemory usage: 108.0+ bytes\n",
"output_type": "stream"
}
],
"execution_count": 6
},
{
"cell_type": "code",
"source": "df.shape",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 7,
"output_type": "execute_result",
"data": {
"text/plain": "(3, 2)"
},
"metadata": {}
}
],
"execution_count": 7
},
{
"cell_type": "code",
"source": "df.axes",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 8,
"output_type": "execute_result",
"data": {
"text/plain": "[RangeIndex(start=0, stop=3, step=1), Index(['name', 'age'], dtype='object')]"
},
"metadata": {}
}
],
"execution_count": 8
},
{
"cell_type": "code",
"source": "df.size",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 9,
"output_type": "execute_result",
"data": {
"text/plain": "6"
},
"metadata": {}
}
],
"execution_count": 9
},
{
"cell_type": "code",
"source": "df.rename(columns = {'age' : 'years'}, inplace = True)\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " name years\n0 Bob 39\n1 Sue 57\n2 Mary 28\n",
"output_type": "stream"
}
],
"execution_count": 11
},
{
"cell_type": "markdown",
"source": "#### 3.2 To and From CSV",
"metadata": {}
},
{
"cell_type": "code",
"source": "# create data \n\ndf = pd.DataFrame(\n {\n 'id' : np.arange(1000),\n 'b' : np.random.normal(size = 1000),\n 'c': pd.Series(np.random.choice(['cat', 'dog', 'hippo'], size = 1000, replace = True))\n }\n)\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " id b c\n0 0 1.703454 dog\n1 1 -0.291755 dog\n2 2 -0.255308 cat\n3 3 -0.628465 cat\n4 4 -0.108681 dog\n.. ... ... ...\n995 995 -0.127438 hippo\n996 996 -2.956545 hippo\n997 997 0.032741 cat\n998 998 -0.966289 hippo\n999 999 0.086645 cat\n\n[1000 rows x 3 columns]\n",
"output_type": "stream"
}
],
"execution_count": 2
},
{
"cell_type": "code",
"source": "df.to_csv('pets.csv', index = False)",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 3
},
{
"cell_type": "code",
"source": "pets = pd.read_csv('pets.csv')\nprint(pets)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " id b c\n0 0 1.703454 dog\n1 1 -0.291755 dog\n2 2 -0.255308 cat\n3 3 -0.628465 cat\n4 4 -0.108681 dog\n.. ... ... ...\n995 995 -0.127438 hippo\n996 996 -2.956545 hippo\n997 997 0.032741 cat\n998 998 -0.966289 hippo\n999 999 0.086645 cat\n\n[1000 rows x 3 columns]\n",
"output_type": "stream"
}
],
"execution_count": 4
},
{
"cell_type": "markdown",
"source": "#### 3.3 Basic Indexing",
"metadata": {}
},
{
"cell_type": "code",
"source": "df = pd.DataFrame(\n {\n 'shrimp' : [10, 20, 30 , 40, 50, 60],\n 'crab' : [5, 10, 15, 20, 25, 30],\n 'red fish' : [2,3,5,7,11, 13]\n }\n)\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " shrimp crab red fish\n0 10 5 2\n1 20 10 3\n2 30 15 5\n3 40 20 7\n4 50 25 11\n5 60 30 13\n",
"output_type": "stream"
}
],
"execution_count": 5
},
{
"cell_type": "code",
"source": "df['shrimp']",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 6,
"output_type": "execute_result",
"data": {
"text/plain": "0 10\n1 20\n2 30\n3 40\n4 50\n5 60\nName: shrimp, dtype: int64"
},
"metadata": {}
}
],
"execution_count": 6
},
{
"cell_type": "code",
"source": "df.shrimp",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 7,
"output_type": "execute_result",
"data": {
"text/plain": "0 10\n1 20\n2 30\n3 40\n4 50\n5 60\nName: shrimp, dtype: int64"
},
"metadata": {}
}
],
"execution_count": 7
},
{
"cell_type": "code",
"source": "df[['shrimp', 'red fish']] # copy of original dataframe",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 9,
"output_type": "execute_result",
"data": {
"text/plain": " shrimp red fish\n0 10 2\n1 20 3\n2 30 5\n3 40 7\n4 50 11\n5 60 13",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>shrimp</th>\n <th>red fish</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10</td>\n <td>2</td>\n </tr>\n <tr>\n <th>1</th>\n <td>20</td>\n <td>3</td>\n </tr>\n <tr>\n <th>2</th>\n <td>30</td>\n <td>5</td>\n </tr>\n <tr>\n <th>3</th>\n <td>40</td>\n <td>7</td>\n </tr>\n <tr>\n <th>4</th>\n <td>50</td>\n <td>11</td>\n </tr>\n <tr>\n <th>5</th>\n <td>60</td>\n <td>13</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 9
},
{
"cell_type": "code",
"source": "df.iloc[[0,2,4]]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 10,
"output_type": "execute_result",
"data": {
"text/plain": " shrimp crab red fish\n0 10 5 2\n2 30 15 5\n4 50 25 11",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>shrimp</th>\n <th>crab</th>\n <th>red fish</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10</td>\n <td>5</td>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>30</td>\n <td>15</td>\n <td>5</td>\n </tr>\n <tr>\n <th>4</th>\n <td>50</td>\n <td>25</td>\n <td>11</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 10
},
{
"cell_type": "code",
"source": "df.iloc[0:5:2] #give me everything from 0 (inclusive) to 5 (exclusive), stepping by 2",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 14,
"output_type": "execute_result",
"data": {
"text/plain": " shrimp crab red fish\n0 10 5 2\n2 30 15 5\n4 50 25 11",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>shrimp</th>\n <th>crab</th>\n <th>red fish</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10</td>\n <td>5</td>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>30</td>\n <td>15</td>\n <td>5</td>\n </tr>\n <tr>\n <th>4</th>\n <td>50</td>\n <td>25</td>\n <td>11</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 14
},
{
"cell_type": "code",
"source": "df.iloc[1] # returns as Series",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 15,
"output_type": "execute_result",
"data": {
"text/plain": "shrimp 20\ncrab 10\nred fish 3\nName: 1, dtype: int64"
},
"metadata": {}
}
],
"execution_count": 15
},
{
"cell_type": "code",
"source": "df.iloc[[1]]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 16,
"output_type": "execute_result",
"data": {
"text/plain": " shrimp crab red fish\n1 20 10 3",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>shrimp</th>\n <th>crab</th>\n <th>red fish</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1</th>\n <td>20</td>\n <td>10</td>\n <td>3</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 16
},
{
"cell_type": "code",
"source": "df.iloc[:, [0,1]] # give me every row of the df but only columns 0, 1",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 20,
"output_type": "execute_result",
"data": {
"text/plain": " shrimp crab\n0 10 5\n1 20 10\n2 30 15\n3 40 20\n4 50 25\n5 60 30",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>shrimp</th>\n <th>crab</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10</td>\n <td>5</td>\n </tr>\n <tr>\n <th>1</th>\n <td>20</td>\n <td>10</td>\n </tr>\n <tr>\n <th>2</th>\n <td>30</td>\n <td>15</td>\n </tr>\n <tr>\n <th>3</th>\n <td>40</td>\n <td>20</td>\n </tr>\n <tr>\n <th>4</th>\n <td>50</td>\n <td>25</td>\n </tr>\n <tr>\n <th>5</th>\n <td>60</td>\n <td>30</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 20
},
{
"cell_type": "code",
"source": "df.iloc[[0,2], [1,2]] # give me rows 0, 2 with columns 1, 2",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 22,
"output_type": "execute_result",
"data": {
"text/plain": " crab red fish\n0 5 2\n2 15 5",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>crab</th>\n <th>red fish</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>5</td>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>15</td>\n <td>5</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 22
},
{
"cell_type": "code",
"source": "df.index = ['a', 'b', 'c', 'd', 'e', 'f']\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " shrimp crab red fish\na 10 5 2\nb 20 10 3\nc 30 15 5\nd 40 20 7\ne 50 25 11\nf 60 30 13\n",
"output_type": "stream"
}
],
"execution_count": 23
},
{
"cell_type": "code",
"source": "df.loc[['b', 'e']]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 24,
"output_type": "execute_result",
"data": {
"text/plain": " shrimp crab red fish\nb 20 10 3\ne 50 25 11",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>shrimp</th>\n <th>crab</th>\n <th>red fish</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>b</th>\n <td>20</td>\n <td>10</td>\n <td>3</td>\n </tr>\n <tr>\n <th>e</th>\n <td>50</td>\n <td>25</td>\n <td>11</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 24
},
{
"cell_type": "code",
"source": "df.loc[['a', 'c', 'f'], ['crab' , 'shrimp']]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 25,
"output_type": "execute_result",
"data": {
"text/plain": " crab shrimp\na 5 10\nc 15 30\nf 30 60",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>crab</th>\n <th>shrimp</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>a</th>\n <td>5</td>\n <td>10</td>\n </tr>\n <tr>\n <th>c</th>\n <td>15</td>\n <td>30</td>\n </tr>\n <tr>\n <th>f</th>\n <td>30</td>\n <td>60</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 25
},
{
"cell_type": "code",
"source": "df.loc['b':'e', ['crab', 'shrimp']]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 28,
"output_type": "execute_result",
"data": {
"text/plain": " crab shrimp\nb 10 20\nc 15 30\nd 20 40\ne 25 50",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>crab</th>\n <th>shrimp</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>b</th>\n <td>10</td>\n <td>20</td>\n </tr>\n <tr>\n <th>c</th>\n <td>15</td>\n <td>30</td>\n </tr>\n <tr>\n <th>d</th>\n <td>20</td>\n <td>40</td>\n </tr>\n <tr>\n <th>e</th>\n <td>25</td>\n <td>50</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 28
},
{
"cell_type": "code",
"source": "# df.iloc[:3, ['shrimp']] doesn't work in pandas. it expects numeric indexers\n\ndf.iloc[:3, [0, 2]]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 32,
"output_type": "execute_result",
"data": {
"text/plain": " shrimp red fish\na 10 2\nb 20 3\nc 30 5",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>shrimp</th>\n <th>red fish</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>a</th>\n <td>10</td>\n <td>2</td>\n </tr>\n <tr>\n <th>b</th>\n <td>20</td>\n <td>3</td>\n </tr>\n <tr>\n <th>c</th>\n <td>30</td>\n <td>5</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 32
},
{
"cell_type": "code",
"source": "# get_loc - get integer location for requested label\ndf.iloc[:3, [df.columns.get_loc(c) for c in ['shrimp', 'red fish']]]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 34,
"output_type": "execute_result",
"data": {
"text/plain": " shrimp red fish\na 10 2\nb 20 3\nc 30 5",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>shrimp</th>\n <th>red fish</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>a</th>\n <td>10</td>\n <td>2</td>\n </tr>\n <tr>\n <th>b</th>\n <td>20</td>\n <td>3</td>\n </tr>\n <tr>\n <th>c</th>\n <td>30</td>\n <td>5</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 34
},
{
"cell_type": "code",
"source": "df.loc[df.shrimp > 40, ['shrimp']]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 39,
"output_type": "execute_result",
"data": {
"text/plain": " shrimp\ne 50\nf 60",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>shrimp</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>e</th>\n <td>50</td>\n </tr>\n <tr>\n <th>f</th>\n <td>60</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 39
},
{
"cell_type": "markdown",
"source": "#### 3.4 Basic Operations",
"metadata": {}
},
{
"cell_type": "code",
"source": "df = pd.DataFrame(\n {\n 'a' : [2,3,11, 13],\n 'b' : ['fox', 'rabbit', 'hound', 'rabbit']\n }\n)\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " a b\n0 2 fox\n1 3 rabbit\n2 11 hound\n3 13 rabbit\n",
"output_type": "stream"
}
],
"execution_count": 41
},
{
"cell_type": "code",
"source": "# inserting a new column into df\n\ndf['c'] = [1, 0 , 1, 2]\n\n# we cannot use df.c = 1\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " a b c\n0 2 fox 1\n1 3 rabbit 0\n2 11 hound 1\n3 13 rabbit 2\n",
"output_type": "stream"
}
],
"execution_count": 44
},
{
"cell_type": "code",
"source": "df['e'] = df.a + df.c\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " a b c e\n0 2 fox 1 3\n1 3 rabbit 0 3\n2 11 hound 1 12\n3 13 rabbit 2 15\n",
"output_type": "stream"
}
],
"execution_count": 45
},
{
"cell_type": "code",
"source": "df.loc[df.b == 'rabbit', 'f'] = 0\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " a b c e f\n0 2 fox 1 3 NaN\n1 3 rabbit 0 3 0.0\n2 11 hound 1 12 NaN\n3 13 rabbit 2 15 0.0\n",
"output_type": "stream"
}
],
"execution_count": 47
},
{
"cell_type": "code",
"source": "df.drop(columns = ['a', 'c'], inplace = True)\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " b e f\n0 fox 3 NaN\n1 rabbit 3 0.0\n2 hound 12 NaN\n3 rabbit 15 0.0\n",
"output_type": "stream"
}
],
"execution_count": 48
},
{
"cell_type": "markdown",
"source": "#### 3.5 apply()",
"metadata": {}
},
{
"cell_type": "code",
"source": "df = pd.DataFrame(\n {\n 'A' : [5.2, 1.7, 9.4],\n 'B' : [3.9, 4.0, 7.8]\n }\n)\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " A B\n0 5.2 3.9\n1 1.7 4.0\n2 9.4 7.8\n",
"output_type": "stream"
}
],
"execution_count": 49
},
{
"cell_type": "code",
"source": "# apply a function along an axis of the DataFrame\n\ndf.apply(func = np.sum, axis = 1)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 51,
"output_type": "execute_result",
"data": {
"text/plain": "0 9.1\n1 5.7\n2 17.2\ndtype: float64"
},
"metadata": {}
}
],
"execution_count": 51
},
{
"cell_type": "code",
"source": "kids = pd.DataFrame(\n {\n 'name' : pd.Series(['alice', 'mary', 'jimmy', 'johnny', 'susan'], dtype = 'string'),\n 'age' : [9,13,11,15, 8],\n 'with_adult' : [True, False, False, True, True]\n }\n)\nprint(kids)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " name age with_adult\n0 alice 9 True\n1 mary 13 False\n2 jimmy 11 False\n3 johnny 15 True\n4 susan 8 True\n",
"output_type": "stream"
}
],
"execution_count": 52
},
{
"cell_type": "code",
"source": "def is_allowed(age, with_adult):\n return age >= 12 or with_adult",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 57
},
{
"cell_type": "code",
"source": "# kids.apply(is_allowed, axis = 1) # doesn't work\n\nkids.apply(lambda row : is_allowed(row.loc['age'], row.loc['with_adult']), axis = 1)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 59,
"output_type": "execute_result",
"data": {
"text/plain": "0 True\n1 True\n2 False\n3 True\n4 True\ndtype: bool"
},
"metadata": {}
}
],
"execution_count": 59
},
{
"cell_type": "code",
"source": "kids['is_allowed'] = kids.apply(lambda row: is_allowed(row.loc['age'], row.loc['with_adult']), axis = 1)\nprint(kids)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " name age with_adult is_allowed\n0 alice 9 True True\n1 mary 13 False True\n2 jimmy 11 False False\n3 johnny 15 True True\n4 susan 8 True True\n",
"output_type": "stream"
}
],
"execution_count": 61
},
{
"cell_type": "markdown",
"source": "#### 3.6 view vs copy",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "df = pd.DataFrame(\n {\n 'x' : [1,2,3],\n 'y' : [10, 20, 30]\n }\n)\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " x y\n0 1 10\n1 2 20\n2 3 30\n",
"output_type": "stream"
}
],
"execution_count": 77
},
{
"cell_type": "code",
"source": "v1 = df['x']",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 78
},
{
"cell_type": "code",
"source": "v1.iloc[0] = 999\nprint(df) # df changed",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " x y\n0 999 10\n1 2 20\n2 3 30\n",
"output_type": "stream"
}
],
"execution_count": 79
},
{
"cell_type": "code",
"source": "v2 = df['y'] + 1",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 80
},
{
"cell_type": "code",
"source": "v2.iloc[0] = 0\nprint(df) # df doesn't change",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " x y\n0 999 10\n1 2 20\n2 3 30\n",
"output_type": "stream"
}
],
"execution_count": 81
},
{
"cell_type": "code",
"source": "v1 = df.iloc[[0,1]]\nv2 = df.iloc[:2]",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": 82
},
{
"cell_type": "code",
"source": "v1.iloc[1] = 111\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stderr",
"text": "<ipython-input-84-4c2417139498>:1: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n v1.iloc[1] = 111\n",
"output_type": "stream"
},
{
"name": "stdout",
"text": " x y\n0 999 10\n1 2 20\n2 3 30\n",
"output_type": "stream"
}
],
"execution_count": 84
},
{
"cell_type": "code",
"source": "v2.iloc[1] = 222\nprint(df) #changed because v2 is just a reference to the memory. v2 is a view not a copy.",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stderr",
"text": "<ipython-input-85-6c752e41547c>:1: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n v2.iloc[1] = 222\n",
"output_type": "stream"
},
{
"name": "stdout",
"text": " x y\n0 999 10\n1 222 222\n2 3 30\n",
"output_type": "stream"
}
],
"execution_count": 85
},
{
"cell_type": "markdown",
"source": "#### 3.7 merge()",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
}
},
{
"cell_type": "code",
"source": "pets = pd.DataFrame(\n data = {\n 'name' : ['Mr. Snuggles', 'Honey Chew Chew', 'Professor', 'Chairman Meow', 'hello'],\n 'type' : ['cat', 'dog', 'dog', 'cat', 'horse']\n },\n index = [71, 42, 11, 98, 42]\n)\n\nprint(pets)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " name type\n71 Mr. Snuggles cat\n42 Honey Chew Chew dog\n11 Professor dog\n98 Chairman cat\n42 hello horse\n",
"output_type": "stream"
}
],
"execution_count": 87
},
{
"cell_type": "code",
"source": "visits = pd.DataFrame(\n data = {\n 'pet_id' : [42, 31, 71, 42, 98, 42],\n 'date' : ['2019-03-15', '2019-03-15', '2019-04-05', '2019-04-06', '2019-04-07', '2019-04-08']\n }\n)\nprint(visits)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " pet_id date\n0 42 2019-03-15\n1 31 2019-03-15\n2 71 2019-04-05\n3 42 2019-04-06\n4 98 2019-04-07\n5 42 2019-04-08\n",
"output_type": "stream"
}
],
"execution_count": 90
},
{
"cell_type": "code",
"source": "# merge - merges data frames or named series objects with a databse-style join\n\n# params - left, right, how (left, right, outer, inner(default))\n\npd.merge(left = pets, right = visits, how = 'inner', left_index =True, right_on = 'pet_id')\n",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 92,
"output_type": "execute_result",
"data": {
"text/plain": " name type pet_id date\n2 Mr. Snuggles cat 71 2019-04-05\n0 Honey Chew Chew dog 42 2019-03-15\n3 Honey Chew Chew dog 42 2019-04-06\n5 Honey Chew Chew dog 42 2019-04-08\n0 hello horse 42 2019-03-15\n3 hello horse 42 2019-04-06\n5 hello horse 42 2019-04-08\n4 Chairman cat 98 2019-04-07",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>type</th>\n <th>pet_id</th>\n <th>date</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2</th>\n <td>Mr. Snuggles</td>\n <td>cat</td>\n <td>71</td>\n <td>2019-04-05</td>\n </tr>\n <tr>\n <th>0</th>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>42</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>42</td>\n <td>2019-04-06</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>42</td>\n <td>2019-04-08</td>\n </tr>\n <tr>\n <th>0</th>\n <td>hello</td>\n <td>horse</td>\n <td>42</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>3</th>\n <td>hello</td>\n <td>horse</td>\n <td>42</td>\n <td>2019-04-06</td>\n </tr>\n <tr>\n <th>5</th>\n <td>hello</td>\n <td>horse</td>\n <td>42</td>\n <td>2019-04-08</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Chairman</td>\n <td>cat</td>\n <td>98</td>\n <td>2019-04-07</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 92
},
{
"cell_type": "code",
"source": "pets.index.rename('pet_id', inplace = True)\nprint(pets)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " name type\npet_id \n71 Mr. Snuggles cat\n42 Honey Chew Chew dog\n11 Professor dog\n98 Chairman cat\n42 hello horse\n",
"output_type": "stream"
}
],
"execution_count": 93
},
{
"cell_type": "code",
"source": "visits.index.rename('visit_id', inplace = True)\nprint(visits)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " pet_id date\nvisit_id \n0 42 2019-03-15\n1 31 2019-03-15\n2 71 2019-04-05\n3 42 2019-04-06\n4 98 2019-04-07\n5 42 2019-04-08\n",
"output_type": "stream"
}
],
"execution_count": 94
},
{
"cell_type": "code",
"source": "# advantage of setting petid, visit_id is we can us 'on' while merging\n\npd.merge(left = pets, right = visits, how = 'inner', on = 'pet_id')",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 95,
"output_type": "execute_result",
"data": {
"text/plain": " pet_id name type date\n0 71 Mr. Snuggles cat 2019-04-05\n1 42 Honey Chew Chew dog 2019-03-15\n2 42 Honey Chew Chew dog 2019-04-06\n3 42 Honey Chew Chew dog 2019-04-08\n4 42 hello horse 2019-03-15\n5 42 hello horse 2019-04-06\n6 42 hello horse 2019-04-08\n7 98 Chairman cat 2019-04-07",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>pet_id</th>\n <th>name</th>\n <th>type</th>\n <th>date</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>71</td>\n <td>Mr. Snuggles</td>\n <td>cat</td>\n <td>2019-04-05</td>\n </tr>\n <tr>\n <th>1</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>2</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-04-06</td>\n </tr>\n <tr>\n <th>3</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-04-08</td>\n </tr>\n <tr>\n <th>4</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>5</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-04-06</td>\n </tr>\n <tr>\n <th>6</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-04-08</td>\n </tr>\n <tr>\n <th>7</th>\n <td>98</td>\n <td>Chairman</td>\n <td>cat</td>\n <td>2019-04-07</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 95
},
{
"cell_type": "code",
"source": "pd.merge(left = pets, right = visits, how = 'left', on = 'pet_id')",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 96,
"output_type": "execute_result",
"data": {
"text/plain": " pet_id name type date\n0 71 Mr. Snuggles cat 2019-04-05\n1 42 Honey Chew Chew dog 2019-03-15\n2 42 Honey Chew Chew dog 2019-04-06\n3 42 Honey Chew Chew dog 2019-04-08\n4 11 Professor dog NaN\n5 98 Chairman cat 2019-04-07\n6 42 hello horse 2019-03-15\n7 42 hello horse 2019-04-06\n8 42 hello horse 2019-04-08",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>pet_id</th>\n <th>name</th>\n <th>type</th>\n <th>date</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>71</td>\n <td>Mr. Snuggles</td>\n <td>cat</td>\n <td>2019-04-05</td>\n </tr>\n <tr>\n <th>1</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>2</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-04-06</td>\n </tr>\n <tr>\n <th>3</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-04-08</td>\n </tr>\n <tr>\n <th>4</th>\n <td>11</td>\n <td>Professor</td>\n <td>dog</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>5</th>\n <td>98</td>\n <td>Chairman</td>\n <td>cat</td>\n <td>2019-04-07</td>\n </tr>\n <tr>\n <th>6</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>7</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-04-06</td>\n </tr>\n <tr>\n <th>8</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-04-08</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 96
},
{
"cell_type": "code",
"source": "pd.merge(left = pets, right = visits, how = 'right', on = 'pet_id')",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 98,
"output_type": "execute_result",
"data": {
"text/plain": " pet_id name type date\n0 42 Honey Chew Chew dog 2019-03-15\n1 42 hello horse 2019-03-15\n2 31 NaN NaN 2019-03-15\n3 71 Mr. Snuggles cat 2019-04-05\n4 42 Honey Chew Chew dog 2019-04-06\n5 42 hello horse 2019-04-06\n6 98 Chairman cat 2019-04-07\n7 42 Honey Chew Chew dog 2019-04-08\n8 42 hello horse 2019-04-08",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>pet_id</th>\n <th>name</th>\n <th>type</th>\n <th>date</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>1</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>2</th>\n <td>31</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>3</th>\n <td>71</td>\n <td>Mr. Snuggles</td>\n <td>cat</td>\n <td>2019-04-05</td>\n </tr>\n <tr>\n <th>4</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-04-06</td>\n </tr>\n <tr>\n <th>5</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-04-06</td>\n </tr>\n <tr>\n <th>6</th>\n <td>98</td>\n <td>Chairman</td>\n <td>cat</td>\n <td>2019-04-07</td>\n </tr>\n <tr>\n <th>7</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-04-08</td>\n </tr>\n <tr>\n <th>8</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-04-08</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 98
},
{
"cell_type": "code",
"source": "pd.merge(left = pets, right = visits, how = 'outer', on = 'pet_id')",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 99,
"output_type": "execute_result",
"data": {
"text/plain": " pet_id name type date\n0 71 Mr. Snuggles cat 2019-04-05\n1 42 Honey Chew Chew dog 2019-03-15\n2 42 Honey Chew Chew dog 2019-04-06\n3 42 Honey Chew Chew dog 2019-04-08\n4 42 hello horse 2019-03-15\n5 42 hello horse 2019-04-06\n6 42 hello horse 2019-04-08\n7 11 Professor dog NaN\n8 98 Chairman cat 2019-04-07\n9 31 NaN NaN 2019-03-15",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>pet_id</th>\n <th>name</th>\n <th>type</th>\n <th>date</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>71</td>\n <td>Mr. Snuggles</td>\n <td>cat</td>\n <td>2019-04-05</td>\n </tr>\n <tr>\n <th>1</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>2</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-04-06</td>\n </tr>\n <tr>\n <th>3</th>\n <td>42</td>\n <td>Honey Chew Chew</td>\n <td>dog</td>\n <td>2019-04-08</td>\n </tr>\n <tr>\n <th>4</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-03-15</td>\n </tr>\n <tr>\n <th>5</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-04-06</td>\n </tr>\n <tr>\n <th>6</th>\n <td>42</td>\n <td>hello</td>\n <td>horse</td>\n <td>2019-04-08</td>\n </tr>\n <tr>\n <th>7</th>\n <td>11</td>\n <td>Professor</td>\n <td>dog</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>8</th>\n <td>98</td>\n <td>Chairman</td>\n <td>cat</td>\n <td>2019-04-07</td>\n </tr>\n <tr>\n <th>9</th>\n <td>31</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2019-03-15</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 99
},
{
"cell_type": "code",
"source": "# anti join i.e records that are in pets but not in visits\n# easy but less efficient and uses lot of memory\nouter = pd.merge(left = pets, right = visits, how = 'outer', on = 'pet_id', indicator = True)\nprint(outer)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " pet_id name type date _merge\n0 71 Mr. Snuggles cat 2019-04-05 both\n1 42 Honey Chew Chew dog 2019-03-15 both\n2 42 Honey Chew Chew dog 2019-04-06 both\n3 42 Honey Chew Chew dog 2019-04-08 both\n4 42 hello horse 2019-03-15 both\n5 42 hello horse 2019-04-06 both\n6 42 hello horse 2019-04-08 both\n7 11 Professor dog NaN left_only\n8 98 Chairman cat 2019-04-07 both\n9 31 NaN NaN 2019-03-15 right_only\n",
"output_type": "stream"
}
],
"execution_count": 100
},
{
"cell_type": "code",
"source": "a_not_in_b = outer.loc[outer._merge == 'left_only']\nprint(a_not_in_b)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " pet_id name type date _merge\n7 11 Professor dog NaN left_only\n",
"output_type": "stream"
}
],
"execution_count": 102
},
{
"cell_type": "code",
"source": "# memory efficient way\npets.index.isin(visits.pet_id)\n",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 103,
"output_type": "execute_result",
"data": {
"text/plain": "array([ True, True, False, True, True])"
},
"metadata": {}
}
],
"execution_count": 103
},
{
"cell_type": "code",
"source": "pets.loc[~pets.index.isin(visits.pet_id)]",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 104,
"output_type": "execute_result",
"data": {
"text/plain": " name type\npet_id \n11 Professor dog",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>type</th>\n </tr>\n <tr>\n <th>pet_id</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>11</th>\n <td>Professor</td>\n <td>dog</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 104
},
{
"cell_type": "markdown",
"source": "#### 3.8 Aggregation",
"metadata": {}
},
{
"cell_type": "code",
"source": "df = pd.DataFrame({\n 'x' : [3.1, 5.5, 9.2, 1.7, 1.2, 8.3, 2.6],\n 'y' : [1.4, np.nan, 5.0, 5.8, 9.0, np.nan, 9.2]\n})\n\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " x y\n0 3.1 1.4\n1 5.5 NaN\n2 9.2 5.0\n3 1.7 5.8\n4 1.2 9.0\n5 8.3 NaN\n6 2.6 9.2\n",
"output_type": "stream"
}
],
"execution_count": 105
},
{
"cell_type": "code",
"source": "# agg - aggregate using one or more operations over the specified axis.\n\ndf.agg('sum')",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 106,
"output_type": "execute_result",
"data": {
"text/plain": "x 31.6\ny 30.4\ndtype: float64"
},
"metadata": {}
}
],
"execution_count": 106
},
{
"cell_type": "code",
"source": "# as a df\ndf.agg(['sum']) ",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 107,
"output_type": "execute_result",
"data": {
"text/plain": " x y\nsum 31.6 30.4",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>x</th>\n <th>y</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>sum</th>\n <td>31.6</td>\n <td>30.4</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 107
},
{
"cell_type": "code",
"source": "df.agg(['sum', 'mean']) ",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 108,
"output_type": "execute_result",
"data": {
"text/plain": " x y\nsum 31.600000 30.40\nmean 4.514286 6.08",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>x</th>\n <th>y</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>sum</th>\n <td>31.600000</td>\n <td>30.40</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>4.514286</td>\n <td>6.08</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 108
},
{
"cell_type": "code",
"source": "# you can even pass custom functions\ndef foofun(x):\n return x.iloc[1]\n\ndf.agg(foofun)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 110,
"output_type": "execute_result",
"data": {
"text/plain": "x 5.5\ny NaN\ndtype: float64"
},
"metadata": {}
}
],
"execution_count": 110
},
{
"cell_type": "code",
"source": "df.agg({'x' : ['sum', 'mean'], 'y' : ['min', 'max']})",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 111,
"output_type": "execute_result",
"data": {
"text/plain": " x y\nsum 31.600000 NaN\nmean 4.514286 NaN\nmin NaN 1.4\nmax NaN 9.2",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>x</th>\n <th>y</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>sum</th>\n <td>31.600000</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>4.514286</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>min</th>\n <td>NaN</td>\n <td>1.4</td>\n </tr>\n <tr>\n <th>max</th>\n <td>NaN</td>\n <td>9.2</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 111
},
{
"cell_type": "markdown",
"source": "#### 3.9 groupby()",
"metadata": {}
},
{
"cell_type": "code",
"source": "df = pd.DataFrame({\n 'A' : ['foo', 'bar', 'foo', 'bar', 'bar', 'foo', 'foo'],\n 'B' : [False, True, False, True, True, True, True],\n 'C' : [2.1, 1.9, 3.6, 4.0, 1.9, 7.8, 2.8],\n 'D' : [50, np.nan, 30, 90, 10, np.nan, 10]\n}).convert_dtypes() # convert_dtypes - convert columns to best possible dtypes using dtypes supporting pd.NA\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " A B C D\n0 foo False 2.1 50\n1 bar True 1.9 <NA>\n2 foo False 3.6 30\n3 bar True 4.0 90\n4 bar True 1.9 10\n5 foo True 7.8 <NA>\n6 foo True 2.8 10\n",
"output_type": "stream"
}
],
"execution_count": 135
},
{
"cell_type": "code",
"source": "groups = df.groupby(by = 'A')\nprint(groups.groups)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": "{'bar': [1, 3, 4], 'foo': [0, 2, 5, 6]}\n",
"output_type": "stream"
}
],
"execution_count": 136
},
{
"cell_type": "code",
"source": "print(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " A B C D\n0 foo False 2.1 50\n1 bar True 1.9 <NA>\n2 foo False 3.6 30\n3 bar True 4.0 90\n4 bar True 1.9 10\n5 foo True 7.8 <NA>\n6 foo True 2.8 10\n",
"output_type": "stream"
}
],
"execution_count": 137
},
{
"cell_type": "code",
"source": "df.groupby('A')['C'].sum()",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 117,
"output_type": "execute_result",
"data": {
"text/plain": "A\nbar 7.8\nfoo 16.3\nName: C, dtype: Float64"
},
"metadata": {}
}
],
"execution_count": 117
},
{
"cell_type": "code",
"source": "df.groupby('A')[['C']].sum()",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 118,
"output_type": "execute_result",
"data": {
"text/plain": " C\nA \nbar 7.8\nfoo 16.3",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>C</th>\n </tr>\n <tr>\n <th>A</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>bar</th>\n <td>7.8</td>\n </tr>\n <tr>\n <th>foo</th>\n <td>16.3</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 118
},
{
"cell_type": "code",
"source": "df.groupby('A')[['C', 'D']].agg('sum')",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 119,
"output_type": "execute_result",
"data": {
"text/plain": " C D\nA \nbar 7.8 100\nfoo 16.3 90",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>C</th>\n <th>D</th>\n </tr>\n <tr>\n <th>A</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>bar</th>\n <td>7.8</td>\n <td>100</td>\n </tr>\n <tr>\n <th>foo</th>\n <td>16.3</td>\n <td>90</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 119
},
{
"cell_type": "code",
"source": "df.groupby(by = ['B','D'], dropna = False).agg(\n C_min = ('C', 'min'),\n C_max = ('C', np.max)\n)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 134,
"output_type": "execute_result",
"data": {
"text/plain": " C_min C_max\nB D \nFalse 30 3.6 3.6\n 50 2.1 2.1\nTrue 10 1.9 2.8\n 90 4.0 4.0\n <NA> 1.9 7.8",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th></th>\n <th>C_min</th>\n <th>C_max</th>\n </tr>\n <tr>\n <th>B</th>\n <th>D</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th rowspan=\"2\" valign=\"top\">False</th>\n <th>30</th>\n <td>3.6</td>\n <td>3.6</td>\n </tr>\n <tr>\n <th>50</th>\n <td>2.1</td>\n <td>2.1</td>\n </tr>\n <tr>\n <th rowspan=\"3\" valign=\"top\">True</th>\n <th>10</th>\n <td>1.9</td>\n <td>2.8</td>\n </tr>\n <tr>\n <th>90</th>\n <td>4.0</td>\n <td>4.0</td>\n </tr>\n <tr>\n <th>&lt;NA&gt;</th>\n <td>1.9</td>\n <td>7.8</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
],
"execution_count": 134
},
{
"cell_type": "code",
"source": "df.groupby('A')['C'].transform(lambda x : x.sort_values().values)",
"metadata": {
"trusted": true
},
"outputs": [
{
"execution_count": 141,
"output_type": "execute_result",
"data": {
"text/plain": "0 2.1\n1 1.9\n2 2.8\n3 1.9\n4 4.0\n5 3.6\n6 7.8\nName: C, dtype: Float64"
},
"metadata": {}
}
],
"execution_count": 141
},
{
"cell_type": "code",
"source": "df['C_sorted_within_A'] = df.groupby('A')['C'].transform(lambda x : x.sort_values().values)\nprint(df)",
"metadata": {
"trusted": true
},
"outputs": [
{
"name": "stdout",
"text": " A B C D C_sorted_within_A\n0 foo False 2.1 50 2.1\n1 bar True 1.9 <NA> 1.9\n2 foo False 3.6 30 2.8\n3 bar True 4.0 90 1.9\n4 bar True 1.9 10 4.0\n5 foo True 7.8 <NA> 3.6\n6 foo True 2.8 10 7.8\n",
"output_type": "stream"
}
],
"execution_count": 144
},
{
"cell_type": "code",
"source": "",
"metadata": {
"trusted": true
},
"outputs": [],
"execution_count": null
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment