Last active
October 4, 2021 22:23
-
-
Save hsm207/03e74d95fb805ff1bafd1d79396c694e to your computer and use it in GitHub Desktop.
Explain how pandas.core.groupby.GroupBy.apply decides the return type
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Import pandas:" | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"source": [ | |
"import pandas as pd" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Define some dummy functions to apply to each group: " | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"source": [ | |
"def f1(df):\n", | |
" if df[\"ticker\"].iloc[0] == \"A\":\n", | |
" return df[\"price\"] / 2\n", | |
" else:\n", | |
" return df[\"price\"] * 2\n", | |
"\n", | |
"def f2(df):\n", | |
" if df[\"ticker\"].iloc[0] == \"A\":\n", | |
" return None\n", | |
" else:\n", | |
" return df[\"price\"] * 2" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Define a data frame with equal number of groups (ticker column):" | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"source": [ | |
"df1 = pd.DataFrame({\n", | |
" \"ticker\": [\"A\", \"A\", \"B\", \"B\", \"C\", \"C\"],\n", | |
" \"price\": [1, 2, 3, 4, 5, 6], \n", | |
"})\n", | |
"\n", | |
"df1\n" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>ticker</th>\n", | |
" <th>price</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>A</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>A</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>B</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>B</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>C</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>C</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" ticker price\n", | |
"0 A 1\n", | |
"1 A 2\n", | |
"2 B 3\n", | |
"3 B 4\n", | |
"4 C 5\n", | |
"5 C 6" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 3 | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Define a dataframe with unequal number of groups:" | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"source": [ | |
"df2 = pd.DataFrame({\n", | |
" \"ticker\": [\"A\", \"A\", \"B\"],\n", | |
" \"price\": [1, 2, 3],\n", | |
"})\n", | |
"\n", | |
"df2\n" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>ticker</th>\n", | |
" <th>price</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>A</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>A</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>B</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" ticker price\n", | |
"0 A 1\n", | |
"1 A 2\n", | |
"2 B 3" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 4 | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Apply `f1` to each data frame:" | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"source": [ | |
"df1.groupby(\"ticker\").apply(f1)" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"ticker \n", | |
"A 0 0.5\n", | |
" 1 1.0\n", | |
"B 2 6.0\n", | |
" 3 8.0\n", | |
"C 4 10.0\n", | |
" 5 12.0\n", | |
"Name: price, dtype: float64" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 5 | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"source": [ | |
"df2.groupby(\"ticker\").apply(f1)" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"ticker \n", | |
"A 0 0.5\n", | |
" 1 1.0\n", | |
"B 2 6.0\n", | |
"Name: price, dtype: float64" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 6 | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Apply `f2` to each data frame:" | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"source": [ | |
"df1.groupby(\"ticker\").apply(f2)" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"ticker \n", | |
"A 2 NaN\n", | |
" 3 NaN\n", | |
"B 2 6\n", | |
" 3 8\n", | |
"C 4 10\n", | |
" 5 12\n", | |
"dtype: object" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 7 | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"source": [ | |
"df2.groupby(\"ticker\").apply(f2)" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ticker</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>A</th>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B</th>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 2\n", | |
"ticker \n", | |
"A NaN\n", | |
"B 6.0" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 8 | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Applying `f2` on `df1` results in a Series.\n\n", | |
"Applying `f2` on `df1` results in a Dataframe.\n\n", | |
"Applying `f1` on both `df1` and `df2` results in a Series.\n", | |
"\n", | |
"Why?" | |
], | |
"metadata": {} | |
} | |
], | |
"metadata": { | |
"orig_nbformat": 4, | |
"language_info": { | |
"name": "python", | |
"version": "3.9.7", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3.9.7 64-bit" | |
}, | |
"interpreter": { | |
"hash": "4cd7ab41f5fca4b9b44701077e38c5ffd31fe66a6cab21e0214b68d958d0e462" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment