Last active
October 17, 2021 13:13
-
-
Save RenSys/4262b0e9710fd8782f8eefc04d89820d to your computer and use it in GitHub Desktop.
[Pipe - Method Chaining] #pandas #notebook #pipe
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import seaborn as sns", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Load seaborn's example 'tips' dataset; every later cell reads it as `df`.\ndf = sns.load_dataset(name='tips')", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Hand the grouped frame to the helper through .pipe().\n(\n    df.groupby('day')\n    .pipe(get_first_two_rows_in_each_group)\n)", | |
"execution_count": 17, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 17, | |
"data": { | |
"text/plain": " total_bill tip sex smoker day time size\n0 16.99 1.01 Female No Sun Dinner 2\n1 10.34 1.66 Male No Sun Dinner 3\n19 20.65 3.35 Male No Sat Dinner 3\n20 17.92 4.08 Male No Sat Dinner 2\n77 27.20 4.00 Male No Thur Lunch 4\n78 22.76 3.00 Male No Thur Lunch 2\n90 28.97 3.00 Male Yes Fri Dinner 2\n91 22.49 3.50 Male No Fri Dinner 2", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>total_bill</th>\n <th>tip</th>\n <th>sex</th>\n <th>smoker</th>\n <th>day</th>\n <th>time</th>\n <th>size</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>16.99</td>\n <td>1.01</td>\n <td>Female</td>\n <td>No</td>\n <td>Sun</td>\n <td>Dinner</td>\n <td>2</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10.34</td>\n <td>1.66</td>\n <td>Male</td>\n <td>No</td>\n <td>Sun</td>\n <td>Dinner</td>\n <td>3</td>\n </tr>\n <tr>\n <th>19</th>\n <td>20.65</td>\n <td>3.35</td>\n <td>Male</td>\n <td>No</td>\n <td>Sat</td>\n <td>Dinner</td>\n <td>3</td>\n </tr>\n <tr>\n <th>20</th>\n <td>17.92</td>\n <td>4.08</td>\n <td>Male</td>\n <td>No</td>\n <td>Sat</td>\n <td>Dinner</td>\n <td>2</td>\n </tr>\n <tr>\n <th>77</th>\n <td>27.20</td>\n <td>4.00</td>\n <td>Male</td>\n <td>No</td>\n <td>Thur</td>\n <td>Lunch</td>\n <td>4</td>\n </tr>\n <tr>\n <th>78</th>\n <td>22.76</td>\n <td>3.00</td>\n <td>Male</td>\n <td>No</td>\n <td>Thur</td>\n <td>Lunch</td>\n <td>2</td>\n </tr>\n <tr>\n <th>90</th>\n <td>28.97</td>\n <td>3.00</td>\n <td>Male</td>\n <td>Yes</td>\n <td>Fri</td>\n <td>Dinner</td>\n <td>2</td>\n </tr>\n <tr>\n <th>91</th>\n <td>22.49</td>\n <td>3.50</td>\n <td>Male</td>\n <td>No</td>\n <td>Fri</td>\n <td>Dinner</td>\n <td>2</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def get_first_two_rows_in_each_group(df):\n    \"\"\"Return the first two rows of every group.\n\n    The notebook pipes ``df.groupby('day')`` into this helper, so despite its\n    name ``df`` is a grouped object there and ``head`` is applied per group.\n    \"\"\"\n    rows_per_group = 2\n    return df.head(rows_per_group)", | |
"execution_count": 16, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "(\n    df.groupby('day')\n    # `count_rows` is not defined by any cell; the stored output matches this helper.\n    .pipe(get_first_two_rows_in_each_group)\n    .pipe(remove_lunch_times)\n)", | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 19, | |
"data": { | |
"text/plain": " total_bill tip sex smoker day time size\n0 16.99 1.01 Female No Sun Dinner 2\n1 10.34 1.66 Male No Sun Dinner 3\n19 20.65 3.35 Male No Sat Dinner 3\n20 17.92 4.08 Male No Sat Dinner 2\n90 28.97 3.00 Male Yes Fri Dinner 2\n91 22.49 3.50 Male No Fri Dinner 2", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>total_bill</th>\n <th>tip</th>\n <th>sex</th>\n <th>smoker</th>\n <th>day</th>\n <th>time</th>\n <th>size</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>16.99</td>\n <td>1.01</td>\n <td>Female</td>\n <td>No</td>\n <td>Sun</td>\n <td>Dinner</td>\n <td>2</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10.34</td>\n <td>1.66</td>\n <td>Male</td>\n <td>No</td>\n <td>Sun</td>\n <td>Dinner</td>\n <td>3</td>\n </tr>\n <tr>\n <th>19</th>\n <td>20.65</td>\n <td>3.35</td>\n <td>Male</td>\n <td>No</td>\n <td>Sat</td>\n <td>Dinner</td>\n <td>3</td>\n </tr>\n <tr>\n <th>20</th>\n <td>17.92</td>\n <td>4.08</td>\n <td>Male</td>\n <td>No</td>\n <td>Sat</td>\n <td>Dinner</td>\n <td>2</td>\n </tr>\n <tr>\n <th>90</th>\n <td>28.97</td>\n <td>3.00</td>\n <td>Male</td>\n <td>Yes</td>\n <td>Fri</td>\n <td>Dinner</td>\n <td>2</td>\n </tr>\n <tr>\n <th>91</th>\n <td>22.49</td>\n <td>3.50</td>\n <td>Male</td>\n <td>No</td>\n <td>Fri</td>\n <td>Dinner</td>\n <td>2</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def remove_lunch_times(df):\n    \"\"\"Drop every row whose ``time`` value is 'Lunch'.\"\"\"\n    is_not_lunch = df['time'] != 'Lunch'\n    return df[is_not_lunch]", | |
"execution_count": 18, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "(\n    df.groupby('day')\n    # `count_rows` is not defined by any cell; the stored output matches this helper.\n    .pipe(get_first_two_rows_in_each_group)\n    .pipe(remove_lunch_times)\n    # Extra positional arguments to .pipe() are forwarded to the helper.\n    .pipe(get_sex, 'Female')\n)", | |
"execution_count": 26, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 26, | |
"data": { | |
"text/plain": " total_bill tip sex smoker day time size\n0 16.99 1.01 Female No Sun Dinner 2", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>total_bill</th>\n <th>tip</th>\n <th>sex</th>\n <th>smoker</th>\n <th>day</th>\n <th>time</th>\n <th>size</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>16.99</td>\n <td>1.01</td>\n <td>Female</td>\n <td>No</td>\n <td>Sun</td>\n <td>Dinner</td>\n <td>2</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def get_sex(df, sex):\n    \"\"\"Keep only the rows whose ``sex`` column equals ``sex``.\"\"\"\n    matches_sex = df['sex'] == sex\n    return df[matches_sex]", | |
"execution_count": 25, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "(\n    df.groupby('day')\n    # `count_rows` is not defined by any cell; the stored output matches this helper.\n    .pipe(get_first_two_rows_in_each_group)\n    .pipe(remove_lunch_times)\n    # Extra positional arguments to .pipe() are forwarded to the helper.\n    .pipe(get_sex, 'Female')\n    .pipe(select_tip_col)\n)", | |
"execution_count": 30, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 30, | |
"data": { | |
"text/plain": "0 1.01\nName: tip, dtype: float64" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def select_tip_col(df):\n    \"\"\"Return the ``tip`` column of ``df``.\"\"\"\n    tip_column = df['tip']\n    return tip_column", | |
"execution_count": 31, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "py36-test", | |
"display_name": "py36-test", | |
"language": "python" | |
}, | |
"hide_input": false, | |
"language_info": { | |
"name": "python", | |
"version": "3.6.3", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "Pandas Pipe - Method Chaining", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment