Created
October 26, 2018 08:31
-
-
Save sancau/2b64492ef63ac35df452b958b559db22 to your computer and use it in GitHub Desktop.
Windowed comparison in Pandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 254, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>value</th>\n", | |
" <th>meta1</th>\n", | |
" <th>meta2</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>index</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>100</td>\n", | |
" <td>AAA</td>\n", | |
" <td>BBB</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>200</td>\n", | |
" <td>CCC</td>\n", | |
" <td>DDD</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>300</td>\n", | |
" <td>FFF</td>\n", | |
" <td>EEEE</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>200</td>\n", | |
" <td>FFF</td>\n", | |
" <td>EEEE</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>100</td>\n", | |
" <td>FFF</td>\n", | |
" <td>EEEE</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>50</td>\n", | |
" <td>FFF</td>\n", | |
" <td>QQQQ</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>110</td>\n", | |
" <td>FFF</td>\n", | |
" <td>QQQQ</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>520</td>\n", | |
" <td>FFF</td>\n", | |
" <td>QQQQ</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" value meta1 meta2\n", | |
"index \n", | |
"1 100 AAA BBB\n", | |
"2 200 CCC DDD\n", | |
"3 300 FFF EEEE\n", | |
"4 200 FFF EEEE\n", | |
"5 100 FFF EEEE\n", | |
"6 50 FFF QQQQ\n", | |
"7 110 FFF QQQQ\n", | |
"8 520 FFF QQQQ" | |
] | |
}, | |
"execution_count": 254, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"# Whitespace-separated sample rows: index, value, meta1, meta2.\n", | |
"raw = \"\"\"\n", | |
"1 100 AAA BBB\n", | |
"2 200 CCC DDD\n", | |
"3 300 FFF EEEE\n", | |
"4 200 FFF EEEE\n", | |
"5 100 FFF EEEE\n", | |
"6 50 FFF QQQQ\n", | |
"7 110 FFF QQQQ\n", | |
"8 520 FFF QQQQ\n", | |
"\"\"\"\n", | |
"data = [line.split() for line in raw.split('\\n') if line.strip()]\n", | |
"df = (\n", | |
"    pd.DataFrame(data, columns=['index', 'value', 'meta1', 'meta2'])\n", | |
"    # str.split() yields strings; store `value` as integers so the\n", | |
"    # rolling comparison done later operates on a numeric column.\n", | |
"    .astype({'value': int})\n", | |
"    .set_index('index')\n", | |
")\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 255, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def process(df, *, col, n, keep_initial_index=False, mask_only=False):\n", | |
"    \"\"\"Select the rows whose ``col`` value is exceeded within the next ``n`` rows.\n", | |
"\n", | |
"    For every row ``i`` the values of ``col`` in rows ``i .. i + n`` are\n", | |
"    inspected; the row is flagged when its own value is not the maximum of\n", | |
"    that window. The last ``n`` rows have no complete window and are never\n", | |
"    flagged, and neither is any row whose window contains a missing value.\n", | |
"\n", | |
"    The input frame is never modified; a new DataFrame is returned.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    df : pandas.DataFrame\n", | |
"        Frame to filter.\n", | |
"    col : str\n", | |
"        Column to compare. Numeric strings are accepted and converted\n", | |
"        with ``pd.to_numeric`` for the comparison only.\n", | |
"    n : int\n", | |
"        Number of following rows to look ahead (window size is ``n + 1``).\n", | |
"    keep_initial_index : bool, default False\n", | |
"        Keep the original index labels of the selected rows instead of\n", | |
"        renumbering them from 0.\n", | |
"    mask_only : bool, default False\n", | |
"        Return all rows with an extra boolean ``mask`` column instead of\n", | |
"        filtering.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    pandas.DataFrame\n", | |
"        All rows plus ``mask`` when ``mask_only`` is true, otherwise only\n", | |
"        the flagged rows, without a ``mask`` column.\n", | |
"\n", | |
"    Raises\n", | |
"    ------\n", | |
"    ValueError\n", | |
"        If ``col`` holds values that cannot be parsed as numbers.\n", | |
"    \"\"\"\n", | |
"    values = pd.to_numeric(df[col])\n", | |
"\n", | |
"    # rolling() looks backwards, so the max of rows i .. i + n is the\n", | |
"    # trailing max computed at row i + n, shifted back by n rows.\n", | |
"    window_max = values.rolling(n + 1).max().shift(-n)\n", | |
"\n", | |
"    # NaN marks an incomplete window; those rows must stay unflagged.\n", | |
"    mask = window_max.notna() & (values != window_max)\n", | |
"\n", | |
"    if mask_only:\n", | |
"        # assign() returns a copy, leaving the caller's frame untouched.\n", | |
"        return df.assign(mask=mask)\n", | |
"\n", | |
"    selected = df.loc[mask]\n", | |
"    if not keep_initial_index:\n", | |
"        selected = selected.reset_index(drop=True)\n", | |
"    return selected" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 256, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>value</th>\n", | |
" <th>meta1</th>\n", | |
" <th>meta2</th>\n", | |
" <th>mask</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>index</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>100</td>\n", | |
" <td>AAA</td>\n", | |
" <td>BBB</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>200</td>\n", | |
" <td>CCC</td>\n", | |
" <td>DDD</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>300</td>\n", | |
" <td>FFF</td>\n", | |
" <td>EEEE</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>200</td>\n", | |
" <td>FFF</td>\n", | |
" <td>EEEE</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>100</td>\n", | |
" <td>FFF</td>\n", | |
" <td>EEEE</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>50</td>\n", | |
" <td>FFF</td>\n", | |
" <td>QQQQ</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>110</td>\n", | |
" <td>FFF</td>\n", | |
" <td>QQQQ</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>520</td>\n", | |
" <td>FFF</td>\n", | |
" <td>QQQQ</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" value meta1 meta2 mask\n", | |
"index \n", | |
"1 100 AAA BBB True\n", | |
"2 200 CCC DDD True\n", | |
"3 300 FFF EEEE False\n", | |
"4 200 FFF EEEE False\n", | |
"5 100 FFF EEEE True\n", | |
"6 50 FFF QQQQ True\n", | |
"7 110 FFF QQQQ False\n", | |
"8 520 FFF QQQQ False" | |
] | |
}, | |
"execution_count": 256, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Show every row together with the boolean `mask` column, using a look-ahead of n=2 rows.\n", | |
"process(df, col='value', n=2, mask_only=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 257, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>value</th>\n", | |
" <th>meta1</th>\n", | |
" <th>meta2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>100</td>\n", | |
" <td>AAA</td>\n", | |
" <td>BBB</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>200</td>\n", | |
" <td>CCC</td>\n", | |
" <td>DDD</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>100</td>\n", | |
" <td>FFF</td>\n", | |
" <td>EEEE</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>50</td>\n", | |
" <td>FFF</td>\n", | |
" <td>QQQQ</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" value meta1 meta2\n", | |
"0 100 AAA BBB\n", | |
"1 200 CCC DDD\n", | |
"2 100 FFF EEEE\n", | |
"3 50 FFF QQQQ" | |
] | |
}, | |
"execution_count": 257, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Keep only the rows flagged by the mask; the index is renumbered because keep_initial_index defaults to False.\n", | |
"process(df, col='value', n=2)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment