Skip to content

Instantly share code, notes, and snippets.

@sancau
Created October 26, 2018 08:31
Show Gist options
  • Save sancau/2b64492ef63ac35df452b958b559db22 to your computer and use it in GitHub Desktop.
Save sancau/2b64492ef63ac35df452b958b559db22 to your computer and use it in GitHub Desktop.
Windowed comparison in Pandas
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 254,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>value</th>\n",
" <th>meta1</th>\n",
" <th>meta2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100</td>\n",
" <td>AAA</td>\n",
" <td>BBB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200</td>\n",
" <td>CCC</td>\n",
" <td>DDD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>300</td>\n",
" <td>FFF</td>\n",
" <td>EEEE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200</td>\n",
" <td>FFF</td>\n",
" <td>EEEE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>100</td>\n",
" <td>FFF</td>\n",
" <td>EEEE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>50</td>\n",
" <td>FFF</td>\n",
" <td>QQQQ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>110</td>\n",
" <td>FFF</td>\n",
" <td>QQQQ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>520</td>\n",
" <td>FFF</td>\n",
" <td>QQQQ</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" value meta1 meta2\n",
"index \n",
"1 100 AAA BBB\n",
"2 200 CCC DDD\n",
"3 300 FFF EEEE\n",
"4 200 FFF EEEE\n",
"5 100 FFF EEEE\n",
"6 50 FFF QQQQ\n",
"7 110 FFF QQQQ\n",
"8 520 FFF QQQQ"
]
},
"execution_count": 254,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Sample rows, whitespace separated: index, value, meta1, meta2.\n",
"raw = \"\"\"\n",
"1 100 AAA BBB\n",
"2 200 CCC DDD\n",
"3 300 FFF EEEE\n",
"4 200 FFF EEEE\n",
"5 100 FFF EEEE\n",
"6 50 FFF QQQQ\n",
"7 110 FFF QQQQ\n",
"8 520 FFF QQQQ\n",
"\"\"\"\n",
"# Skip blank lines (the literal starts and ends with a newline).\n",
"rows = [line.split() for line in raw.splitlines() if line.strip()]\n",
"df = (\n",
"    pd.DataFrame(rows, columns=['index', 'value', 'meta1', 'meta2'])\n",
"    # split() yields strings; the rolling comparison below needs real numbers.\n",
"    .astype({'value': 'int64'})\n",
"    .set_index('index')\n",
")\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 255,
"metadata": {},
"outputs": [],
"source": [
"def process(df, *, col, n, keep_initial_index=False, mask_only=False):\n",
"    \"\"\"Keep the rows whose value is exceeded within the next ``n`` rows.\n",
"\n",
"    For every row ``i`` the window ``[i, i + n]`` of ``df[col]`` is inspected;\n",
"    the row passes when its own value is *not* the maximum of that window.\n",
"    The last ``n`` rows have no complete window and never pass.\n",
"\n",
"    The input frame is left untouched; a new frame is always returned.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Source frame.\n",
"    col : str\n",
"        Column to compare. Numeric strings are coerced with ``pd.to_numeric``.\n",
"    n : int\n",
"        Number of following rows to look ahead (must be >= 0).\n",
"    keep_initial_index : bool, default False\n",
"        If False, the filtered result gets a fresh ``0..k-1`` index.\n",
"    mask_only : bool, default False\n",
"        If True, return all rows plus a boolean ``mask`` column instead of\n",
"        filtering.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If ``n`` is negative or ``df[col]`` cannot be converted to numbers.\n",
"    \"\"\"\n",
"    if n < 0:\n",
"        raise ValueError('n must be non-negative, got {!r}'.format(n))\n",
"\n",
"    values = pd.to_numeric(df[col])\n",
"    # Forward-looking max: rolling() looks backwards, so shift the result up\n",
"    # by n rows. Incomplete windows (the last n rows) come out as NaN.\n",
"    window_max = values.rolling(n + 1).max().shift(-n)\n",
"    # NaN != x is True, so incomplete windows must be excluded explicitly.\n",
"    mask = window_max.notna() & (values != window_max)\n",
"\n",
"    if mask_only:\n",
"        # assign() returns a copy, so the caller's frame gains no column.\n",
"        return df.assign(mask=mask)\n",
"\n",
"    kept = df.loc[mask, :]\n",
"    if not keep_initial_index:\n",
"        kept = kept.reset_index(drop=True)\n",
"    return kept"
]
},
{
"cell_type": "code",
"execution_count": 256,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>value</th>\n",
" <th>meta1</th>\n",
" <th>meta2</th>\n",
" <th>mask</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100</td>\n",
" <td>AAA</td>\n",
" <td>BBB</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200</td>\n",
" <td>CCC</td>\n",
" <td>DDD</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>300</td>\n",
" <td>FFF</td>\n",
" <td>EEEE</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200</td>\n",
" <td>FFF</td>\n",
" <td>EEEE</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>100</td>\n",
" <td>FFF</td>\n",
" <td>EEEE</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>50</td>\n",
" <td>FFF</td>\n",
" <td>QQQQ</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>110</td>\n",
" <td>FFF</td>\n",
" <td>QQQQ</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>520</td>\n",
" <td>FFF</td>\n",
" <td>QQQQ</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" value meta1 meta2 mask\n",
"index \n",
"1 100 AAA BBB True\n",
"2 200 CCC DDD True\n",
"3 300 FFF EEEE False\n",
"4 200 FFF EEEE False\n",
"5 100 FFF EEEE True\n",
"6 50 FFF QQQQ True\n",
"7 110 FFF QQQQ False\n",
"8 520 FFF QQQQ False"
]
},
"execution_count": 256,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the boolean filter column without dropping any rows.\n",
"process(\n",
"    df,\n",
"    n=2,\n",
"    col='value',\n",
"    mask_only=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 257,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>value</th>\n",
" <th>meta1</th>\n",
" <th>meta2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100</td>\n",
" <td>AAA</td>\n",
" <td>BBB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200</td>\n",
" <td>CCC</td>\n",
" <td>DDD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>100</td>\n",
" <td>FFF</td>\n",
" <td>EEEE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>50</td>\n",
" <td>FFF</td>\n",
" <td>QQQQ</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" value meta1 meta2\n",
"0 100 AAA BBB\n",
"1 200 CCC DDD\n",
"2 100 FFF EEEE\n",
"3 50 FFF QQQQ"
]
},
"execution_count": 257,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Apply the filter with a look-ahead of 2 rows; the result is re-indexed from 0.\n",
"process(\n",
"    df,\n",
"    n=2,\n",
"    col='value',\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment