Skip to content

Instantly share code, notes, and snippets.

@porimol
Last active December 8, 2017 08:15
Show Gist options
  • Save porimol/5401cb2d3d266ad2828c1d04fc172ed9 to your computer and use it in GitHub Desktop.
Save porimol/5401cb2d3d266ad2828c1d04fc172ed9 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 88,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df = pd.read_csv('test.csv')"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>column1</th>\n",
" <th>column2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>No</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>yes</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>yes</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>yes</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" column1 column2\n",
"0 No 1.0\n",
"1 yes NaN\n",
"2 No NaN\n",
"3 No 2.0\n",
"4 No NaN\n",
"5 No NaN\n",
"6 No NaN\n",
"7 No NaN\n",
"8 No NaN\n",
"9 No NaN\n",
"10 No NaN\n",
"11 No NaN\n",
"12 No 5.0\n",
"13 yes NaN\n",
"14 yes NaN\n",
"15 yes 3.0\n",
"16 yes NaN\n",
"17 yes NaN\n",
"18 yes 4.0\n",
"19 yes NaN\n",
"20 yes NaN\n",
"21 yes 4.0"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
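"# Earlier attempt, kept for reference only: it replaced column2 in every 'yes' row with the column mean,\n",
"# overwriting values that were already present. The next cell fills only the missing values instead.\n",
"# df['column2'] = df.apply(lambda cols: df['column2'].mean() if cols[0]=='yes' else cols[1], axis=1)"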
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Fill missing column2 values only in rows where column1 == 'yes', using the mean of the\n",
"# non-missing column2 values across all rows. Rows with any other column1 value, and cells\n",
"# that already hold a number, are left unchanged.\n",
"# Columns are addressed by label (not by position) and the mean is evaluated once,\n",
"# instead of inside a row-wise apply.\n",
"df['column2'] = df['column2'].mask(\n",
"    df['column1'].eq('yes') & df['column2'].isna(),\n",
"    df['column2'].mean(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>column1</th>\n",
" <th>column2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>No</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>yes</td>\n",
" <td>3.166667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>No</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>yes</td>\n",
" <td>3.166667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>yes</td>\n",
" <td>3.166667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>yes</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>yes</td>\n",
" <td>3.166667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>yes</td>\n",
" <td>3.166667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>yes</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>yes</td>\n",
" <td>3.166667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>yes</td>\n",
" <td>3.166667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>yes</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" column1 column2\n",
"0 No 1.000000\n",
"1 yes 3.166667\n",
"2 No NaN\n",
"3 No 2.000000\n",
"4 No NaN\n",
"5 No NaN\n",
"6 No NaN\n",
"7 No NaN\n",
"8 No NaN\n",
"9 No NaN\n",
"10 No NaN\n",
"11 No NaN\n",
"12 No 5.000000\n",
"13 yes 3.166667\n",
"14 yes 3.166667\n",
"15 yes 3.000000\n",
"16 yes 3.166667\n",
"17 yes 3.166667\n",
"18 yes 4.000000\n",
"19 yes 3.166667\n",
"20 yes 3.166667\n",
"21 yes 4.000000"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment