Created
March 31, 2023 21:21
-
-
Save walkerh/aa24b05085e69d63f0fc5ffafd50a60c to your computer and use it in GitHub Desktop.
Attempts to fix missing data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "17062f26", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "9def47f6", | |
"metadata": {}, | |
"source": [ | |
"# A simple set" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "d1fd9c97", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df1 = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=['A', 'B'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "4a7e7c0d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B\n", | |
"0 1 2\n", | |
"1 3 4\n", | |
"2 5 6" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df1" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "cf31a951", | |
"metadata": {}, | |
"source": [ | |
"## Introducing `DataFrame.apply`\n", | |
"\n", | |
"https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html#pandas.DataFrame.apply" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "ab3fb7df", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"A 9\n", | |
"B 12\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df1.apply(np.sum, axis=0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "1873d275", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"result = df1.apply(np.sum, axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "dbd88fd8", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"pandas.core.series.Series" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"type(result)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "6d016f95", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 3\n", | |
"1 7\n", | |
"2 11\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "4b5a4dd7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 (Series, A 1\\nB 2\\nName: 0, dtype: int64)\n", | |
"1 (Series, A 3\\nB 4\\nName: 1, dtype: int64)\n", | |
"2 (Series, A 5\\nB 6\\nName: 2, dtype: int64)\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"def identify(input_series):\n", | |
" return type(input_series).__name__, str(input_series)\n", | |
"\n", | |
"df1.apply(identify, axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "c679aaee", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 4\n", | |
"1 16\n", | |
"2 36\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df1.apply(lambda s: (s[\"A\"] + 1) * s[\"B\"], axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "d05a759b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"result = df1.apply(lambda s: pd.Series({\"C\": s.A*2, \"D\": s.B+1}), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "91e11df0", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"pandas.core.frame.DataFrame" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"type(result)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "27324fd3", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>6</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>10</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" C D\n", | |
"0 2 3\n", | |
"1 6 5\n", | |
"2 10 7" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7a069ca7", | |
"metadata": {}, | |
"source": [ | |
"If the function passed to `apply` returns a scalar, you get a `Series`. If it returns a `Series`, you get a `DataFrame`." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "2620785c", | |
"metadata": {}, | |
"source": [ | |
"# A more complex set\n", | |
"\n", | |
"This is a modified version of this StackOverflow answer: https://stackoverflow.com/a/62159000/642372" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "e2c83114", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"header = [\"year\", \"name\", \"miles\"]\n", | |
"data = [\n", | |
" ('2010', 'Paul', 6.0),\n", | |
" ('2010', 'Paul', 4.0),\n", | |
" ('2010', 'Paul', np.nan),\n", | |
" ('2011', 'Paul', 7.0),\n", | |
" ('2011', 'Paul', 8.0),\n", | |
" ('2011', 'Paul', np.nan),\n", | |
" ('2012', 'Paul', 9.0),\n", | |
" ('2012', 'Paul', 10.9),\n", | |
" ('2012', 'Paul', 12.0),\n", | |
"]\n", | |
"df2 = pd.DataFrame(data, columns=header)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "e39b85fa", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>year</th>\n", | |
" <th>name</th>\n", | |
" <th>miles</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2010</td>\n", | |
" <td>Paul</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2010</td>\n", | |
" <td>Paul</td>\n", | |
" <td>4.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2010</td>\n", | |
" <td>Paul</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2011</td>\n", | |
" <td>Paul</td>\n", | |
" <td>7.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2011</td>\n", | |
" <td>Paul</td>\n", | |
" <td>8.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2011</td>\n", | |
" <td>Paul</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>2012</td>\n", | |
" <td>Paul</td>\n", | |
" <td>9.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>2012</td>\n", | |
" <td>Paul</td>\n", | |
" <td>10.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>2012</td>\n", | |
" <td>Paul</td>\n", | |
" <td>12.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" year name miles\n", | |
"0 2010 Paul 6.0\n", | |
"1 2010 Paul 4.0\n", | |
"2 2010 Paul NaN\n", | |
"3 2011 Paul 7.0\n", | |
"4 2011 Paul 8.0\n", | |
"5 2011 Paul NaN\n", | |
"6 2012 Paul 9.0\n", | |
"7 2012 Paul 10.9\n", | |
"8 2012 Paul 12.0" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "9a63e832", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>miles</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>year</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2010</th>\n", | |
" <td>5.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2011</th>\n", | |
" <td>7.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2012</th>\n", | |
" <td>10.633333</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" miles\n", | |
"year \n", | |
"2010 5.000000\n", | |
"2011 7.500000\n", | |
"2012 10.633333" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df2.groupby(['year']).mean(numeric_only=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "82730846", | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df2['miles_new'] = df2['miles'].fillna(\n", | |
" df2.groupby(['year'])['miles'].transform('mean')\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "0e10c31d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>year</th>\n", | |
" <th>name</th>\n", | |
" <th>miles</th>\n", | |
" <th>miles_new</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2010</td>\n", | |
" <td>Paul</td>\n", | |
" <td>6.0</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2010</td>\n", | |
" <td>Paul</td>\n", | |
" <td>4.0</td>\n", | |
" <td>4.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2010</td>\n", | |
" <td>Paul</td>\n", | |
" <td>NaN</td>\n", | |
" <td>5.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2011</td>\n", | |
" <td>Paul</td>\n", | |
" <td>7.0</td>\n", | |
" <td>7.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2011</td>\n", | |
" <td>Paul</td>\n", | |
" <td>8.0</td>\n", | |
" <td>8.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2011</td>\n", | |
" <td>Paul</td>\n", | |
" <td>NaN</td>\n", | |
" <td>7.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>2012</td>\n", | |
" <td>Paul</td>\n", | |
" <td>9.0</td>\n", | |
" <td>9.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>2012</td>\n", | |
" <td>Paul</td>\n", | |
" <td>10.9</td>\n", | |
" <td>10.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>2012</td>\n", | |
" <td>Paul</td>\n", | |
" <td>12.0</td>\n", | |
" <td>12.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" year name miles miles_new\n", | |
"0 2010 Paul 6.0 6.0\n", | |
"1 2010 Paul 4.0 4.0\n", | |
"2 2010 Paul NaN 5.0\n", | |
"3 2011 Paul 7.0 7.0\n", | |
"4 2011 Paul 8.0 8.0\n", | |
"5 2011 Paul NaN 7.5\n", | |
"6 2012 Paul 9.0 9.0\n", | |
"7 2012 Paul 10.9 10.9\n", | |
"8 2012 Paul 12.0 12.0" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "c4574c57", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"defaults = {\"2010\": 5.1, \"2011\": 8.1, \"2012\": 10.6}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "2717675a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 (Series, year 2010\\nname Paul\\...\n", | |
"1 (Series, year 2010\\nname Paul\\...\n", | |
"2 (Series, year 2010\\nname Paul\\...\n", | |
"3 (Series, year 2011\\nname Paul\\...\n", | |
"4 (Series, year 2011\\nname Paul\\...\n", | |
"5 (Series, year 2011\\nname Paul\\...\n", | |
"6 (Series, year 2012\\nname Paul\\...\n", | |
"7 (Series, year 2012\\nname Paul\\...\n", | |
"8 (Series, year 2012\\nname Paul\\...\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df2.apply(identify, axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "5e2160a9", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>year</th>\n", | |
" <th>name</th>\n", | |
" <th>miles</th>\n", | |
" <th>miles_new</th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2010</td>\n", | |
" <td>Paul</td>\n", | |
" <td>6.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>2010</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2010</td>\n", | |
" <td>Paul</td>\n", | |
" <td>4.0</td>\n", | |
" <td>4.0</td>\n", | |
" <td>2010</td>\n", | |
" <td>4.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2010</td>\n", | |
" <td>Paul</td>\n", | |
" <td>NaN</td>\n", | |
" <td>5.0</td>\n", | |
" <td>2010</td>\n", | |
" <td>5.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2011</td>\n", | |
" <td>Paul</td>\n", | |
" <td>7.0</td>\n", | |
" <td>7.0</td>\n", | |
" <td>2011</td>\n", | |
" <td>7.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2011</td>\n", | |
" <td>Paul</td>\n", | |
" <td>8.0</td>\n", | |
" <td>8.0</td>\n", | |
" <td>2011</td>\n", | |
" <td>8.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2011</td>\n", | |
" <td>Paul</td>\n", | |
" <td>NaN</td>\n", | |
" <td>7.5</td>\n", | |
" <td>2011</td>\n", | |
" <td>8.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>2012</td>\n", | |
" <td>Paul</td>\n", | |
" <td>9.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>2012</td>\n", | |
" <td>9.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>2012</td>\n", | |
" <td>Paul</td>\n", | |
" <td>10.9</td>\n", | |
" <td>10.9</td>\n", | |
" <td>2012</td>\n", | |
" <td>10.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>2012</td>\n", | |
" <td>Paul</td>\n", | |
" <td>12.0</td>\n", | |
" <td>12.0</td>\n", | |
" <td>2012</td>\n", | |
" <td>12.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" year name miles miles_new A B\n", | |
"0 2010 Paul 6.0 6.0 2010 6.0\n", | |
"1 2010 Paul 4.0 4.0 2010 4.0\n", | |
"2 2010 Paul NaN 5.0 2010 5.1\n", | |
"3 2011 Paul 7.0 7.0 2011 7.0\n", | |
"4 2011 Paul 8.0 8.0 2011 8.0\n", | |
"5 2011 Paul NaN 7.5 2011 8.1\n", | |
"6 2012 Paul 9.0 9.0 2012 9.0\n", | |
"7 2012 Paul 10.9 10.9 2012 10.9\n", | |
"8 2012 Paul 12.0 12.0 2012 12.0" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.concat(\n", | |
" [\n", | |
" df2, \n", | |
" df2.apply(\n", | |
" lambda s: pd.Series(\n", | |
" {\n", | |
" \"A\": s[\"year\"], \n", | |
" \"B\": defaults[s[\"year\"]] if np.isnan(s[\"miles\"]) else s[\"miles\"]}\n", | |
" ), \n", | |
" axis=1\n", | |
" )\n", | |
" ], \n", | |
" axis=1\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "5cc74700", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment