Created
October 15, 2015 22:06
-
-
Save phobson/3d59be52ab47efbc9501 to your computer and use it in GitHub Desktop.
Imputing non-detect data in Python and R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
epa_rain_zone | state | location_code | station_name | jurisdiction_county | jurisdiction_city | primary_landuse | secondary_landuse | percent_impervious | start_date | days since last rain | precipitation_depth_(in) | season | parameter | fraction | units | res | qual | drainage_area_acres | latitude | longitude | station | cvcparam | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | IN | ININBAR1 | Basin_R1_control | Marion_County | Indianapolis | Residential | 5/17/2000 | spring | Cadmium | Total | ug/L | 10 | < | 8.36 | 39.75716944 | 85.97535278 | outflow | Cadmium (Cd) | |||||
1 | IN | ININBAR1 | Basin_R1_control | Marion_County | Indianapolis | Residential | 5/25/2001 | spring | Cadmium | Total | ug/L | 10 | < | 8.36 | 39.75716944 | 85.97535278 | outflow | Cadmium (Cd) | |||||
1 | IN | ININBAR2 | Basin_R2_test | Marion_County | Indianapolis | Residential | 5/17/2000 | spring | Cadmium | Total | ug/L | 10 | < | 13.41 | 39.76257222 | 85.97841667 | outflow | Cadmium (Cd) | |||||
1 | IN | ININBAR2 | Basin_R2_test | Marion_County | Indianapolis | Residential | 5/25/2001 | spring | Cadmium | Total | ug/L | 10 | < | 13.41 | 39.76257222 | 85.97841667 | outflow | Cadmium (Cd) | |||||
1 | MA | MABOA001 | Charlestown_29J212 | Suffollk_County | City_of_Boston | Residential | Open Space | 74 | 4/11/1992 | 10 | 0.24 | spring | Cadmium | Total | ug/L | 1 | < | 40.4 | 42.38388889 | 71.07241667 | outflow | Cadmium (Cd) | |
1 | MA | MABOA001 | Charlestown_29J212 | Suffollk_County | City_of_Boston | Residential | Open Space | 74 | 6/1/1992 | 6 | 2.01 | spring | Cadmium | Total | ug/L | 1 | < | 40.4 | 42.38388889 | 71.07241667 | outflow | Cadmium (Cd) | |
1 | MA | MABOA002 | West_Roxebury_13D077_078 | Suffollk_County | City_of_Boston | Residential | 52 | 4/17/1992 | 3 | 0.77 | spring | Cadmium | Total | ug/L | 1 | < | 86.7 | 42.29588889 | 71.14858333 | outflow | Cadmium (Cd) | ||
1 | MA | MABOA002 | West_Roxebury_13D077_078 | Suffollk_County | City_of_Boston | Residential | 52 | 6/1/1992 | 6 | 1.77 | spring | Cadmium | Total | ug/L | 1 | < | 86.7 | 42.29588889 | 71.14858333 | outflow | Cadmium (Cd) | ||
1 | MN | MNMIHAPK | Site1_Harriet_Pkwy_44th_St | Hennepin_County | Minneapolis | Residential | 4/10/2001 | >8hrs | 1.72 | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.92255833 | 93.29663889 | outflow | Cadmium (Cd) | |||
1 | MN | MNMIHAPK | Site1_Harriet_Pkwy_44th_St | Hennepin_County | Minneapolis | Residential | 4/5/2002 | >8hrs | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.92255833 | 93.29663889 | outflow | Cadmium (Cd) | ||||
1 | MN | MNMIHAPK | Site1_Harriet_Pkwy_44th_St | Hennepin_County | Minneapolis | Residential | 5/7/2002 | >8hrs | 1.46 | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.92255833 | 93.29663889 | outflow | Cadmium (Cd) | |||
1 | MN | MNMIHAPK | Site1_Harriet_Pkwy_44th_St | Hennepin_County | Minneapolis | Residential | 5/8/2002 | >8hrs | 1.46 | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.92255833 | 93.29663889 | outflow | Cadmium (Cd) | |||
1 | MN | MNMIHAPK | Site1_Harriet_Pkwy_44th_St | Hennepin_County | Minneapolis | Residential | 6/6/2002 | >8hrs | 0.63 | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.92255833 | 93.29663889 | outflow | Cadmium (Cd) | |||
1 | MN | MNMISD01 | E_Harriet_Pkwy_W44_St | - | City_of_Minneapolis | Residential | 6/11/2001 | 0.69 | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.9231 | 93.2856 | outflow | Cadmium (Cd) | ||||
1 | MN | MNMISD02 | Luella_St_Orange_Ave | - | City_of_Minneapolis | Residential | 6/5/2001 | 0.49 | spring | Cadmium | Total | ug/L | 1.17 | = | 95 | 44.9794 | 93.0189 | outflow | Cadmium (Cd) | ||||
1 | MN | MNMISD04 | Charles_Ave | - | City_of_Minneapolis | Residential | Commercial | 6/5/2001 | 0.49 | spring | Cadmium | Total | ug/L | 0.897 | = | 63 | 44.9594 | 93.1188 | outflow | Cadmium (Cd) | |||
1 | MN | MNMISD05 | E_29_St_31_Ave_S | - | City_of_Minneapolis | Residential | Commercial | 6/5/2001 | 0.49 | spring | Cadmium | Total | ug/L | 1.26 | = | 100 | 44.9501 | 93.227 | outflow | Cadmium (Cd) | |||
1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 4/10/2001 | >8hrs | 1.72 | spring | Cadmium | Total | ug/L | 5 | < | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | ||
1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 6/5/2001 | >8hrs | 0.49 | spring | Cadmium | Total | ug/L | 1.26 | = | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | ||
1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 4/4/2002 | >8hrs | spring | Cadmium | Total | ug/L | 0.5 | < | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | |||
1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 6/2/2002 | >8hrs | 0.38 | spring | Cadmium | Total | ug/L | 0.5 | < | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | ||
1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 6/6/2002 | >8hrs | 0.63 | spring | Cadmium | Total | ug/L | 0.5 | < | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | ||
1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 6/19/2002 | >8hrs | 0.6 | spring | Cadmium | Total | ug/L | 0.5 | < | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | ||
1 | MN | MNSPCHAV | Site4_Charles_ave_Mackubin_to_Arundel_St | Ramsey_County | St_Paul | Residential | Commercial | 4/10/2001 | >8hrs | 1.72 | spring | Cadmium | Total | ug/L | 5 | < | 63 | 44.95758889 | 93.12003889 | outflow | Cadmium (Cd) | ||
1 | MN | MNSPCHAV | Site4_Charles_ave_Mackubin_to_Arundel_St | Ramsey_County | St_Paul | Residential | Commercial | 4/5/2002 | >8hrs | spring | Cadmium | Total | ug/L | 0.5 | < | 63 | 44.95758889 | 93.12003889 | outflow | Cadmium (Cd) | |||
1 | MN | MNSPCHAV | Site4_Charles_ave_Mackubin_to_Arundel_St | Ramsey_County | St_Paul | Residential | Commercial | 5/5/2002 | >8hrs | 0.62 | spring | Cadmium | Total | ug/L | 2.19 | = | 63 | 44.95758889 | 93.12003889 | outflow | Cadmium (Cd) | ||
1 | MN | MNSPCHAV | Site4_Charles_ave_Mackubin_to_Arundel_St | Ramsey_County | St_Paul | Residential | Commercial | 5/7/2002 | >8hrs | 1.46 | spring | Cadmium | Total | ug/L | 8.35 | = | 63 | 44.95758889 | 93.12003889 | outflow | Cadmium (Cd) | ||
1 | MN | MNSPCHAV | Site4_Charles_ave_Mackubin_to_Arundel_St | Ramsey_County | St_Paul | Residential | Commercial | 6/3/2002 | >8hrs | 0.7 | spring | Cadmium | Total | ug/L | 0.5 | < | 63 | 44.95758889 | 93.12003889 | outflow | Cadmium (Cd) | ||
1 | MN | MNSPLUOR | Site2_Luella_St_at_Orange_ave | Ramsey_County | St_Paul | Residential | 4/10/2001 | >8hrs | 1.72 | spring | Cadmium | Total | ug/L | 0.5 | < | 95 | 44.97933056 | 93.017675 | outflow | Cadmium (Cd) | |||
1 | MN | MNSPLUOR | Site2_Luella_St_at_Orange_ave | Ramsey_County | St_Paul | Residential | 4/5/2002 | >8hrs | spring | Cadmium | Total | ug/L | 0.5 | < | 95 | 44.97933056 | 93.017675 | outflow | Cadmium (Cd) | ||||
1 | MN | MNSPLUOR | Site2_Luella_St_at_Orange_ave | Ramsey_County | St_Paul | Residential | 5/5/2002 | >8hrs | 0.62 | spring | Cadmium | Total | ug/L | 2.24 | = | 95 | 44.97933056 | 93.017675 | outflow | Cadmium (Cd) | |||
1 | MN | MNSPLUOR | Site2_Luella_St_at_Orange_ave | Ramsey_County | St_Paul | Residential | 5/7/2002 | >8hrs | 1.46 | spring | Cadmium | Total | ug/L | 0.5 | < | 95 | 44.97933056 | 93.017675 | outflow | Cadmium (Cd) | |||
1 | MN | MNSPLUOR | Site2_Luella_St_at_Orange_ave | Ramsey_County | St_Paul | Residential | 6/3/2002 | >8hrs | 0.7 | spring | Cadmium | Total | ug/L | 0.5 | < | 95 | 44.97933056 | 93.017675 | outflow | Cadmium (Cd) | |||
1 | WI | WIMALAAV | Lakeland_Ave | - | Madison | Residential | 6/17/1993 | 0.57 | spring | Cadmium | Total | ug/L | 1 | = | 73.7 | outflow | Cadmium (Cd) | ||||||
1 | WI | WIMALAAV | Lakeland_Ave | - | Madison | Residential | 4/12/1994 | 0.5 | spring | Cadmium | Total | ug/L | 1.6 | = | 73.7 | outflow | Cadmium (Cd) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### https://github.com/Geosyntec/wqio" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>epa_rain_zone</th>\n", | |
" <th>primary_landuse</th>\n", | |
" <th>season</th>\n", | |
" <th>parameter</th>\n", | |
" <th>fraction</th>\n", | |
" <th>units</th>\n", | |
" <th>res</th>\n", | |
" <th>qual</th>\n", | |
" <th>cen</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>Residential</td>\n", | |
" <td>spring</td>\n", | |
" <td>Cadmium</td>\n", | |
" <td>Total</td>\n", | |
" <td>ug/L</td>\n", | |
" <td>10</td>\n", | |
" <td><</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>Residential</td>\n", | |
" <td>spring</td>\n", | |
" <td>Cadmium</td>\n", | |
" <td>Total</td>\n", | |
" <td>ug/L</td>\n", | |
" <td>10</td>\n", | |
" <td><</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>Residential</td>\n", | |
" <td>spring</td>\n", | |
" <td>Cadmium</td>\n", | |
" <td>Total</td>\n", | |
" <td>ug/L</td>\n", | |
" <td>10</td>\n", | |
" <td><</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>Residential</td>\n", | |
" <td>spring</td>\n", | |
" <td>Cadmium</td>\n", | |
" <td>Total</td>\n", | |
" <td>ug/L</td>\n", | |
" <td>10</td>\n", | |
" <td><</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>Residential</td>\n", | |
" <td>spring</td>\n", | |
" <td>Cadmium</td>\n", | |
" <td>Total</td>\n", | |
" <td>ug/L</td>\n", | |
" <td>1</td>\n", | |
" <td><</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" epa_rain_zone primary_landuse season parameter fraction units res qual \\\n", | |
"0 1 Residential spring Cadmium Total ug/L 10 < \n", | |
"1 1 Residential spring Cadmium Total ug/L 10 < \n", | |
"2 1 Residential spring Cadmium Total ug/L 10 < \n", | |
"3 1 Residential spring Cadmium Total ug/L 10 < \n", | |
"4 1 Residential spring Cadmium Total ug/L 1 < \n", | |
"\n", | |
" cen \n", | |
"0 True \n", | |
"1 True \n", | |
"2 True \n", | |
"3 True \n", | |
"4 True " | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas\n", | |
"\n", | |
"import wqio\n", | |
"\n", | |
"main_cols = [\n", | |
" 'epa_rain_zone', 'season', 'primary_landuse',\n", | |
" 'parameter', 'fraction', 'units', 'res', 'qual'\n", | |
"]\n", | |
"\n", | |
"df = pandas.read_csv(\"nsqdata_example_subset.csv\", usecols=main_cols)\n", | |
"df['cen'] = df['qual'].isin(['<'])\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.35189348345894644" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfros = wqio.robustros.RobustROSEstimator(data=df)\n", | |
"np.median(dfros.estimated_values)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment