Created
October 15, 2015 22:06
-
-
Save phobson/3d59be52ab47efbc9501 to your computer and use it in GitHub Desktop.
Imputing non-detect data in Python and R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| epa_rain_zone | state | location_code | station_name | jurisdiction_county | jurisdiction_city | primary_landuse | secondary_landuse | percent_impervious | start_date | days since last rain | precipitation_depth_(in) | season | parameter | fraction | units | res | qual | drainage_area_acres | latitude | longitude | station | cvcparam | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | IN | ININBAR1 | Basin_R1_control | Marion_County | Indianapolis | Residential | 5/17/2000 | spring | Cadmium | Total | ug/L | 10 | < | 8.36 | 39.75716944 | 85.97535278 | outflow | Cadmium (Cd) | |||||
| 1 | IN | ININBAR1 | Basin_R1_control | Marion_County | Indianapolis | Residential | 5/25/2001 | spring | Cadmium | Total | ug/L | 10 | < | 8.36 | 39.75716944 | 85.97535278 | outflow | Cadmium (Cd) | |||||
| 1 | IN | ININBAR2 | Basin_R2_test | Marion_County | Indianapolis | Residential | 5/17/2000 | spring | Cadmium | Total | ug/L | 10 | < | 13.41 | 39.76257222 | 85.97841667 | outflow | Cadmium (Cd) | |||||
| 1 | IN | ININBAR2 | Basin_R2_test | Marion_County | Indianapolis | Residential | 5/25/2001 | spring | Cadmium | Total | ug/L | 10 | < | 13.41 | 39.76257222 | 85.97841667 | outflow | Cadmium (Cd) | |||||
| 1 | MA | MABOA001 | Charlestown_29J212 | Suffollk_County | City_of_Boston | Residential | Open Space | 74 | 4/11/1992 | 10 | 0.24 | spring | Cadmium | Total | ug/L | 1 | < | 40.4 | 42.38388889 | 71.07241667 | outflow | Cadmium (Cd) | |
| 1 | MA | MABOA001 | Charlestown_29J212 | Suffollk_County | City_of_Boston | Residential | Open Space | 74 | 6/1/1992 | 6 | 2.01 | spring | Cadmium | Total | ug/L | 1 | < | 40.4 | 42.38388889 | 71.07241667 | outflow | Cadmium (Cd) | |
| 1 | MA | MABOA002 | West_Roxebury_13D077_078 | Suffollk_County | City_of_Boston | Residential | 52 | 4/17/1992 | 3 | 0.77 | spring | Cadmium | Total | ug/L | 1 | < | 86.7 | 42.29588889 | 71.14858333 | outflow | Cadmium (Cd) | ||
| 1 | MA | MABOA002 | West_Roxebury_13D077_078 | Suffollk_County | City_of_Boston | Residential | 52 | 6/1/1992 | 6 | 1.77 | spring | Cadmium | Total | ug/L | 1 | < | 86.7 | 42.29588889 | 71.14858333 | outflow | Cadmium (Cd) | ||
| 1 | MN | MNMIHAPK | Site1_Harriet_Pkwy_44th_St | Hennepin_County | Minneapolis | Residential | 4/10/2001 | >8hrs | 1.72 | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.92255833 | 93.29663889 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNMIHAPK | Site1_Harriet_Pkwy_44th_St | Hennepin_County | Minneapolis | Residential | 4/5/2002 | >8hrs | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.92255833 | 93.29663889 | outflow | Cadmium (Cd) | ||||
| 1 | MN | MNMIHAPK | Site1_Harriet_Pkwy_44th_St | Hennepin_County | Minneapolis | Residential | 5/7/2002 | >8hrs | 1.46 | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.92255833 | 93.29663889 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNMIHAPK | Site1_Harriet_Pkwy_44th_St | Hennepin_County | Minneapolis | Residential | 5/8/2002 | >8hrs | 1.46 | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.92255833 | 93.29663889 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNMIHAPK | Site1_Harriet_Pkwy_44th_St | Hennepin_County | Minneapolis | Residential | 6/6/2002 | >8hrs | 0.63 | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.92255833 | 93.29663889 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNMISD01 | E_Harriet_Pkwy_W44_St | - | City_of_Minneapolis | Residential | 6/11/2001 | 0.69 | spring | Cadmium | Total | ug/L | 0.5 | < | 143 | 44.9231 | 93.2856 | outflow | Cadmium (Cd) | ||||
| 1 | MN | MNMISD02 | Luella_St_Orange_Ave | - | City_of_Minneapolis | Residential | 6/5/2001 | 0.49 | spring | Cadmium | Total | ug/L | 1.17 | = | 95 | 44.9794 | 93.0189 | outflow | Cadmium (Cd) | ||||
| 1 | MN | MNMISD04 | Charles_Ave | - | City_of_Minneapolis | Residential | Commercial | 6/5/2001 | 0.49 | spring | Cadmium | Total | ug/L | 0.897 | = | 63 | 44.9594 | 93.1188 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNMISD05 | E_29_St_31_Ave_S | - | City_of_Minneapolis | Residential | Commercial | 6/5/2001 | 0.49 | spring | Cadmium | Total | ug/L | 1.26 | = | 100 | 44.9501 | 93.227 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 4/10/2001 | >8hrs | 1.72 | spring | Cadmium | Total | ug/L | 5 | < | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | ||
| 1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 6/5/2001 | >8hrs | 0.49 | spring | Cadmium | Total | ug/L | 1.26 | = | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | ||
| 1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 4/4/2002 | >8hrs | spring | Cadmium | Total | ug/L | 0.5 | < | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 6/2/2002 | >8hrs | 0.38 | spring | Cadmium | Total | ug/L | 0.5 | < | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | ||
| 1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 6/6/2002 | >8hrs | 0.63 | spring | Cadmium | Total | ug/L | 0.5 | < | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | ||
| 1 | MN | MNMISNAV | Site5a_Snelling_ave_S_and_E_24th_St | Hennepin_County | City_of_Minneapolis | Residential | Industrial | 6/19/2002 | >8hrs | 0.6 | spring | Cadmium | Total | ug/L | 0.5 | < | 113 | 44.95922222 | 93.24363889 | outflow | Cadmium (Cd) | ||
| 1 | MN | MNSPCHAV | Site4_Charles_ave_Mackubin_to_Arundel_St | Ramsey_County | St_Paul | Residential | Commercial | 4/10/2001 | >8hrs | 1.72 | spring | Cadmium | Total | ug/L | 5 | < | 63 | 44.95758889 | 93.12003889 | outflow | Cadmium (Cd) | ||
| 1 | MN | MNSPCHAV | Site4_Charles_ave_Mackubin_to_Arundel_St | Ramsey_County | St_Paul | Residential | Commercial | 4/5/2002 | >8hrs | spring | Cadmium | Total | ug/L | 0.5 | < | 63 | 44.95758889 | 93.12003889 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNSPCHAV | Site4_Charles_ave_Mackubin_to_Arundel_St | Ramsey_County | St_Paul | Residential | Commercial | 5/5/2002 | >8hrs | 0.62 | spring | Cadmium | Total | ug/L | 2.19 | = | 63 | 44.95758889 | 93.12003889 | outflow | Cadmium (Cd) | ||
| 1 | MN | MNSPCHAV | Site4_Charles_ave_Mackubin_to_Arundel_St | Ramsey_County | St_Paul | Residential | Commercial | 5/7/2002 | >8hrs | 1.46 | spring | Cadmium | Total | ug/L | 8.35 | = | 63 | 44.95758889 | 93.12003889 | outflow | Cadmium (Cd) | ||
| 1 | MN | MNSPCHAV | Site4_Charles_ave_Mackubin_to_Arundel_St | Ramsey_County | St_Paul | Residential | Commercial | 6/3/2002 | >8hrs | 0.7 | spring | Cadmium | Total | ug/L | 0.5 | < | 63 | 44.95758889 | 93.12003889 | outflow | Cadmium (Cd) | ||
| 1 | MN | MNSPLUOR | Site2_Luella_St_at_Orange_ave | Ramsey_County | St_Paul | Residential | 4/10/2001 | >8hrs | 1.72 | spring | Cadmium | Total | ug/L | 0.5 | < | 95 | 44.97933056 | 93.017675 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNSPLUOR | Site2_Luella_St_at_Orange_ave | Ramsey_County | St_Paul | Residential | 4/5/2002 | >8hrs | spring | Cadmium | Total | ug/L | 0.5 | < | 95 | 44.97933056 | 93.017675 | outflow | Cadmium (Cd) | ||||
| 1 | MN | MNSPLUOR | Site2_Luella_St_at_Orange_ave | Ramsey_County | St_Paul | Residential | 5/5/2002 | >8hrs | 0.62 | spring | Cadmium | Total | ug/L | 2.24 | = | 95 | 44.97933056 | 93.017675 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNSPLUOR | Site2_Luella_St_at_Orange_ave | Ramsey_County | St_Paul | Residential | 5/7/2002 | >8hrs | 1.46 | spring | Cadmium | Total | ug/L | 0.5 | < | 95 | 44.97933056 | 93.017675 | outflow | Cadmium (Cd) | |||
| 1 | MN | MNSPLUOR | Site2_Luella_St_at_Orange_ave | Ramsey_County | St_Paul | Residential | 6/3/2002 | >8hrs | 0.7 | spring | Cadmium | Total | ug/L | 0.5 | < | 95 | 44.97933056 | 93.017675 | outflow | Cadmium (Cd) | |||
| 1 | WI | WIMALAAV | Lakeland_Ave | - | Madison | Residential | 6/17/1993 | 0.57 | spring | Cadmium | Total | ug/L | 1 | = | 73.7 | outflow | Cadmium (Cd) | ||||||
| 1 | WI | WIMALAAV | Lakeland_Ave | - | Madison | Residential | 4/12/1994 | 0.5 | spring | Cadmium | Total | ug/L | 1.6 | = | 73.7 | outflow | Cadmium (Cd) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### https://cran.r-project.org/web/packages/NADA/index.html" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "Loading required package: survival\n", | |
| "\n", | |
| "Attaching package: 'NADA'\n", | |
| "\n", | |
| "The following object is masked from 'package:stats':\n", | |
| "\n", | |
| " cor\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| "<thead><tr><th></th><th scope=col>epa_rain_zone</th><th scope=col>season</th><th scope=col>primary_landuse</th><th scope=col>parameter</th><th scope=col>fraction</th><th scope=col>units</th><th scope=col>res</th><th scope=col>qual</th></tr></thead>\n", | |
| "<tbody>\n", | |
| "\t<tr><th scope=row>1</th><td>1</td><td>spring</td><td>Residential</td><td>Cadmium</td><td>Total</td><td>ug/L</td><td>10</td><td><</td></tr>\n", | |
| "\t<tr><th scope=row>2</th><td>1</td><td>spring</td><td>Residential</td><td>Cadmium</td><td>Total</td><td>ug/L</td><td>10</td><td><</td></tr>\n", | |
| "\t<tr><th scope=row>3</th><td>1</td><td>spring</td><td>Residential</td><td>Cadmium</td><td>Total</td><td>ug/L</td><td>10</td><td><</td></tr>\n", | |
| "\t<tr><th scope=row>4</th><td>1</td><td>spring</td><td>Residential</td><td>Cadmium</td><td>Total</td><td>ug/L</td><td>10</td><td><</td></tr>\n", | |
| "\t<tr><th scope=row>5</th><td>1</td><td>spring</td><td>Residential</td><td>Cadmium</td><td>Total</td><td>ug/L</td><td>1</td><td><</td></tr>\n", | |
| "\t<tr><th scope=row>6</th><td>1</td><td>spring</td><td>Residential</td><td>Cadmium</td><td>Total</td><td>ug/L</td><td>1</td><td><</td></tr>\n", | |
| "</tbody>\n", | |
| "</table>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{tabular}{r|llllllll}\n", | |
| " & epa_rain_zone & season & primary_landuse & parameter & fraction & units & res & qual\\\\\n", | |
| "\\hline\n", | |
| "\t1 & 1 & spring & Residential & Cadmium & Total & ug/L & 10 & <\\\\\n", | |
| "\t2 & 1 & spring & Residential & Cadmium & Total & ug/L & 10 & <\\\\\n", | |
| "\t3 & 1 & spring & Residential & Cadmium & Total & ug/L & 10 & <\\\\\n", | |
| "\t4 & 1 & spring & Residential & Cadmium & Total & ug/L & 10 & <\\\\\n", | |
| "\t5 & 1 & spring & Residential & Cadmium & Total & ug/L & 1 & <\\\\\n", | |
| "\t6 & 1 & spring & Residential & Cadmium & Total & ug/L & 1 & <\\\\\n", | |
| "\\end{tabular}\n" | |
| ], | |
| "text/plain": [ | |
| " epa_rain_zone season primary_landuse parameter fraction units res qual\n", | |
| "1 1 spring Residential Cadmium Total ug/L 10 <\n", | |
| "2 1 spring Residential Cadmium Total ug/L 10 <\n", | |
| "3 1 spring Residential Cadmium Total ug/L 10 <\n", | |
| "4 1 spring Residential Cadmium Total ug/L 10 <\n", | |
| "5 1 spring Residential Cadmium Total ug/L 1 <\n", | |
| "6 1 spring Residential Cadmium Total ug/L 1 <" | |
| ] | |
| }, | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "library(\"NADA\")\n", | |
| "\n", | |
| "main_columns = c(\n", | |
| " 'epa_rain_zone', 'season', 'primary_landuse',\n", | |
| " 'parameter', 'fraction', 'units', 'res', 'qual'\n", | |
| ")\n", | |
| "df = read.csv(\"nsqdata_example_subset.csv\")[main_columns]\n", | |
| "head(df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Use the detection limits, compute the median" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "1" | |
| ], | |
| "text/latex": [ | |
| "1" | |
| ], | |
| "text/markdown": [ | |
| "1" | |
| ], | |
| "text/plain": [ | |
| "[1] 1" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "median(df$res)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Use ROS to impute censored data and show the median" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "Warning message:\n", | |
| "In NADA::ros(df$res, df$qual == \"<\"): Dropped censored values that exceed max of uncensored values." | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "0.351893483458947" | |
| ], | |
| "text/latex": [ | |
| "0.351893483458947" | |
| ], | |
| "text/markdown": [ | |
| "0.351893483458947" | |
| ], | |
| "text/plain": [ | |
| "[1] 0.3518935" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dfros = NADA::ros(df$res, df$qual=='<')\n", | |
| "median(dfros)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "R", | |
| "language": "R", | |
| "name": "ir" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": "r", | |
| "file_extension": ".r", | |
| "mimetype": "text/x-r-source", | |
| "name": "R", | |
| "pygments_lexer": "r", | |
| "version": "3.1.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment