Skip to content

Instantly share code, notes, and snippets.

@phobson
Created August 23, 2014 01:58
Show Gist options
  • Save phobson/cc9f5e9f07aa4a4f9803 to your computer and use it in GitHub Desktop.
Save phobson/cc9f5e9f07aa4a4f9803 to your computer and use it in GitHub Desktop.
tidy_scratch.py
%matplotlib inline
import numpy as np
import pandas
import matplotlib.pyplot as plt
import seaborn
seaborn.set(style='ticks', palette='deep', rc={'text.usetex': False})
import bmp
import utils
# parameters (metals) and BMP categories used to subset the database
pollutants = [
    'Cadmium, Total',
    'Copper, Total',
    'Lead, Total',
    'Zinc, Total',
]
BMPs = [
    'Bioretention',
    'Detention Basin',
    'Retention Pond',
    'Wetland Basin',
]

# read the full dataset along with the BMP category lookup
database = bmp.Database(
    "bmp/data/everything.csv",
    bmpcat_src="bmp/data/bmpcats.csv"
)

# keep only the parameters and BMP categories listed above
# (per the original note, the selection comes back cross-tabbed with
# influent/effluent in the columns; a later `stack` call moves those
# back into rows)
metals = database.selectData(parameter=pollutants, category=BMPs)
def getROS(group):
    """Return the ``data`` attribute of ``utils.ros.MR`` built from `group`.

    Written to be handed to ``groupby(...).apply``, which calls it once
    per group.

    NOTE(review): ``utils.ros.MR`` is defined outside this file; it
    presumably performs a regression-on-order-statistics estimate and
    exposes the result as a DataFrame in ``.data`` -- confirm.
    """
    return utils.ros.MR(group).data
class DatasetCollection(object):
    """Tidy (long-format) view of a cross-tabbed results table.

    On construction the input frame is stacked on the `stationcol`
    column level, reduced to the grouping/result/qualifier columns,
    split into groups, and each group is passed through ``getROS``.
    The combined output is stored in ``self.tidy``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Source data. Its columns must contain a level named
        `stationcol`; after stacking and resetting the index, every
        name in ``self.columns`` must be available as a column.
    rescol : str
        Name of the raw result column.
    qualcol : str
        Name of the qualifier column.
    stationcol : str
        Name of the column level that is stacked into rows; also the
        first grouping key.
    paramcol : str
        Name of the parameter column; the second grouping key.
    othergroups : label or list of labels, optional
        Extra grouping key(s) appended after `stationcol` and
        `paramcol`. ``None`` (default) adds nothing, a ``list`` is
        used element by element, and any other value (e.g. a single
        string) is treated as one column label.
    useROS : bool
        When true, ``self.rescol`` names the ``'ros_' + rescol``
        column; otherwise it names `rescol`. ``getROS`` is applied in
        either case.

    Attributes
    ----------
    data : the frame passed in, unmodified.
    rescol, qualcol, stationcol, paramcol : column names (see above).
    groupby : list of grouping keys.
    columns : ``groupby`` plus the raw result and qualifier columns.
    tidy : the resulting long-format DataFrame.
    """

    def __init__(self, dataframe, rescol='res', qualcol='qual',
                 stationcol='station', paramcol='parameter',
                 othergroups=None, useROS=True):
        self.data = dataframe
        self._raw_rescol = rescol
        roscol = 'ros_' + rescol
        self.rescol = roscol if useROS else rescol
        self.qualcol = qualcol
        self.stationcol = stationcol
        self.paramcol = paramcol

        # Normalise `othergroups` to a flat list of labels. The previous
        # `list([othergroups])` wrapped *everything* in a one-element
        # list, so the default produced `[[]]` and a list of names
        # produced a nested list; only a single string worked.
        if othergroups is None:
            extra_groups = []
        elif isinstance(othergroups, list):
            extra_groups = list(othergroups)
        else:
            # a single label (string, tuple label, ...) -- same result
            # as before for these inputs
            extra_groups = [othergroups]

        self.groupby = [stationcol, paramcol] + extra_groups
        self.columns = self.groupby + [self._raw_rescol, self.qualcol]

        # stack stations into rows, keep only the needed columns, then
        # run getROS on every group.
        # NOTE(review): 'final_data' is presumably the estimate column
        # produced by utils.ros.MR -- confirm against that module.
        self.tidy = (
            self.data
                .stack(level=self.stationcol)
                .reset_index()[self.columns]
                .groupby(by=self.groupby)
                .apply(getROS)
                .reset_index()
                .rename(columns={'final_data': roscol})
        )

        # Drop every column that is neither a requested column nor the
        # ROS column. Before a column is dropped the frame is sorted by
        # it and the index is rebuilt, so the last such column decides
        # the final row order.
        # `DataFrame.sort(columns=...)` no longer exists in pandas
        # (removed in 0.20); `sort_values(by=...)` is the replacement.
        keep = self.columns + [roscol]
        for c in self.tidy.columns:
            if c not in keep:
                self.tidy = (
                    self.tidy
                        .sort_values(by=c)
                        .reset_index()
                        .drop([c, 'index'], axis=1)
                )
# build the tidy collection, grouping additionally by BMP category
dc = DatasetCollection(metals, othergroups='category')

# preview the first ten total-zinc rows for bioretention BMPs
is_zinc = dc.tidy.parameter == 'Zinc, Total'
is_bioretention = dc.tidy.category == 'Bioretention'
dc.tidy[is_zinc & is_bioretention].head(10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment