Skip to content

Instantly share code, notes, and snippets.

@eleanxr
Last active April 9, 2018 21:32
Show Gist options
  • Save eleanxr/b549bc3b278456121700 to your computer and use it in GitHub Desktop.
Save eleanxr/b549bc3b278456121700 to your computer and use it in GitHub Desktop.
Raster Hydrograph and Flow Gaps
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid
# From the stack overflow answer at
# http://stackoverflow.com/questions/7404116/defining-the-midpoint-of-a-colormap-in-matplotlib
def shiftedColorMap(cmap, start=0, midpoint=0.5, stop=1.0, name='shiftedcmap'):
'''
Function to offset the "center" of a colormap. Useful for
data with a negative min and positive max and you want the
middle of the colormap's dynamic range to be at zero
Input
-----
cmap : The matplotlib colormap to be altered
start : Offset from lowest point in the colormap's range.
Defaults to 0.0 (no lower ofset). Should be between
0.0 and `midpoint`.
midpoint : The new center of the colormap. Defaults to
0.5 (no shift). Should be between 0.0 and 1.0. In
general, this should be 1 - vmax/(vmax + abs(vmin))
For example if your data range from -15.0 to +5.0 and
you want the center of the colormap at 0.0, `midpoint`
should be set to 1 - 5/(5 + 15)) or 0.75
stop : Offset from highets point in the colormap's range.
Defaults to 1.0 (no upper ofset). Should be between
`midpoint` and 1.0.
'''
cdict = {
'red': [],
'green': [],
'blue': [],
'alpha': []
}
# regular index to compute the colors
reg_index = np.linspace(start, stop, 257)
# shifted index to match the data
shift_index = np.hstack([
np.linspace(0.0, midpoint, 128, endpoint=False),
np.linspace(midpoint, 1.0, 129, endpoint=True)
])
for ri, si in zip(reg_index, shift_index):
r, g, b, a = cmap(ri)
cdict['red'].append((si, r, r))
cdict['green'].append((si, g, g))
cdict['blue'].append((si, b, b))
cdict['alpha'].append((si, a, a))
newcmap = matplotlib.colors.LinearSegmentedColormap(name, cdict)
plt.register_cmap(cmap=newcmap)
return newcmap
USGS_SITES = [
"06043500",
#"06044000",
"06045000",
"06045500",
"06052500"
]
USGS_SITE_NAMES = [
"Gallatin Gateway",
#"Salesville",
"Axtell Bridge",
"Belgrade",
"Logan"
]
"""
Module to read data in the RDB format from the USGS water data
web service. An example URL call is:
http://waterservices.usgs.gov/nwis/dv/?format=rdb&indent=on&sites=06043500&startDT=1930-01-01&endDT=2014-12-31&statCd=00003
USGS URL builder:
http://waterservices.usgs.gov/rest/DV-Test-Tool.html
Choose the USGS RDB format as output.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import matplotlib.ticker as ticker
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.colors import LogNorm
import matplotlib.cm
import calendar
import colormap
import usgs_data
WATER_RIGHT_BOUNDARIES = [pd.Timestamp("2000-05-15").dayofyear, pd.Timestamp('2000-07-15').dayofyear]
CFS_DAY_TO_AF = 1.9835
def read_data(site_id, start_date, end_date):
"""
Read data for the given USGS site id from start_date to
end_date. Adds derived attributes for flow gap data.
"""
data = usgs_data.get_flow_data(site_id, start_date, end_date)
# Add new columns for easy pivoting.
data["dayofyear"] = data.index.dayofyear
data["year"] = data.index.year
data["month"] = data.index.month
# Append the derived attributes
add_flow_gap_attributes(data)
return data
def get_targets(row):
"""
Create a dataset with e-flow targets given boundaries throughout the
year.
"""
current_day = pd.Timestamp(row['date']).dayofyear
if WATER_RIGHT_BOUNDARIES[0] <= current_day and current_day <= WATER_RIGHT_BOUNDARIES[1]:
return 800
else:
return 400
def compute_gap(row):
"""
Calculate the difference between actual flow and the instream flow target.
"""
return row['flow'] - row['e-flow-target']
def mark_deficit(row):
"""
Mark days where the actual flow does not mean the instream flow value.
"""
return 1 if row['e-flow-gap'] < 0.0 else 0
def create_raster_table(data, value, ascending = True):
"""
Creates the raster table from the dataframe using
year and day of year as indices and the specified
value attribute as the value.
"""
return data.pivot(index = 'year', columns = 'dayofyear', values = value).sort_index(ascending=ascending)
def create_yearly_totals(data, attributes):
"""
Sum yearly totals for a given set of attribute.
"""
sums = map(lambda a: create_raster_table(data, a).sum(axis = 1), attributes)
result = pd.concat(sums, axis = 1)
return result
def raster_plot(data, value, title, colormap=None, norm=None,
show_colorbar=False):
"""
Create a raster plot of a given attribute with day of year on the
x-axis and year on the y-axis.
"""
raster_table = create_raster_table(data, value, ascending = False)
extent = [0, 365, raster_table.index.min(), raster_table.index.max()]
min_value = data.min()[value]
max_value = data.max()[value]
plot = plt.imshow(raster_table, interpolation = 'nearest', aspect='auto',
extent = extent, cmap=colormap, norm=norm)
if show_colorbar:
colorbar = plt.colorbar()
#colorbar.set_ticks([data.min()[value], 0, data.max()[value]])
#colorbar.set_ticklabels([data.min()[value], 0, data.max()[value]])
axes = plot.get_axes()
axes.set_xlabel("Month")
axes.set_ylabel("Year")
months = pd.date_range("1/1/2015", periods=12, freq="M")
half_months = months.shift(15, freq="D")
#axes.set_xticks(months)
major_locator = ticker.FixedLocator(months.map(lambda d: d.dayofyear))
minor_locator = ticker.FixedLocator(half_months.map(lambda d: d.dayofyear))
axes.xaxis.set_major_locator(major_locator)
axes.xaxis.set_minor_locator(minor_locator)
axes.xaxis.set_major_formatter(ticker.NullFormatter())
minor_formatter = month_formatter()
axes.xaxis.set_minor_formatter(minor_formatter)
plt.title(title)
def month_formatter():
"""
Get a matplotlib fixed formatter that will label months by their
middle day.
"""
months = pd.date_range("1/1/2015", periods=12, freq="M")
half_months = months.shift(15, freq="D")
return ticker.FixedFormatter(half_months.map(lambda d: calendar.month_abbr[d.month]))
def add_flow_gap_attributes(data):
"""
Add instream flow target attributes.
"""
data['e-flow-target'] = pd.Series(data.reset_index().apply(get_targets, axis = 1).values, index=data.index)
data['e-flow-gap'] = data.apply(compute_gap, axis = 1)
data['e-flow-gap-af'] = data['e-flow-gap'] * CFS_DAY_TO_AF
data['deficit'] = data.apply(mark_deficit, axis = 1)
def plot_raster_hydrograph(site, start_date, end_date, attribute, title):
"""
Plot a raster hydrograph with a given attribute.
site: The USGS site id
start_date: The start date for the plot
end_date: The end date for the plot
attribute: the attribute to plot
title: The title of the plot.
"""
data = read_data(site, start_date, end_date)
add_flow_gap_attributes(data)
colormap = create_colormap(data, attribute, matplotlib.cm.coolwarm)
colormap.set_bad("black")
raster_plot(data, attribute, title, colormap=colormap, show_colorbar=True)
def plot_monthly_statistics(data, attribute, title):
"""
Plot the month-by-month statistics for a given attribute
in a box plot. Will display median, IQR, +/- 1.5*IQR, and
outliers.
"""
plot = data.boxplot(attribute, by='month')
plt.title(title)
axes = plot.get_axes()
months = pd.date_range("1/1/2015", periods=12, freq="M")
axes.xaxis.set_major_formatter(ticker.FixedFormatter(months.map(lambda d: calendar.month_abbr[d.month])))
def create_colormap(data, attribute, source_map):
"""
Create a colormap given a particular dataset. It
will set the minimum value and maximum value to the dataset
minimum and maximum and sets a zero point based on the
data.
"""
min_value = data.min()[attribute]
max_value = data.max()[attribute]
size = max_value - min_value
zero = abs(min_value) / size
return colormap.shiftedColorMap(source_map, midpoint = zero)
def compare_sites(site_ids, start_date, end_date, attribute, names=None):
datasets = map(lambda site: read_data(site, start_date, end_date), site_ids)
columns = map(lambda d: d[attribute], datasets)
join = pd.concat(columns, axis=1)
if names and len(names) == len(site_ids):
join.columns = names
else:
join.columns = site_ids
return join
def main():
import sys
if len(sys.argv) < 6:
print "Usage: %s <site_id> <start_date> <end_date> <attribute> <title>" % sys.argv[0]
sys.exit(-1)
site_id = sys.argv[1]
start_date = sys.argv[2]
end_date = sys.argv[3]
attribute = sys.argv[4]
title = sys.argv[5]
plot_raster_hydrograph(site_id, start_date, end_date, attribute, title)
plt.show()
if __name__ == '__main__':
main()
gnureadline==6.3.3
ipython==3.1.0
jdcal==1.0
matplotlib==1.4.3
mock==1.0.1
nose==1.3.6
numpy==1.9.2
openpyxl==1.8.6
pandas==0.16.1
pyparsing==2.0.3
python-dateutil==2.4.2
pytz==2015.2
scipy==0.15.1
six==1.9.0
xlwt==1.0.0
import requests
import pandas as pd
import datetime
def format_url(site, from_str, to_str):
baseurl = "http://waterservices.usgs.gov/nwis/dv/?format=rdb&indent=on&sites=%s&startDT=%s&endDT=%s&statCd=00003&parameterCd=00060"
return baseurl % (site, from_str, to_str)
def dateparse(s):
return pd.datetime.strptime(s, "%Y-%m-%d")
def get_flow_data(site_id, start_date, end_date):
"""
Download USGS flow data using waterservices.usgs.gov.
site_id: The USGS gage ID
start_date: The starting date for the data
end_date: The end date for the data
Returns a Pandas time series with the data.
"""
from_str = start_date.isoformat() if isinstance(start_date, datetime.date) else start_date
to_str = end_date.isoformat() if isinstance(end_date, datetime.date) else end_date
url = format_url(site_id, from_str, to_str)
data = pd.read_csv(
url,
comment='#',
delimiter="\t",
usecols=[2, 3],
header=1,
parse_dates=["date"],
date_parser=dateparse,
index_col='date',
names=['date', 'flow'],
na_values='Ice')
return data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment