eleanxr · April 9, 2018 21:32
diff --git a/.gitignore b/.gitignore
 *.pyc
diff --git a/colormap.py b/colormap.py
 import numpy as np
 import matplotlib
 import matplotlib.pyplot as plt
 from mpl_toolkits.axes_grid1 import AxesGrid

 # From the stack overflow answer at
 # http://stackoverflow.com/questions/7404116/defining-the-midpoint-of-a-colormap-in-matplotlib

 def shiftedColorMap(cmap, start=0, midpoint=0.5, stop=1.0, name='shiftedcmap'):
    '''
    Function to offset the "center" of a colormap. Useful for
    data with a negative min and positive max and you want the
    middle of the colormap's dynamic range to be at zero

    Input
    -----
      cmap : The matplotlib colormap to be altered
      start : Offset from lowest point in the colormap's range.
          Defaults to 0.0 (no lower ofset). Should be between
          0.0 and `midpoint`.
      midpoint : The new center of the colormap. Defaults to 
          0.5 (no shift). Should be between 0.0 and 1.0. In
          general, this should be  1 - vmax/(vmax + abs(vmin))
          For example if your data range from -15.0 to +5.0 and
          you want the center of the colormap at 0.0, `midpoint`
          should be set to  1 - 5/(5 + 15)) or 0.75
      stop : Offset from highets point in the colormap's range.
          Defaults to 1.0 (no upper ofset). Should be between
          `midpoint` and 1.0.
    '''
    cdict = {
        'red': [],
        'green': [],
        'blue': [],
        'alpha': []
    }

    # regular index to compute the colors
    reg_index = np.linspace(start, stop, 257)

    # shifted index to match the data
    shift_index = np.hstack([
        np.linspace(0.0, midpoint, 128, endpoint=False), 
        np.linspace(midpoint, 1.0, 129, endpoint=True)
    ])

    for ri, si in zip(reg_index, shift_index):
        r, g, b, a = cmap(ri)

        cdict['red'].append((si, r, r))
        cdict['green'].append((si, g, g))
        cdict['blue'].append((si, b, b))
        cdict['alpha'].append((si, a, a))

    newcmap = matplotlib.colors.LinearSegmentedColormap(name, cdict)
    plt.register_cmap(cmap=newcmap)

    return newcmap
diff --git a/gallatin.py b/gallatin.py

 USGS_SITES = [
    "06043500",
    #"06044000",
    "06045000",
    "06045500",
    "06052500"
 ]

 USGS_SITE_NAMES = [
    "Gallatin Gateway",
    #"Salesville",
    "Axtell Bridge",
    "Belgrade",
    "Logan"
 ]
diff --git a/rasterflow.py b/rasterflow.py
 """
 Module to read data in the RDB format from the USGS water data
 web service. An example URL call is:
 http://waterservices.usgs.gov/nwis/dv/?format=rdb&indent=on&sites=06043500&startDT=1930-01-01&endDT=2014-12-31&statCd=00003

 USGS URL builder:
 http://waterservices.usgs.gov/rest/DV-Test-Tool.html

 Choose the USGS RDB format as output.
 """
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 import matplotlib.dates as dates
 import matplotlib.ticker as ticker
 from matplotlib.colors import LinearSegmentedColormap
 from matplotlib.colors import LogNorm
 import matplotlib.cm
 import calendar
 import colormap
 import usgs_data

 WATER_RIGHT_BOUNDARIES = [pd.Timestamp("2000-05-15").dayofyear, pd.Timestamp('2000-07-15').dayofyear]

 CFS_DAY_TO_AF = 1.9835

 def read_data(site_id, start_date, end_date):
    """
    Read data for the given USGS site id from start_date to
    end_date. Adds derived attributes for flow gap data.
    """
    data = usgs_data.get_flow_data(site_id, start_date, end_date)
    
    # Add new columns for easy pivoting.
    data["dayofyear"] = data.index.dayofyear
    data["year"] = data.index.year
    data["month"] = data.index.month
    
    # Append the derived attributes
    add_flow_gap_attributes(data)
    
    return data

 def get_targets(row):
    """
    Create a dataset with e-flow targets given boundaries throughout the
    year.
    """
    current_day = pd.Timestamp(row['date']).dayofyear
    if WATER_RIGHT_BOUNDARIES[0] <= current_day and current_day <= WATER_RIGHT_BOUNDARIES[1]:
        return 800
    else:
        return 400

 def compute_gap(row):
    """
    Calculate the difference between actual flow and the instream flow target.
    """
    return row['flow'] - row['e-flow-target']

 def mark_deficit(row):
    """
    Mark days where the actual flow does not mean the instream flow value.
    """
    return 1 if row['e-flow-gap'] < 0.0 else 0

 def create_raster_table(data, value, ascending = True):
    """
    Creates the raster table from the dataframe using
    year and day of year as indices and the specified
    value attribute as the value.
    """
    return data.pivot(index = 'year', columns = 'dayofyear', values = value).sort_index(ascending=ascending)

 def create_yearly_totals(data, attributes):
    """
    Sum yearly totals for a given set of attribute.
    """
    sums = map(lambda a: create_raster_table(data, a).sum(axis = 1), attributes)
    result = pd.concat(sums, axis = 1)
    return result

 def raster_plot(data, value, title, colormap=None, norm=None,
                show_colorbar=False):
    """
    Create a raster plot of a given attribute with day of year on the
    x-axis and year on the y-axis.
    """
    raster_table = create_raster_table(data, value, ascending = False)
    extent = [0, 365, raster_table.index.min(), raster_table.index.max()]
    min_value = data.min()[value]
    max_value = data.max()[value]
    
    plot = plt.imshow(raster_table, interpolation = 'nearest', aspect='auto',
                      extent = extent, cmap=colormap, norm=norm)
    if show_colorbar:
        colorbar = plt.colorbar()
        #colorbar.set_ticks([data.min()[value], 0, data.max()[value]])
        #colorbar.set_ticklabels([data.min()[value], 0, data.max()[value]])
    
    axes = plot.get_axes()
    axes.set_xlabel("Month")
    axes.set_ylabel("Year")
    
    months = pd.date_range("1/1/2015", periods=12, freq="M")
    half_months = months.shift(15, freq="D")
    #axes.set_xticks(months)
    major_locator = ticker.FixedLocator(months.map(lambda d: d.dayofyear))
    minor_locator = ticker.FixedLocator(half_months.map(lambda d: d.dayofyear))
    axes.xaxis.set_major_locator(major_locator)
    axes.xaxis.set_minor_locator(minor_locator)
    axes.xaxis.set_major_formatter(ticker.NullFormatter())
    minor_formatter = month_formatter()
    axes.xaxis.set_minor_formatter(minor_formatter)
    
    plt.title(title)
    
 def month_formatter():
    """
    Get a matplotlib fixed formatter that will label months by their
    middle day.
    """
    months = pd.date_range("1/1/2015", periods=12, freq="M")
    half_months = months.shift(15, freq="D")
    return ticker.FixedFormatter(half_months.map(lambda d: calendar.month_abbr[d.month]))
    
 def add_flow_gap_attributes(data):
    """
    Add instream flow target attributes.
    """
    data['e-flow-target'] = pd.Series(data.reset_index().apply(get_targets, axis = 1).values, index=data.index)
    data['e-flow-gap'] = data.apply(compute_gap, axis = 1)
    data['e-flow-gap-af'] = data['e-flow-gap'] * CFS_DAY_TO_AF
    data['deficit'] = data.apply(mark_deficit, axis = 1)
    
 def plot_raster_hydrograph(site, start_date, end_date, attribute, title):
    """
    Plot a raster hydrograph with a given attribute.
    site: The USGS site id
    start_date: The start date for the plot
    end_date: The end date for the plot
    attribute: the attribute to plot
    title: The title of the plot.
    """
    data = read_data(site, start_date, end_date)
    
    add_flow_gap_attributes(data)
    
    colormap = create_colormap(data, attribute, matplotlib.cm.coolwarm)
    colormap.set_bad("black")
    raster_plot(data, attribute, title, colormap=colormap, show_colorbar=True)
    
 def plot_monthly_statistics(data, attribute, title):
    """
    Plot the month-by-month statistics for a given attribute
    in a box plot. Will display median, IQR, +/- 1.5*IQR, and
    outliers.
    """
    plot = data.boxplot(attribute, by='month')
    plt.title(title)
    
    axes = plot.get_axes()
    
    months = pd.date_range("1/1/2015", periods=12, freq="M")
    
    axes.xaxis.set_major_formatter(ticker.FixedFormatter(months.map(lambda d: calendar.month_abbr[d.month])))
    
 def create_colormap(data, attribute, source_map):
    """
    Create a colormap given a particular dataset. It
    will set the minimum value and maximum value to the dataset
    minimum and maximum and sets a zero point based on the
    data.
    """
    min_value = data.min()[attribute]
    max_value = data.max()[attribute]
    size = max_value - min_value
    zero = abs(min_value) / size
    return colormap.shiftedColorMap(source_map, midpoint = zero)

 def compare_sites(site_ids, start_date, end_date, attribute, names=None):
    datasets = map(lambda site: read_data(site, start_date, end_date), site_ids)
    columns = map(lambda d: d[attribute], datasets)
    join = pd.concat(columns, axis=1)
    if names and len(names) == len(site_ids):
        join.columns = names
    else:
        join.columns = site_ids
    return join

 def main():
    import sys
    if len(sys.argv) < 6:
        print "Usage: %s <site_id> <start_date> <end_date> <attribute> <title>" % sys.argv[0]
        sys.exit(-1)

    site_id = sys.argv[1]
    start_date = sys.argv[2]
    end_date = sys.argv[3]
    attribute = sys.argv[4]
    title = sys.argv[5]

    plot_raster_hydrograph(site_id, start_date, end_date, attribute, title)
    plt.show()

 if __name__ == '__main__':
    main()
diff --git a/requirements.txt b/requirements.txt
 gnureadline==6.3.3
 ipython==3.1.0
 jdcal==1.0
 matplotlib==1.4.3
 mock==1.0.1
 nose==1.3.6
 numpy==1.9.2
 openpyxl==1.8.6
 pandas==0.16.1
 pyparsing==2.0.3
 python-dateutil==2.4.2
 pytz==2015.2
 scipy==0.15.1
 six==1.9.0
 xlwt==1.0.0
diff --git a/usgs_data.py b/usgs_data.py
 import requests
 import pandas as pd
 import datetime

 def format_url(site, from_str, to_str):
    baseurl = "http://waterservices.usgs.gov/nwis/dv/?format=rdb&indent=on&sites=%s&startDT=%s&endDT=%s&statCd=00003&parameterCd=00060"
    return baseurl  % (site, from_str, to_str)

 def dateparse(s):
    return pd.datetime.strptime(s, "%Y-%m-%d")

 def get_flow_data(site_id, start_date, end_date):
    """
    Download USGS flow data using waterservices.usgs.gov.
    site_id: The USGS gage ID
    start_date: The starting date for the data
    end_date: The end date for the data
    Returns a Pandas time series with the data.
    """
    from_str = start_date.isoformat() if isinstance(start_date, datetime.date) else start_date
    to_str = end_date.isoformat() if isinstance(end_date, datetime.date) else end_date
    url = format_url(site_id, from_str, to_str)
    data = pd.read_csv(
        url,
        comment='#',
        delimiter="\t",
        usecols=[2, 3],
        header=1,
        parse_dates=["date"],
        date_parser=dateparse,
        index_col='date',
        names=['date', 'flow'],
        na_values='Ice')
    return data
	import numpy as np
	import matplotlib
	import matplotlib.pyplot as plt
	from mpl_toolkits.axes_grid1 import AxesGrid

	# From the stack overflow answer at
	# http://stackoverflow.com/questions/7404116/defining-the-midpoint-of-a-colormap-in-matplotlib

	def shiftedColorMap(cmap, start=0, midpoint=0.5, stop=1.0, name='shiftedcmap'):
	'''
	Function to offset the "center" of a colormap. Useful for
	data with a negative min and positive max and you want the
	middle of the colormap's dynamic range to be at zero

	Input
	-----
	cmap : The matplotlib colormap to be altered
	start : Offset from lowest point in the colormap's range.
	Defaults to 0.0 (no lower ofset). Should be between
	0.0 and `midpoint`.
	midpoint : The new center of the colormap. Defaults to
	0.5 (no shift). Should be between 0.0 and 1.0. In
	general, this should be 1 - vmax/(vmax + abs(vmin))
	For example if your data range from -15.0 to +5.0 and
	you want the center of the colormap at 0.0, `midpoint`
	should be set to 1 - 5/(5 + 15)) or 0.75
	stop : Offset from highets point in the colormap's range.
	Defaults to 1.0 (no upper ofset). Should be between
	`midpoint` and 1.0.
	'''
	cdict = {
	'red': [],
	'green': [],
	'blue': [],
	'alpha': []
	}

	# regular index to compute the colors
	reg_index = np.linspace(start, stop, 257)

	# shifted index to match the data
	shift_index = np.hstack([
	np.linspace(0.0, midpoint, 128, endpoint=False),
	np.linspace(midpoint, 1.0, 129, endpoint=True)
	])

	for ri, si in zip(reg_index, shift_index):
	r, g, b, a = cmap(ri)

	cdict['red'].append((si, r, r))
	cdict['green'].append((si, g, g))
	cdict['blue'].append((si, b, b))
	cdict['alpha'].append((si, a, a))

	newcmap = matplotlib.colors.LinearSegmentedColormap(name, cdict)
	plt.register_cmap(cmap=newcmap)

	return newcmap

	USGS_SITES = [
	"06043500",
	#"06044000",
	"06045000",
	"06045500",
	"06052500"
	]

	USGS_SITE_NAMES = [
	"Gallatin Gateway",
	#"Salesville",
	"Axtell Bridge",
	"Belgrade",
	"Logan"
	]
	"""
	Module to read data in the RDB format from the USGS water data
	web service. An example URL call is:
	http://waterservices.usgs.gov/nwis/dv/?format=rdb&indent=on&sites=06043500&startDT=1930-01-01&endDT=2014-12-31&statCd=00003

	USGS URL builder:
	http://waterservices.usgs.gov/rest/DV-Test-Tool.html

	Choose the USGS RDB format as output.
	"""
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import matplotlib.dates as dates
	import matplotlib.ticker as ticker
	from matplotlib.colors import LinearSegmentedColormap
	from matplotlib.colors import LogNorm
	import matplotlib.cm
	import calendar
	import colormap
	import usgs_data

	WATER_RIGHT_BOUNDARIES = [pd.Timestamp("2000-05-15").dayofyear, pd.Timestamp('2000-07-15').dayofyear]

	CFS_DAY_TO_AF = 1.9835

	def read_data(site_id, start_date, end_date):
	"""
	Read data for the given USGS site id from start_date to
	end_date. Adds derived attributes for flow gap data.
	"""
	data = usgs_data.get_flow_data(site_id, start_date, end_date)

	# Add new columns for easy pivoting.
	data["dayofyear"] = data.index.dayofyear
	data["year"] = data.index.year
	data["month"] = data.index.month

	# Append the derived attributes
	add_flow_gap_attributes(data)

	return data

	def get_targets(row):
	"""
	Create a dataset with e-flow targets given boundaries throughout the
	year.
	"""
	current_day = pd.Timestamp(row['date']).dayofyear
	if WATER_RIGHT_BOUNDARIES[0] <= current_day and current_day <= WATER_RIGHT_BOUNDARIES[1]:
	return 800
	else:
	return 400

	def compute_gap(row):
	"""
	Calculate the difference between actual flow and the instream flow target.
	"""
	return row['flow'] - row['e-flow-target']

	def mark_deficit(row):
	"""
	Mark days where the actual flow does not mean the instream flow value.
	"""
	return 1 if row['e-flow-gap'] < 0.0 else 0

	def create_raster_table(data, value, ascending = True):
	"""
	Creates the raster table from the dataframe using
	year and day of year as indices and the specified
	value attribute as the value.
	"""
	return data.pivot(index = 'year', columns = 'dayofyear', values = value).sort_index(ascending=ascending)

	def create_yearly_totals(data, attributes):
	"""
	Sum yearly totals for a given set of attribute.
	"""
	sums = map(lambda a: create_raster_table(data, a).sum(axis = 1), attributes)
	result = pd.concat(sums, axis = 1)
	return result

	def raster_plot(data, value, title, colormap=None, norm=None,
	show_colorbar=False):
	"""
	Create a raster plot of a given attribute with day of year on the
	x-axis and year on the y-axis.
	"""
	raster_table = create_raster_table(data, value, ascending = False)
	extent = [0, 365, raster_table.index.min(), raster_table.index.max()]
	min_value = data.min()[value]
	max_value = data.max()[value]

	plot = plt.imshow(raster_table, interpolation = 'nearest', aspect='auto',
	extent = extent, cmap=colormap, norm=norm)
	if show_colorbar:
	colorbar = plt.colorbar()
	#colorbar.set_ticks([data.min()[value], 0, data.max()[value]])
	#colorbar.set_ticklabels([data.min()[value], 0, data.max()[value]])

	axes = plot.get_axes()
	axes.set_xlabel("Month")
	axes.set_ylabel("Year")

	months = pd.date_range("1/1/2015", periods=12, freq="M")
	half_months = months.shift(15, freq="D")
	#axes.set_xticks(months)
	major_locator = ticker.FixedLocator(months.map(lambda d: d.dayofyear))
	minor_locator = ticker.FixedLocator(half_months.map(lambda d: d.dayofyear))
	axes.xaxis.set_major_locator(major_locator)
	axes.xaxis.set_minor_locator(minor_locator)
	axes.xaxis.set_major_formatter(ticker.NullFormatter())
	minor_formatter = month_formatter()
	axes.xaxis.set_minor_formatter(minor_formatter)

	plt.title(title)

	def month_formatter():
	"""
	Get a matplotlib fixed formatter that will label months by their
	middle day.
	"""
	months = pd.date_range("1/1/2015", periods=12, freq="M")
	half_months = months.shift(15, freq="D")
	return ticker.FixedFormatter(half_months.map(lambda d: calendar.month_abbr[d.month]))

	def add_flow_gap_attributes(data):
	"""
	Add instream flow target attributes.
	"""
	data['e-flow-target'] = pd.Series(data.reset_index().apply(get_targets, axis = 1).values, index=data.index)
	data['e-flow-gap'] = data.apply(compute_gap, axis = 1)
	data['e-flow-gap-af'] = data['e-flow-gap'] * CFS_DAY_TO_AF
	data['deficit'] = data.apply(mark_deficit, axis = 1)

	def plot_raster_hydrograph(site, start_date, end_date, attribute, title):
	"""
	Plot a raster hydrograph with a given attribute.
	site: The USGS site id
	start_date: The start date for the plot
	end_date: The end date for the plot
	attribute: the attribute to plot
	title: The title of the plot.
	"""
	data = read_data(site, start_date, end_date)

	add_flow_gap_attributes(data)

	colormap = create_colormap(data, attribute, matplotlib.cm.coolwarm)
	colormap.set_bad("black")
	raster_plot(data, attribute, title, colormap=colormap, show_colorbar=True)

	def plot_monthly_statistics(data, attribute, title):
	"""
	Plot the month-by-month statistics for a given attribute
	in a box plot. Will display median, IQR, +/- 1.5*IQR, and
	outliers.
	"""
	plot = data.boxplot(attribute, by='month')
	plt.title(title)

	axes = plot.get_axes()

	months = pd.date_range("1/1/2015", periods=12, freq="M")

	axes.xaxis.set_major_formatter(ticker.FixedFormatter(months.map(lambda d: calendar.month_abbr[d.month])))

	def create_colormap(data, attribute, source_map):
	"""
	Create a colormap given a particular dataset. It
	will set the minimum value and maximum value to the dataset
	minimum and maximum and sets a zero point based on the
	data.
	"""
	min_value = data.min()[attribute]
	max_value = data.max()[attribute]
	size = max_value - min_value
	zero = abs(min_value) / size
	return colormap.shiftedColorMap(source_map, midpoint = zero)

	def compare_sites(site_ids, start_date, end_date, attribute, names=None):
	datasets = map(lambda site: read_data(site, start_date, end_date), site_ids)
	columns = map(lambda d: d[attribute], datasets)
	join = pd.concat(columns, axis=1)
	if names and len(names) == len(site_ids):
	join.columns = names
	else:
	join.columns = site_ids
	return join

	def main():
	import sys
	if len(sys.argv) < 6:
	print "Usage: %s <site_id> <start_date> <end_date> <attribute> <title>" % sys.argv[0]
	sys.exit(-1)

	site_id = sys.argv[1]
	start_date = sys.argv[2]
	end_date = sys.argv[3]
	attribute = sys.argv[4]
	title = sys.argv[5]

	plot_raster_hydrograph(site_id, start_date, end_date, attribute, title)
	plt.show()

	if __name__ == '__main__':
	main()
	gnureadline==6.3.3
	ipython==3.1.0
	jdcal==1.0
	matplotlib==1.4.3
	mock==1.0.1
	nose==1.3.6
	numpy==1.9.2
	openpyxl==1.8.6
	pandas==0.16.1
	pyparsing==2.0.3
	python-dateutil==2.4.2
	pytz==2015.2
	scipy==0.15.1
	six==1.9.0
	xlwt==1.0.0
	import requests
	import pandas as pd
	import datetime

	def format_url(site, from_str, to_str):
	baseurl = "http://waterservices.usgs.gov/nwis/dv/?format=rdb&indent=on&sites=%s&startDT=%s&endDT=%s&statCd=00003&parameterCd=00060"
	return baseurl % (site, from_str, to_str)

	def dateparse(s):
	return pd.datetime.strptime(s, "%Y-%m-%d")

	def get_flow_data(site_id, start_date, end_date):
	"""
	Download USGS flow data using waterservices.usgs.gov.
	site_id: The USGS gage ID
	start_date: The starting date for the data
	end_date: The end date for the data
	Returns a Pandas time series with the data.
	"""
	from_str = start_date.isoformat() if isinstance(start_date, datetime.date) else start_date
	to_str = end_date.isoformat() if isinstance(end_date, datetime.date) else end_date
	url = format_url(site_id, from_str, to_str)
	data = pd.read_csv(
	url,
	comment='#',
	delimiter="\t",
	usecols=[2, 3],
	header=1,
	parse_dates=["date"],
	date_parser=dateparse,
	index_col='date',
	names=['date', 'flow'],
	na_values='Ice')
	return data