Created
September 26, 2012 13:49
-
-
Save andreas-h/3788155 to your computer and use it in GitHub Desktop.
Design of a n-dimensional array-like object with dimension variables
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
***************************************************************************** | |
Design document for a ``gridded_array`` class | |
***************************************************************************** | |
:Author: Andreas Hilboll <[email protected]> | |
:Date: Wed Sep 26 14:48:57 2012 | |
This document specifies my need for an array-like data object which
stores dimension variables, i.e. information on what each dimension of the
array actually means. I call this class ``gridded_array``.
Ideally, the class would support ``datetime`` objects as dimension variables,
and it would be possible to easily down-sample (by averaging) and interpolate
to finer grids.
Most of the requirements would be fulfilled by the excellent ``pandas`` | |
library. However, ``pandas`` only supports up to three dimensions. Another | |
problem is that the nomenclature (``major_axis``, ``minor_axis``, ``items``) | |
is kind of unintuitive because the user always has to remember which dimension | |
is stored in which variable. If ``pandas.core.index.MultiIndex`` had better
support for gridded dimensions, and if this functionality were integrated
into the core data objects in ``pandas``, that would probably be a good
solution.
About dimension variables | |
------------------------- | |
Dimension variables would need to be 1d arrays. I personally would need | |
floating point and datetime dtypes. Ideally, a dimension variable could be | |
cyclic. This could be realized with a ``dimvar._cyclic`` attribute defaulting | |
to ``None``, but possibly having a value which 'makes sense', i.e. a tuple
``(begin, end)``. | |
""" | |
# Library imports | |
# =========================================================================== | |
from collections import OrderedDict | |
import unittest | |
from gridded_array import gridded_array | |
import numpy as np | |
# Test class definition | |
# =========================================================================== | |
class gridded_array_test(unittest.TestCase):
    """Specification of the ``gridded_array`` class.

    Every test method is currently a placeholder: its docstring states the
    desired behaviour and the ``TODO`` comments list the cases that still
    need to be written as real assertions.
    """

    def setUp(self):
        """Build the shared fixture: a 6x4 array of ones on a lon/lat grid.

        The method must be spelled ``setUp`` (capital ``U``); ``unittest``
        only invokes the hook under that exact name before each test.
        """
        # Use a list of pairs rather than keyword arguments so that the
        # dimension order (longitude first, latitude second -- matching the
        # (6, 4) data shape) is preserved on every Python version.
        self.simple_dims = OrderedDict([
            ("longitude", np.linspace(-150., 150., 6)),
            ("latitude", np.linspace(-67.5, 67.5, 4)),
        ])
        self.simple_garr = gridded_array(np.ones((6, 4)), self.simple_dims)

    # Basic functionality
    # ------------------------------------------------------------------------
    def test_initialization(self):
        """Initialization of a ``gridded_array`` always requires the data.

        If no dimension variables are given, they are created automatically as
        ``np.arange(len(dimension))``.
        """
        # TODO: gridded_array(data)
        # TODO: gridded_array(data, dims)
        # TODO: gridded_array(existing_arr)
        # TODO: gridded_array(existing_arr, dims)
        # TODO: gridded_array(existing_marr)
        # TODO: gridded_array(existing_marr, dims)
        # TODO: dims as list(variables), list(names)
        # TODO: only one dim, as "arr", "name"
        pass

    def test_add(self):
        """Addition should work as expected by returning the sum of two
        ``gridded_array`` as a new ``gridded_array``. Should raise an
        exception if the dimensions of the two inputs do not agree."""
        # TODO: garr + garr
        # TODO: garr + masked_array
        # TODO: garr + array
        # TODO: garr + scalar
        # TODO: masked_array + garr
        # TODO: array + garr
        # TODO: scalar + garr
        pass

    def test_slicing(self):
        """Slicing should work as expected. Indexing should be possible both
        via the indices and via the according dimensions."""
        # TODO: garr[0]
        # TODO: garr[:,:,0]
        # TODO: garr[:2]
        # TODO: garr[:,5:8]
        pass

    def test_equal(self):
        """Equality should occur only when comparing a ``gridded_array`` with
        another ``gridded_array``. Maybe say in the Exception if the
        underlying data are identical?

        Actually, I don't see a use-case for testing for equality (except for
        writing tests, that is)"""
        # TODO: garr == garr (True, False)
        # TODO: garr == arr
        # TODO: garr == marr
        pass

    def test_broadcast(self):
        """``gridded_array`` should support intelligent broadcasting based on
        the dimension variables."""
        # TODO: (lat, lon) + (lon, lat)
        # TODO: (lat, lon, alt) + (lon, lat)
        # TODO: (alt, lat, lon) + (lat)
        pass

    def test_transpose(self):
        """It should be possible to transpose the array, i.e. swap the axes.

        While this might seem unnecessary, it's important e.g. when writing
        to file, and the writing method doesn't give the option to rearrange
        the dimensions."""
        # TODO: (lat, lon).transpose(('lon', 'lat'))
        # TODO: (alt, lat, lon).transpose(('lon', 'alt', 'lat'))
        pass

    def test_to_array(self):
        """``gridded_array.to_array()`` returns a ``ndarray`` object, with
        optionally transposed axes, and optionally filled missing values"""
        # TODO: garr.to_array()
        # TODO: garr.to_array(fill=999.)
        # TODO: (lat, lon).to_array(dims=('lon', 'lat'))
        pass

    def test_to_masked_array(self):
        """Same as to_array, but returns masked_array"""
        pass

    def test_cut(self):
        """Cut out a sub-hypercube from the array object."""
        # TODO: garr(lat,lon,alt).cut(('lat', 10., 20)) -> return garr object
        #       which only contains those points with 'lat' dimension between
        #       10 and 20. Should work for multiple dimensions simultaneously.
        # TODO: garr.cut(0, -8, 8)
        # TODO: garr.cut('x', -8, 8)
        # TODO: garr.cut((0, -8, 8), ('y', -10, 0))
        pass

    def test_advanced_indexing(self):
        """Indexing via arbitrary expressions of type bool"""
        pass

    # Optional functionality
    # ------------------------------------------------------------------------
    def test_timeaxis(self):
        """Ideally, ``gridded_array`` would behave like a
        ``pandas.core.series.Series`` object when it comes to temporal
        manipulations."""
        pass

    def test_lt_gt(self):
        """No need to implement less-than and greater-than, IMO"""
        pass

    def test_axis_arg(self):
        """It would be great if all those class methods / numpy functions,
        which take an ``axis`` parameter would work with the dimension's
        name as argument instead of the numerical order of the dimension"""
        pass

    # Convenience functions to call something from an external lib
    # ------------------------------------------------------------------------
    def test_downsample(self):
        """Support generating a new ``gridded_array`` by down-sampling"""
        # TODO: downsample in one dim
        # TODO: downsample in two dims
        # TODO: downsample in three dims
        pass

    def test_interpolate(self):
        """Interpolation to a finer grid. Should be a convenience function
        to call something appropriate from ``scipy.interpolate``."""
        pass
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment