Skip to content

Instantly share code, notes, and snippets.

@seberg
Created August 22, 2012 22:39
Show Gist options
  • Save seberg/3430219 to your computer and use it in GitHub Desktop.
Save seberg/3430219 to your computer and use it in GitHub Desktop.
numy stride tricks based functions for changing the shape and axis order of arrays.
import numpy as np
from collections import defaultdict as _dd
def rolling_window(array, window=(0,), asteps=None, wsteps=None, axes=None, toend=True):
"""Create a view of `array` which for every point gives the n-dimensional
neighbourhood of size window. New dimensions are added at the end of
`array` or after the corresponding original dimension.
Parameters
----------
array : array_like
Array to which the rolling window is applied.
window : int or tuple
Either a single integer to create a window of only the last axis or a
tuple to create it for the last len(window) axes. 0 can be used as a
to ignore a dimension in the window.
asteps : tuple
Aligned at the last axis, new steps for the original array, ie. for
creation of non-overlapping windows. (Equivalent to slicing result)
wsteps : int or tuple (same size as window)
steps for the added window dimensions. These can be 0 to repeat values
along the axis.
axes: int or tuple
If given, must have the same size as window. In this case window is
interpreted as the size in the dimension given by axes. IE. a window
of (2, 1) is equivalent to window=2 and axis=-2.
toend : bool
If False, the new dimensions are right after the corresponding original
dimension, instead of at the end of the array. Adding the new axes at the
end makes it easier to get the neighborhood, however toend=False will give
a more intuitive result if you view the whole array.
Returns
-------
A view on `array` which is smaller to fit the windows and has windows added
dimensions (0s not counting), ie. every point of `array` is an array of size
window.
Examples
--------
>>> a = np.arange(9).reshape(3,3)
>>> rolling_window(a, (2,2))
array([[[[0, 1],
[3, 4]],
[[1, 2],
[4, 5]]],
[[[3, 4],
[6, 7]],
[[4, 5],
[7, 8]]]])
Or to create non-overlapping windows, but only along the first dimension:
>>> rolling_window(a, (2,0), asteps=(2,1))
array([[[0, 3],
[1, 4],
[2, 5]]])
Note that the 0 is discared, so that the output dimension is 3:
>>> rolling_window(a, (2,0), asteps=(2,1)).shape
(1, 3, 2)
This is useful for example to calculate the maximum in all (overlapping)
2x2 submatrixes:
>>> rolling_window(a, (2,2)).max((2,3))
array([[4, 5],
[7, 8]])
Or delay embedding (3D embedding with delay 2):
>>> x = np.arange(10)
>>> rolling_window(x, 3, wsteps=2)
array([[0, 2, 4],
[1, 3, 5],
[2, 4, 6],
[3, 5, 7],
[4, 6, 8],
[5, 7, 9]])
"""
array = np.asarray(array)
orig_shape = np.asarray(array.shape)
window = np.atleast_1d(window).astype(int) # maybe crude to cast to int...
if axes is not None:
axes = np.atleast_1d(axes)
w = np.zeros(array.ndim, dtype=int)
for axis, size in zip(axes, window):
w[axis] = size
window = w
# Check if window is legal:
if window.ndim > 1:
raise ValueError("`window` must be one-dimensional.")
if np.any(window < 0):
raise ValueError("All elements of `window` must be larger then 1.")
if len(array.shape) < len(window):
raise ValueError("`window` length must be less or equal `array` dimension.")
_asteps = np.ones_like(orig_shape)
if asteps is not None:
asteps = np.atleast_1d(asteps)
if asteps.ndim != 1:
raise ValueError("`asteps` must be either a scalar or one dimensional.")
if len(asteps) > array.ndim:
raise ValueError("`asteps` cannot be longer then the `array` dimension.")
# does not enforce alignment, so that steps can be same as window too.
_asteps[-len(asteps):] = asteps
if np.any(asteps < 1):
raise ValueError("All elements of `asteps` must be larger then 1.")
asteps = _asteps
_wsteps = np.ones_like(window)
if wsteps is not None:
wsteps = np.atleast_1d(wsteps)
if wsteps.shape != window.shape:
raise ValueError("`wsteps` must have the same shape as `window`.")
if np.any(wsteps < 0):
raise ValueError("All elements of `wsteps` must be larger then 0.")
_wsteps[:] = wsteps
_wsteps[window == 0] = 1 # make sure that steps are 1 for non-existing dims.
wsteps = _wsteps
# Check that the window would not be larger then the original:
if np.any(orig_shape[-len(window):] < window * wsteps):
raise ValueError("`window` * `wsteps` larger then `array` in at least one dimension.")
new_shape = orig_shape # just renaming...
# For calculating the new shape 0s must act like 1s:
_window = window.copy()
_window[_window==0] = 1
new_shape[-len(window):] += wsteps - _window * wsteps
new_shape = (new_shape + asteps - 1) // asteps
# make sure the new_shape is at least 1 in any "old" dimension (ie. steps
# is (too) large, but we do not care.
new_shape[new_shape < 1] = 1
shape = new_shape
strides = np.asarray(array.strides)
strides *= asteps
new_strides = array.strides[-len(window):] * wsteps
# The full new shape and strides:
if toend:
new_shape = np.concatenate((shape, window))
new_strides = np.concatenate((strides, new_strides))
else:
_ = np.zeros_like(shape)
_[-len(window):] = window
_window = _.copy()
_[-len(window):] = new_strides
_new_strides = _
new_shape = np.zeros(len(shape)*2, dtype=int)
new_strides = np.zeros(len(shape)*2, dtype=int)
new_shape[::2] = shape
new_strides[::2] = strides
new_shape[1::2] = _window
new_strides[1::2] = _new_strides
new_strides = new_strides[new_shape != 0]
new_shape = new_shape[new_shape != 0]
return np.lib.stride_tricks.as_strided(array, shape=new_shape, strides=new_strides)
def permute_axes(array, axes, copy=False, imag_trick=True, order='C'):
"""Change the arrays axes order, combine multiple axes into one and split
and axis up into new ones (which can be combined in turn). Creates
a view if possible, but when axes are combined will usually return a copy.
Parameters
----------
array : array_like
Array for which to define new axes.
axes : iterable
Iterable giving for every new axis which old axis are combined into it.
np.newaxis/None can be used to insert a new axis. Elements must be
either ints/slices/None/Ellipsis or iterables of ints.
slices and Ellipsis are supported mostly for the use of ReIndex. The
Ellipsis adds all remaining Axes in their previous order, a slice
combines them. slice(None)/[:] collapses all remaining dimensions.
copy : bool
If True a copy is forced.
imag_trick : bool
If given, imaginary numbers will be treated as strides on the given
axis (real part). ie 3+5j means every 5th element of axis 3 (up
to the next larger stride given). Using this, an axis can be inverted
by giving a negative stride, ie. 0-1j inverts axis 0.
order : 'C', 'F', 'A' or 'K'
Whether new array should have C or Fortran order. See np.copy help for
details.
See Also
--------
ReIndex, swapaxes, rollaxis, reshape, split_axes
Examples
--------
>>> a = np.arange(12).reshape(3,2,2)
Just reverse the axes order:
>>> permute_axes(a, (2, 1, 0))
array([[[ 0, 4, 8],
[ 2, 6, 10]],
[[ 1, 5, 9],
[ 3, 7, 11]]])
Combine axis 0 and 1 and put the last axis to the front:
>>> permute_axes(a, (-1, (0, 1)))
array([[ 0, 2, 4, 6, 8, 10],
[ 1, 3, 5, 7, 9, 11]])
Or going over the first two axes in different order:
>>> permute_axes(a, (-1, (1, 0)))
array([[ 0, 4, 8, 2, 6, 10],
[ 1, 5, 9, 3, 7, 11]])
Splitting up one axes into multiple using steps:
>>> a = np.arange(12).reshape(6,2)
>>> permute_axes(a, (0+3j, (0+1j, 1))) # a[3] == array([6, 7])
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11]])
"""
array = np.asarray(array)
if not np.iterable(axes):
axes = [axes]
# abuse of imaginary numbers for axis splitting preperation:
if imag_trick:
split_axesd = _dd(list)
for ax in axes:
is_c = np.iscomplex(ax)
if np.any(is_c):
if np.iterable(ax):
for _ in np.asarray(ax)[is_c]:
split_axesd[_.real].append(_)
else:
split_axesd[ax.real].append(ax)
# Now create the new axes:
sp_axes = split_axesd.keys()
sp_steps = []
new_ax_names = {}
next_ax = array.ndim # next axis at the end...
for a in sp_axes:
spa = split_axesd[a]
sp_steps.append([_.imag for _ in spa])
new_ax_names[spa[0]] = a
for s in spa[1:]:
new_ax_names[s] = next_ax
next_ax += 1
# split up the axes (creating a new view):
array = split_axes(array, steps=sp_steps, axes=sp_axes, toend=True)
# Make arrays for all new dimensions:
new_axes = []
check_axes = np.zeros(array.ndim, dtype=bool)
found_wildcard = None
for i, a in enumerate(axes):
if a is None:
new_axes.append(None)
continue
elif isinstance(a, slice):
# just create an array unless its empty:
start = a.start; stop = a.stop; step = a.step
if start is None:
start = 0
if stop is None and (step is None or step == -1 or step == 1):
if found_wildcard is not None:
raise ValueError("There can be only one Ellipsis/[...] or full slice//[::-1]")
new_axes.append(a)
found_wildcard = i
continue
if stop is None:
stop = array.ndim
a = np.arange(start, stop, step)
elif a is Ellipsis:
new_axes.append(a)
if found_wildcard is not None:
raise ValueError("There can be only one Ellipsis/[...] or full slice/[:]/[::-1]")
found_wildcard = i
continue
else:
a = np.atleast_1d(a)
if a.ndim > 1:
raise ValueError("All items of `axes` must be zero or one dimensional.")
# we need to replace the axes if there is a complex:
is_c = np.iscomplex(a)
if np.any(is_c):
a[is_c] = [new_ax_names[_] for _ in a[is_c]]
a = a.real.astype(int)
a = a.astype(int) # force integers.
if np.any(check_axes[a]) or np.unique(a).shape != a.shape:
raise ValueError("All axis must at most occure once in the new array")
check_axes[a] = True
new_axes.append(a)
# Handle Ellipsis or empty slice:
if found_wildcard is not None:
i = found_wildcard
a = new_axes[i]
if a is Ellipsis:
new_axes[i:i+1] = np.arange(array.ndim)[check_axes==False][:,None]
else:
if a.step == -1:
new_axes[i] = np.arange(array.ndim)[check_axes==False][::-1,None]
else:
new_axes[i] = np.arange(array.ndim)[check_axes==False]
old_shape = np.asarray(array.shape)
old_strides = np.asarray(array.strides)
# Shape and strides for the copy operation:
tmp_shape = []
tmp_strides = []
final_shape = []
final_strides = [] # only used if no copy is needed.
must_copy = False
# create a reordered array first:
for ax, na in enumerate(new_axes):
if na is None or len(na) == 0:
tmp_shape.append(1)
tmp_strides.append(0)
final_shape.append(1)
final_strides.append(0)
continue
_shapes = old_shape[na]
_strides = old_strides[na]
tmp_shape.extend(_shapes)
tmp_strides.extend(_strides)
final_strides.append(_strides.min()) # smallest stride...
final_shape.append(_shapes.prod())
if not must_copy:
# If any of the strides do not fit together we must copy:
prev_stride = _strides[0]
for stride, shape in zip(_strides[1:], _shapes[1:]):
if shape == 1:
# 1 sized dimensions just do not matter, but they
# also do not matter for legality check.
continue
elif prev_stride != stride * shape:
must_copy = True
break
prev_stride = stride
if not must_copy:
result = np.lib.stride_tricks.as_strided(array, shape=final_shape, strides=final_strides)
if copy:
return result.copy(order=order)
return result
# No need for explicite copy, as reshape must already create one since
# must_copy is True.
copy_from = np.lib.stride_tricks.as_strided(array, shape=tmp_shape, strides=tmp_strides)
return copy_from.reshape(final_shape, order=order)
def split_axes(array, steps=(1,), axes=-1, toend=False):
"""Split the given axis into multiple new axes by the use of steps instead
of size. If toend is given, all but the first element from `steps` returns
a new dimension at the end. It is possible to invert axis in this function
by giving a negative step.
"""
new_strides = list(array.strides)
new_shape = list(array.shape)
if not np.iterable(axes):
axes = [axes]
steps = [steps]
offset = 0
_steps = steps
for steps, axis in zip(_steps, axes):
axis = int(axis)
size = array.shape[axis]
steps = np.atleast_1d(steps).astype(int)
negative = steps < 0
steps = np.abs(steps)
I = np.argsort(steps)[::-1]
nsizes = np.zeros_like(steps)
prev_step = size
for i in I:
if prev_step % steps[i] != 0:
raise ValueError("All larger steps must be multiples of the next smaller step (the largest of the original size).")
nsizes[i] = prev_step // steps[i]
prev_step = steps[i]
# effect of negative strides on the data start.
offset += ((nsizes[negative] - 1) * steps[negative] * new_strides[axis]).sum()
steps[negative] *= -1 # reverse absolute value.
if toend:
new_strides.extend(new_strides[axis] * steps[1:])
new_strides[axis] = new_strides[axis] * steps[0]
new_shape.extend(nsizes[1:])
new_shape[axis] = nsizes[0]
else:
# nested list for now:
new_strides[axis] = new_strides[axis] * steps
new_shape[axis] = nsizes
if not toend:
# flatten the temporary nested lists.
_strides = []
_shapes = []
for sh, st in zip(new_shape, new_strides):
if isinstance(sh, np.ndarray):
_shapes.extend(sh)
_strides.extend(st)
else:
_shapes.append(sh)
_strides.append(st)
new_shape = tuple(_shapes)
new_strides = tuple(_strides)
# Taken from as_strided:
interface = dict(array.__array_interface__)
interface['shape'] = tuple(new_shape)
interface['strides'] = tuple(new_strides)
# XXX: Is the second element something special, can data be edited just like that?
interface['data'] = (int(interface['data'][0] + offset), interface['data'][1])
return np.asarray(np.lib.stride_tricks.DummyArray(interface, base=array))
class ReIndex(object):
__slots__ = ['array', 'copy', 'order', '__getitem__', '__init__']
def __init__(self, array, copy=False, order='C'):
"""After initialization will create views or copies with axes order
changed or combined. This is an interface for permute_axes. All tricks
for splitting or inverting an axis using imaginary numbers can be used.
See examples and permute_axis for details.
Usage
-----
After initializing the ReIndex object:
a = ReIndex(array)
you can use slicing operations to shuffle its axes, by giving for each
dimension which old axis is supposed to be placed there. When an
iterable is given, all items are combined into the new axis. When None
is given an empty axis added. Slices work like listing all numbers
(so collaps all their axes into the dimension), however the [:] or [::-1]
collapse all remaining dimensions. [::-1] collapses them in reversed
order. Ellipsis/[...] adds all remaining dimensions in their old order.
Imaginary numbers work as a stride along that dimension (they must fit
together. These strides may be negative.
Returns
-------
A view or copy of the array with the desired axes layout.
Examples
--------
>>> a = ReIndex(np.arange(12).reshape(3,2,2))
Just reverse the axes order:
>>> a[2, 1, 0]
array([[[ 0, 4, 8],
[ 2, 6, 10]],
[[ 1, 5, 9],
[ 3, 7, 11]]])
Combine axis 0 and 1 and put the last axis to the front:
>>> a[-1, (0, 1)]
array([[ 0, 2, 4, 6, 8, 10],
[ 1, 3, 5, 7, 9, 11]])
Or going over the first two axes in different order:
>>> a[-1, (1, 0)]
array([[ 0, 4, 8, 2, 6, 10],
[ 1, 5, 9, 3, 7, 11]])
Splitting up one axes into multiple using steps:
>>> a = ReIndex(np.arange(12).reshape(6,2))
>>> a[0+3j, (0+1j, 1))] # a[...][3] == array([6, 7])
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11]])
Use of [...] or [:]:
>>> a = ReIndex(np.arange(12).reshape(3,2,2))
>>> all(a[-1, (0, 1)] == a[-1,:])
True
>>> all(a[-1, (1, 0)] == a[-1,::-1])
True
>>> all(a[1, 0, 2] == a[1,...])
True
See Also
--------
permute_axes, swapaxes, rollaxis, reshape, split_axes
"""
self.array = array
self.copy = copy
self.order = order
def __getitem__(self, args):
return permute_axes(self.array, args, copy=self.copy, order=self.order)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment