Created
January 6, 2013 13:36
-
-
Save Kos/4467206 to your computer and use it in GitHub Desktop.
Slicing gotchas in Python 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Some setup... | |
import sys | |
def throws(msg, f):
    """Report whether calling ``f`` raises an exception mentioning ``msg``.

    msg -- substring expected somewhere in ``str(exception)``.
    f   -- zero-argument callable to invoke.

    Returns True only when ``f()`` raises an ``Exception`` whose string form
    contains ``msg``; returns False when the message does not match or when
    ``f()`` completes without raising.
    """
    try:
        f()
    except Exception as exc:  # "as" form parses on Python 2.6+ and Python 3
        return msg in str(exc)
    return False
class OldStyle:
    # Deliberately declared without a base class: on Python 2 this makes it
    # an old-style class, which is what the comparisons below rely on.

    def __getitem__(self, arg):
        """Echo back whatever the subscript expression produced."""
        return arg
class NewStyle(object):
    # Inherits from object, i.e. a new-style class on Python 2.

    def __getitem__(self, arg):
        """Echo back whatever the subscript expression produced."""
        return arg
a = OldStyle()
b = NewStyle()

# And we're set!

# Plain subscripting behaves identically for old-style and new-style objects:
for x in (a, b):
    # A single value reaches __getitem__ untouched
    assert x[1] == 1
    assert x["hi"] == 'hi'
    assert x[-5] == -5

    # Commas build a tuple
    assert x[1,2] == (1,2)
    assert x[1,] == (1,)

    # The start:stop:step syntax builds a slice object
    assert x[1:5] == slice(1, 5, None)
    assert x[1:10:2] == slice(1, 10, 2)

    # Slice components may be of any type
    assert x[-0.5:0.5:0.1] == slice(-.5, .5, .1)
    assert x['Hello':123.45:[]] == slice('Hello',123.45,[])

    # "..." inside a subscript yields Ellipsis
    assert x[...] == Ellipsis

    # All of the above can be combined in one subscript
    assert x[1, ..., 2:3] == (1, Ellipsis, slice(2,3,None))
# There are differences, though.

# Remember: a SIMPLE SLICING looks like obj[a:b],
# where a and b are optional integers.
# We'll talk about them a lot.

# Difference 1: Omitted indices in simple slicings
assert a[:2] == slice(0, 2, None)
assert b[:2] == slice(None, 2, None)

assert a[10:] == slice(10, sys.maxint, None)
assert b[10:] == slice(10, None, None)

# See? Old-style classes substitute 0 and sys.maxint for the omitted
# start and stop, while new-style classes simply put None.

# Q: Does this change the result of calling .indices() on the slice?
# A: Nope, luckily:
for example_length in (0, 1, 2, 3, 5, 10, 15, 100):
    assert a[:2].indices(example_length) == b[:2].indices(example_length)
    assert a[:10].indices(example_length) == b[:10].indices(example_length)
# Difference 2: Negatives in simple slicings

# When a negative integer index is found,
# old-style asks for __len__ and subtracts from it.
# New-style keeps it as is.
assert throws(
    "has no attribute '__len__'",
    lambda: a[0:-2] == slice(0, -2, None),
)
assert b[0:-2] == slice(0, -2, None)
# Let's give our classes a length now to see how things work...
Length = a.length = b.length = 15


def _instance_length(self):
    """Return the ``length`` attribute stored on the instance."""
    return self.length


# The helper is intentionally NOT called "len": shadowing the builtin would
# make every later len(x) call bypass the __len__ hooks installed here.
OldStyle.__len__ = NewStyle.__len__ = _instance_length

# Goes through the builtin len(), so this really checks the __len__ hookup.
assert len(a) == len(b) == Length
# Having that, let's try again
assert a[-10:-5] == slice(Length-10, Length-5, None)
assert b[-10:-5] == slice(-10, -5, None)

# Note that this subtraction in old-style classes
# won't prevent you from actually ending up with negatives!
assert a[0:-Length*2].stop < 0

# Either way, use indices() and you're still safe, as long as you pass
# len() as the length.
# Proof:
assert a[-10:-5].indices(len(a)) == b[-10:-5].indices(len(b))
assert a[-5:].indices(len(a)) == b[-5:].indices(len(b))
assert a[:-100].indices(len(a)) == b[:-100].indices(len(b))

# Q: Why do you specifically refer to "simple slicings"?
# A: Because extended slicings always use the new-style interpretation,
#    no matter if the object is old-style or new-style!

# Extended slicing: slice with a non-integer value
assert a[-1:'a'] == b[-1:'a'] == slice(-1, 'a', None)

# Extended slicing: a tuple of slices
assert a[-2:-3, 4:6] == b[-2:-3, 4:6] == (slice(-2, -3, None), slice(4, 6, None))

# So if you want to easily enforce new-style slice interpretation for any
# reason, here are several tricks:

# Pass None explicitly instead of omitting a parameter
assert a[:None] == b[:None] == slice(None, None, None)

# Pass an empty 'step' parameter
assert a[-2::] == b[-2::] == slice(-2, None, None)
# Difference 3: __getslice__ | |
# If a special member function __getslice__ is present, | |
# all simple slicings use it, while extended slicings use __getitem__. | |
# let's add it to our classes: | |
def getslice(self, start, end):
    """Tag and return the two bounds handed to ``__getslice__``.

    Unlike ``__getitem__``, ``__getslice__`` is always called with exactly
    two integer bounds; they are returned as ``('getslice', start, end)``.
    """
    tagged = ('getslice', start, end)
    return tagged
OldStyle.__getslice__ = NewStyle.__getslice__ = getslice

# Even though __getslice__ is a legacy construct,
# it applies to both old-style and new-style classes.
assert a[1:2] == b[1:2] == ('getslice', 1, 2)

# __getslice__ behaves much like the old-style interpretation:
assert a[:] == b[:] == ('getslice', 0, sys.maxint)
assert a[-2:-5] == b[-2:-5] == ('getslice', len(a)-2, len(a)-5)

# Extended slicing still uses good ol' __getitem__:
assert a[::] == b[::] == slice(None, None, None)
assert a[1.:2.] == b[1.:2.] == slice(1., 2., None)

# There's a tiny gotcha with __getslice__ though:
# if the classes don't have len()s...
del OldStyle.__len__, NewStyle.__len__

# ...old-style fails
assert throws('__len__', lambda: a[-2:-5])

# ...while new-style hands the negative bounds over unchanged
# (as if the length were 0)
assert b[-2:-5] == ('getslice', -2, -5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment