|
class OrderedSet(object): |
|
"""A OrderedSet object is an ordered collection of distinct hashable objects. |
|
It's designed to be a drop-in replacement of set. |
|
Except `remove`, `pop` behavior is like a list, not a set.""" |
|
|
|
__hash__ = None |
|
|
|
def __init__(self, iterable=None): |
|
if isinstance(iterable, OrderedSet): |
|
self.__set = iterable._get_set_copy() |
|
self.__list = iterable._get_list_copy() |
|
else: |
|
self.__set = set() |
|
self.__list = [] |
|
if iterable is not None: |
|
self |= iterable |
|
|
|
def _get_set_copy(self): |
|
return self.__set.copy() |
|
|
|
def _get_list_copy(self): |
|
return self.__list[:] |
|
|
|
def __len__(self): |
|
return len(self.__set) |
|
|
|
def __contains__(self, x): |
|
return x in self.__set |
|
|
|
def __iter__(self): |
|
return iter(self.__list) |
|
|
|
def __reversed__(self): |
|
return reversed(self.__list) |
|
|
|
def __getitem__(self, index): |
|
return self.__list[index] |
|
|
|
def __delitem__(self, index): |
|
self.pop(index) |
|
|
|
def __getslice__(self, i, j): |
|
return self.__list[i:j] |
|
|
|
def __le__(self, other): |
|
if len(self) > len(other): |
|
return False |
|
for elem in self: |
|
if elem not in other: |
|
return False |
|
return True |
|
|
|
def __lt__(self, other): |
|
return len(self) < len(other) and self.__le__(other) |
|
|
|
def __gt__(self, other): |
|
return other < self |
|
|
|
def __ge__(self, other): |
|
return other <= self |
|
|
|
def __eq__(self, other): |
|
return len(self) == len(other) and self.__le__(other) |
|
|
|
def __ne__(self, other): |
|
return not (self == other) |
|
|
|
def isdisjoint(self, other): |
|
"""Return True if two sets have a null intersection.""" |
|
for value in other: |
|
if value in self: |
|
return False |
|
return True |
|
|
|
def issubset(self, other): |
|
"""Report whether another set contains this set.""" |
|
return self <= other |
|
|
|
def issuperset(self, other): |
|
"""Report whether this set contains another set.""" |
|
return self >= other |
|
|
|
def index(self, elem): |
|
return self.__list.index(elem) |
|
|
|
@classmethod |
|
def _from_iterable(cls, iterable): |
|
"""Construct an instance of the class from any iterable input.""" |
|
return cls(iterable) |
|
|
|
def __and__(self, other): |
|
other = self._from_iterable(other) |
|
return self._from_iterable(value for value in self if value in other) |
|
|
|
def __or__(self, other): |
|
chain = (e for s in (self, other) for e in s) |
|
return self._from_iterable(chain) |
|
|
|
def __sub__(self, other): |
|
other = self._from_iterable(other) |
|
return self._from_iterable(value for value in self if value not in other) |
|
|
|
def __xor__(self, other): |
|
other = self._from_iterable(other) |
|
return (self - other) | (other - self) |
|
|
|
def union(self, other): |
|
"""Return a new set with elements from the set and the other.""" |
|
return self | other |
|
|
|
def intersection(self, other): |
|
"""Return a new set with elements common to the set and the other.""" |
|
return self & other |
|
|
|
def difference(self, other): |
|
"""Return a new set with elements in the set that are not in the other.""" |
|
return self - other |
|
|
|
def symmetric_difference(self, other): |
|
"""Return a new set with elements in either the set or other but not both.""" |
|
return self ^ other |
|
|
|
def copy(self): |
|
return self._from_iterable(self) |
|
|
|
def add(self, elem): |
|
"""Append an element to the end of set. |
|
This has no effect if the element is already present.""" |
|
if elem not in self: |
|
self.__set.add(elem) |
|
self.__list.append(elem) |
|
|
|
def remove(self, elem): |
|
"""Remove an element from the set. |
|
Raises ValueError if the element is not present.""" |
|
if elem not in self: |
|
raise ValueError('x not in set') |
|
self.discard(elem) |
|
|
|
def discard(self, elem): |
|
"""Remove an element from the set. |
|
This has no effect if the element is not present.""" |
|
import warnings |
|
warnings.warn('This function have serious performance issue. ' |
|
'You should use `difference` when removing large amout of items.') |
|
if elem in self: |
|
self.__set.discard(elem) |
|
self.__list.remove(elem) |
|
|
|
def pop(self, index=-1): |
|
"""Remove and return item at index (default last). |
|
Raises IndexError if set is empty or index is out of range.""" |
|
set_len = len(self.__set) |
|
if set_len == 0 or set_len <= index: |
|
raise IndexError('pop from empty set') |
|
value = self.__list.pop(index) |
|
self.__set.discard(value) |
|
return value |
|
|
|
def clear(self): |
|
"""Remove all elements from the set.""" |
|
self.__set.clear() |
|
self.__list = [] |
|
|
|
def __ior__(self, iterable): |
|
self.update(iterable) |
|
return self |
|
|
|
# TODO: Implement fast in-place and. Without this, `a &= b` will return a new `a`. |
|
#def __iand__(self, iterable): |
|
# self.intersection_update(iterable) |
|
# return self |
|
|
|
# TODO: Implement fast in-place subtract. Without this, `a -= b` will return a new `a`. |
|
#def __isub__(self, iterable): |
|
# self.difference_update(iterable) |
|
# return self |
|
|
|
# TODO: Implement fast in-place xor. Without this, `a ^= b` will return a new `a`. |
|
#def __ixor__(self, iterable): |
|
# self.symmetric_difference_update(iterable) |
|
# return self |
|
|
|
def update(self, other): |
|
"""Update the set, keeping only elements found in it and the other.""" |
|
seen = self.__set |
|
seen_add = seen.add |
|
self.__list.extend(i for i in other if not i in seen and not seen_add(i)) |
|
|
|
# FIXME: Following functions doesn't work as expected. Please use &= instead. |
|
#def intersection_update(self, other): |
|
# """Update the set, removing elements found in the other.""" |
|
# for value in (self - iterable): |
|
# self.discard(value) |
|
|
|
# FIXME: Following functions doesn't work as expected. Please use -= instead. |
|
#def difference_update(self, other): |
|
# """Update the set, removing elements found in the other.""" |
|
# for value in iterable: |
|
# if value in self: |
|
# self.discard(value) |
|
# else: |
|
# self.add(value) |
|
|
|
# FIXME: Following functions doesn't work as expected. Please use ^= instead. |
|
#def symmetric_difference_update(self, other): |
|
# """Update the set, keeping only elements found in either set, but not in both.""" |
|
# for value in iterable: |
|
# self.discard(value) |
|
|
|
def __repr__(self): |
|
return 'OrderedSet(%s)' % repr(self.__list) |
|
|
|
def __str__(self): |
|
return repr(self) |
|
|
|
def __unicode__(self): |
|
return unicode(str(self)) |
|
|
|
def __del__(self): |
|
self.clear() |
|
|
|
|
|
if __name__ == '__main__': |
|
a = OrderedSet('abracadabra') |
|
b = OrderedSet('ommanipadmehum') |
|
print(a) |
|
print(b) |
|
print(a < b) |
|
print(a <= b) |
|
print(a == b) |
|
print(a != b) |
|
print(a >= b) |
|
print(a > b) |
|
print(a | b) |
|
print(b | a) |
|
print(a & b) |
|
print(b & a) |
|
print(a - b) |
|
print(b - a) |
|
print(a ^ b) |
|
print(b ^ a) |
|
|
|
print('Benchmark...') |
|
|
|
from random import randint |
|
from time import time |
|
|
|
data1 = [randint(1, 100000) for _ in xrange(100000)] |
|
data2 = [randint(1, 100000) for _ in xrange(100000)] |
|
|
|
t0 = time() |
|
OrderedSet(data1) |
|
t = time() - t0 |
|
print('init with 100k random integers: %fs' % t) |
|
|
|
a = OrderedSet(data1) |
|
b = OrderedSet(data2) |
|
t0 = time() |
|
c = a | b |
|
t = time() - t0 |
|
print('a | b: %fs' % t) |
|
|
|
a = OrderedSet(data1) |
|
b = OrderedSet(data2) |
|
t0 = time() |
|
c = a & b |
|
t = time() - t0 |
|
print('a & b: %fs' % t) |
|
|
|
a = OrderedSet(data1) |
|
b = OrderedSet(data2) |
|
t0 = time() |
|
c = a - b |
|
t = time() - t0 |
|
print('a - b: %fs' % t) |
|
|
|
a = OrderedSet(data1) |
|
b = OrderedSet(data2) |
|
t0 = time() |
|
c = a ^ b |
|
t = time() - t0 |
|
print('a ^ b: %fs' % t) |
|
|
|
a = OrderedSet(data1) |
|
aid = id(a) |
|
b = OrderedSet(data2) |
|
t0 = time() |
|
a |= b |
|
t = time() - t0 |
|
print('a |= b: %fs %s' % (t, '' if aid == id(a) else '(not in-place)')) |
|
|
|
a = OrderedSet(data1) |
|
aid = id(a) |
|
b = OrderedSet(data2) |
|
t0 = time() |
|
a &= b |
|
t = time() - t0 |
|
print('a &= b: %fs %s' % (t, '' if aid == id(a) else '(not in-place)')) |
|
|
|
a = OrderedSet(data1) |
|
aid = id(a) |
|
b = OrderedSet(data2) |
|
t0 = time() |
|
a -= b |
|
t = time() - t0 |
|
print('a -= b: %fs %s' % (t, '' if aid == id(a) else '(not in-place)')) |
|
|
|
a = OrderedSet(data1) |
|
aid = id(a) |
|
b = OrderedSet(data2) |
|
t0 = time() |
|
a ^= b |
|
t = time() - t0 |
|
print('a ^= b: %fs %s' % (t, '' if aid == id(a) else '(not in-place)')) |
|
|
|
a = OrderedSet() |
|
t0 = time() |
|
c = [a.copy() for _ in xrange(100000)] |
|
t = time() - t0 |
|
print('create 100k shallow copies: %fs' % t) |
|
|
|
a = OrderedSet() |
|
t0 = time() |
|
[a.add(i) for i in data1] |
|
t = time() - t0 |
|
print('random add 100k items: %fs' % t) |
|
|
|
a = OrderedSet(data1) |
|
alen = len(a) |
|
t0 = time() |
|
[a.__delitem__(i) for i in xrange(alen-1, -1, -1)] |
|
t = time() - t0 |
|
print('reversed sequential delete %d items: %fs' % (alen, t)) |
|
|
|
a = OrderedSet(data1) |
|
t0 = time() |
|
[a.discard(i) for i in data2] |
|
t = time() - t0 |
|
print('random discard 100k items: %fs' % t) |
|
|
|
a = OrderedSet(data1) |
|
t0 = time() |
|
while 1: |
|
try: |
|
a.pop() |
|
except IndexError: |
|
break |
|
t = time() - t0 |
|
print('pop all items: %fs' % t) |
|
|
|
a = OrderedSet(data1) |
|
t0 = time() |
|
c = list(a) |
|
t = time() - t0 |
|
print('list(a): %fs' % t) |
|
|
|
a = OrderedSet(data1) |
|
t0 = time() |
|
c = reversed(a) |
|
t = time() - t0 |
|
print('reversed(a): %fs' % t) |
|
|
|
a = OrderedSet(data1) |
|
b = OrderedSet(data2) |
|
t0 = time() |
|
[(i in a) for i in b] |
|
t = time() - t0 |
|
print('[(i in a) for i in b]: %fs' % t) |