Created
December 12, 2013 08:17
-
-
Save georgepsarakis/7924750 to your computer and use it in GitHub Desktop.
Extended list. Maintains a dictionary for faster lookups (optionally keeps hash of item object representation by marshalling) and with some operator overloading can perform mathematical operations among instances.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import MutableSequence | |
import marshal | |
from operator import add, div, mul, neg, mod, sub | |
from operator import ge, gt, le, lt, eq | |
from itertools import imap | |
''' | |
Extended list class | |
- fast lookups by maintaining a dictionary with the values | |
- numeric operations between 2 instances are possible (like Octave matrices) | |
''' | |
class ListExtended(MutableSequence): | |
''' | |
*args : initialize list with these elements | |
**kwargs : | |
- hashing : create hash of the list objects for the lookup table (boolean) | |
- lookup : maintain a dictionary for fast element lookups | |
''' | |
def __init__(self, *args, **kwargs): | |
self.list = args[:] | |
self.optimize = 'lookup' in kwargs and kwargs['lookup'] | |
self.hashing = 'hashing' in kwargs and kwargs['hashing'] | |
self.__rehash() | |
def __len__(self): | |
return len(self.list) | |
def __repr__(self): | |
return marshal.dumps(self.list) | |
def __str__(self): | |
return str(self.list) | |
def __unicode__(self): | |
return unicode(self.list) | |
def __gt__(self, other): | |
return self.__binary_operation(other, gt) | |
def __lt__(self, other): | |
return self.__binary_operation(other, lt) | |
def __ge__(self, other): | |
return self.__binary_operation(other, ge) | |
def __le__(self, other): | |
return self.__binary_operation(other, le) | |
def __eq__(self, other): | |
return self.__binary_operation(other, eq) | |
def __getitem__(self, index): | |
return self.list[index] | |
def __remove_lookup(self, index): | |
if self.optimize: | |
key = self.list[index] | |
if self.hashing: | |
key = hash(marshal.dumps(key)) | |
del self.lookup[key] | |
def __set_lookup(self, index, value): | |
if self.optimize: | |
if self.hashing: | |
value = hash(marshal.dumps(value)) | |
self.lookup[value] = index | |
def __delitem__(self, index): | |
self.__remove_lookup(index) | |
del self.list[index] | |
def __setitem__(self, index, value): | |
self.__remove_lookup(index) | |
self.list[index] = value | |
self.__set_lookup(index, value) | |
def find(self, item): | |
exists = False | |
if self.optimize: | |
if self.hashing: | |
item = hash(marshal.dumps(item)) | |
exists = item in self.lookup | |
if exists: | |
index = self.lookup[item] | |
else: | |
index = None | |
else: | |
try: | |
index = self.list.index(item) | |
exists = True | |
except ValueError: | |
index = None | |
return (exists, index) | |
def __contains__(self, item): | |
exists = False | |
if self.optimize: | |
if self.hashing: | |
item = hash(marshal.dumps(item)) | |
exists = item in self.lookup | |
else: | |
try: | |
index = self.list.index(item) | |
except ValueError: | |
exists = False | |
return exists | |
def __iter__(self): | |
for item in self.list: | |
yield item | |
def __reversed__(self): | |
L = len(self.list) | |
for index in xrange(L - 1, -1, -1): | |
yield self.list[index] | |
''' useful to treat lists as numeric arrays ''' | |
def __binary_operation(self, other, operation): | |
if len(other) != len(self.list): | |
return None | |
L = len(self.list) | |
def op(index): | |
return operation(self.list[index], other[index]) | |
return map(op, xrange(L)) | |
def __unary_operation(self, operation): | |
return map(operation, self.list) | |
def __add__(self, other): | |
return self.__binary_operation(other, add) | |
def __mul__(self, other): | |
return self.__binary_operation(other, mul) | |
def __div__(self, other): | |
return self.__binary_operation(other, div) | |
def __mod__(self, other): | |
return self.__binary_operation(other, mod) | |
def __sub__(self, other): | |
return self.__binary_operation(other, sub) | |
def __neg__(self): | |
return self.__unary_operation(neg) | |
def __rehash(self): | |
self.lookup = {} | |
if self.optimize: | |
if self.hashing: | |
L = imap(hash, imap(marshal.dumps, self.list)) | |
index = 0 | |
for item in L: | |
self.lookup[item] = index | |
index += 1 | |
else: | |
L = len(self.list) | |
self.lookup = dict([ (self.list[item], item) for item in L ]) | |
def insert(self, index, value): | |
self.list.insert(index, value) | |
''' needs rehashing - slow operation ''' | |
self.__rehash() | |
if __name__ == "__main__": | |
from time import time | |
a = ListExtended(*range(2, 20, 1), lookup=True, hashing=True) | |
b = ListExtended(*range(4, 40, 2)) | |
print 'a', a | |
print 'b', b | |
print 'a + b = ', a + b | |
print 'a * b = ', a * b | |
print '-a = ', -a | |
print 'b - a = ', b-a | |
print 'b/a = ', b/a | |
t_optimize = 0. | |
for n in xrange(1000): | |
t_optimize -= time() | |
a.find(12) | |
t_optimize += time() | |
t_simple = 0. | |
for n in xrange(1000): | |
t_simple -= time() | |
b.find(17) | |
t_simple += time() | |
''' nothing surprising, just verifying it works! ''' | |
''' for small lists it does not make a difference ''' | |
print "Optimized find:", t_optimize/1000. | |
print "Simple find:", t_simple/1000. | |
print "Simple/Optimized", t_simple/t_optimize | |
a = ListExtended(*range(2, 20000, 1), lookup=True, hashing=True) | |
b = ListExtended(*range(4, 40000, 2)) | |
t_optimize = 0. | |
for n in xrange(1000): | |
t_optimize -= time() | |
a.find(12) | |
t_optimize += time() | |
t_simple = 0. | |
for n in xrange(1000): | |
t_simple -= time() | |
b.find(17) | |
t_simple += time() | |
''' for larger lists it _does_ make a difference ''' | |
print "Optimized find:", t_optimize/1000. | |
print "Simple find:", t_simple/1000. | |
print "Simple/Optimized", t_simple/t_optimize |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment