Skip to content

Instantly share code, notes, and snippets.

@georgepsarakis
Created December 12, 2013 08:17
Show Gist options
  • Save georgepsarakis/7924750 to your computer and use it in GitHub Desktop.
Save georgepsarakis/7924750 to your computer and use it in GitHub Desktop.
Extended list. Maintains a dictionary for faster lookups (optionally keeps hash of item object representation by marshalling) and with some operator overloading can perform mathematical operations among instances.
from collections import MutableSequence
import marshal
from operator import add, div, mul, neg, mod, sub
from operator import ge, gt, le, lt, eq
from itertools import imap
'''
Extended list class
- fast lookups by maintaining a dictionary with the values
- numeric operations between 2 instances are possible (like Octave matrices)
'''
class ListExtended(MutableSequence):
'''
*args : initialize list with these elements
**kwargs :
- hashing : create hash of the list objects for the lookup table (boolean)
- lookup : maintain a dictionary for fast element lookups
'''
def __init__(self, *args, **kwargs):
self.list = args[:]
self.optimize = 'lookup' in kwargs and kwargs['lookup']
self.hashing = 'hashing' in kwargs and kwargs['hashing']
self.__rehash()
def __len__(self):
return len(self.list)
def __repr__(self):
return marshal.dumps(self.list)
def __str__(self):
return str(self.list)
def __unicode__(self):
return unicode(self.list)
def __gt__(self, other):
return self.__binary_operation(other, gt)
def __lt__(self, other):
return self.__binary_operation(other, lt)
def __ge__(self, other):
return self.__binary_operation(other, ge)
def __le__(self, other):
return self.__binary_operation(other, le)
def __eq__(self, other):
return self.__binary_operation(other, eq)
def __getitem__(self, index):
return self.list[index]
def __remove_lookup(self, index):
if self.optimize:
key = self.list[index]
if self.hashing:
key = hash(marshal.dumps(key))
del self.lookup[key]
def __set_lookup(self, index, value):
if self.optimize:
if self.hashing:
value = hash(marshal.dumps(value))
self.lookup[value] = index
def __delitem__(self, index):
self.__remove_lookup(index)
del self.list[index]
def __setitem__(self, index, value):
self.__remove_lookup(index)
self.list[index] = value
self.__set_lookup(index, value)
def find(self, item):
exists = False
if self.optimize:
if self.hashing:
item = hash(marshal.dumps(item))
exists = item in self.lookup
if exists:
index = self.lookup[item]
else:
index = None
else:
try:
index = self.list.index(item)
exists = True
except ValueError:
index = None
return (exists, index)
def __contains__(self, item):
exists = False
if self.optimize:
if self.hashing:
item = hash(marshal.dumps(item))
exists = item in self.lookup
else:
try:
index = self.list.index(item)
except ValueError:
exists = False
return exists
def __iter__(self):
for item in self.list:
yield item
def __reversed__(self):
L = len(self.list)
for index in xrange(L - 1, -1, -1):
yield self.list[index]
''' useful to treat lists as numeric arrays '''
def __binary_operation(self, other, operation):
if len(other) != len(self.list):
return None
L = len(self.list)
def op(index):
return operation(self.list[index], other[index])
return map(op, xrange(L))
def __unary_operation(self, operation):
return map(operation, self.list)
def __add__(self, other):
return self.__binary_operation(other, add)
def __mul__(self, other):
return self.__binary_operation(other, mul)
def __div__(self, other):
return self.__binary_operation(other, div)
def __mod__(self, other):
return self.__binary_operation(other, mod)
def __sub__(self, other):
return self.__binary_operation(other, sub)
def __neg__(self):
return self.__unary_operation(neg)
def __rehash(self):
self.lookup = {}
if self.optimize:
if self.hashing:
L = imap(hash, imap(marshal.dumps, self.list))
index = 0
for item in L:
self.lookup[item] = index
index += 1
else:
L = len(self.list)
self.lookup = dict([ (self.list[item], item) for item in L ])
def insert(self, index, value):
self.list.insert(index, value)
''' needs rehashing - slow operation '''
self.__rehash()
if __name__ == "__main__":
from time import time
a = ListExtended(*range(2, 20, 1), lookup=True, hashing=True)
b = ListExtended(*range(4, 40, 2))
print 'a', a
print 'b', b
print 'a + b = ', a + b
print 'a * b = ', a * b
print '-a = ', -a
print 'b - a = ', b-a
print 'b/a = ', b/a
t_optimize = 0.
for n in xrange(1000):
t_optimize -= time()
a.find(12)
t_optimize += time()
t_simple = 0.
for n in xrange(1000):
t_simple -= time()
b.find(17)
t_simple += time()
''' nothing surprising, just verifying it works! '''
''' for small lists it does not make a difference '''
print "Optimized find:", t_optimize/1000.
print "Simple find:", t_simple/1000.
print "Simple/Optimized", t_simple/t_optimize
a = ListExtended(*range(2, 20000, 1), lookup=True, hashing=True)
b = ListExtended(*range(4, 40000, 2))
t_optimize = 0.
for n in xrange(1000):
t_optimize -= time()
a.find(12)
t_optimize += time()
t_simple = 0.
for n in xrange(1000):
t_simple -= time()
b.find(17)
t_simple += time()
''' for larger lists it _does_ make a difference '''
print "Optimized find:", t_optimize/1000.
print "Simple find:", t_simple/1000.
print "Simple/Optimized", t_simple/t_optimize
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment