georgepsarakis · December 12, 2013 08:17
diff --git a/extended-list.py b/extended-list.py
 from collections import MutableSequence
 import marshal
 from operator import add, div, mul, neg, mod, sub
 from operator import ge, gt, le, lt, eq
 from itertools import imap

 ''' 
 Extended list class
 - fast lookups by maintaining a dictionary with the values
 - numeric operations between 2 instances are possible (like Octave matrices)
 '''
 class ListExtended(MutableSequence):
  ''' 
  *args    : initialize list with these elements
  **kwargs :
    - hashing : create hash of the list objects for the lookup table (boolean)
    - lookup  : maintain a dictionary for fast element lookups
  '''
  def __init__(self, *args, **kwargs):
    self.list = args[:]
    self.optimize = 'lookup' in kwargs and kwargs['lookup']
    self.hashing = 'hashing' in kwargs and kwargs['hashing']    
    self.__rehash()
  
  def __len__(self):
    return len(self.list)
    
  def __repr__(self):
    return marshal.dumps(self.list)

  def __str__(self):
    return str(self.list)

  def __unicode__(self):
    return unicode(self.list)

  def __gt__(self, other):
    return self.__binary_operation(other, gt)

  def __lt__(self, other):
    return self.__binary_operation(other, lt)
 
  def __ge__(self, other):
    return self.__binary_operation(other, ge)          

  def __le__(self, other):
    return self.__binary_operation(other, le)

  def __eq__(self, other):
    return self.__binary_operation(other, eq)
 
  def __getitem__(self, index):
    return self.list[index]
  
  def __remove_lookup(self, index):
    if self.optimize:
      key = self.list[index]
      if self.hashing:
        key = hash(marshal.dumps(key))
      del self.lookup[key]
  
  def __set_lookup(self, index, value):
    if self.optimize:
      if self.hashing:
        value = hash(marshal.dumps(value))
      self.lookup[value] = index

  def __delitem__(self, index):
    self.__remove_lookup(index)
    del self.list[index]

  def __setitem__(self, index, value):
    self.__remove_lookup(index)
    self.list[index] = value
    self.__set_lookup(index, value)

  def find(self, item):
    exists = False
    if self.optimize:
      if self.hashing:
        item = hash(marshal.dumps(item))
      exists = item in self.lookup
      if exists:
        index = self.lookup[item]
      else:
        index = None
    else:
      try:
        index = self.list.index(item)
        exists = True
      except ValueError:
        index = None
    return (exists, index)

  def __contains__(self, item):
    exists = False
    if self.optimize:
      if self.hashing:
        item = hash(marshal.dumps(item))
      exists = item in self.lookup
    else:
      try:
        index = self.list.index(item)
      except ValueError:
        exists = False
    return exists

  def __iter__(self):
    for item in self.list:
      yield item

  def __reversed__(self):
    L = len(self.list)
    for index in xrange(L - 1, -1, -1):
      yield self.list[index]
  
  ''' useful to treat lists as numeric arrays '''
  def __binary_operation(self, other, operation):
    if len(other) != len(self.list):
      return None
    L = len(self.list)
    def op(index):
      return operation(self.list[index], other[index])
    return map(op, xrange(L))

  def __unary_operation(self, operation):
    return map(operation, self.list)
         
  def __add__(self, other):
    return self.__binary_operation(other, add)

  def __mul__(self, other):
    return self.__binary_operation(other, mul)

  def __div__(self, other):
    return self.__binary_operation(other, div)

  def __mod__(self, other):
    return self.__binary_operation(other, mod)

  def __sub__(self, other):
    return self.__binary_operation(other, sub)

  def __neg__(self):
    return self.__unary_operation(neg)
  
  def __rehash(self):
    self.lookup = {}
    if self.optimize:
      if self.hashing:
        L = imap(hash, imap(marshal.dumps, self.list))
        index = 0        
        for item in L:
          self.lookup[item] = index
          index += 1
      else:
          L = len(self.list)
          self.lookup = dict([ (self.list[item], item) for item in L ])

  def insert(self, index, value):
    self.list.insert(index, value)
    ''' needs rehashing - slow operation '''
    self.__rehash()

 if __name__ == "__main__":
  from time import time
  a = ListExtended(*range(2, 20, 1), lookup=True, hashing=True)
  b = ListExtended(*range(4, 40, 2))
  print 'a', a
  print 'b', b
  print 'a + b = ', a + b
  print 'a * b = ', a * b
  print '-a    = ', -a
  print 'b - a = ', b-a
  print 'b/a   = ', b/a
  t_optimize = 0.
  for n in xrange(1000):
    t_optimize -= time()
    a.find(12)
    t_optimize += time()

  t_simple = 0.
  for n in xrange(1000):
    t_simple -= time()
    b.find(17)
    t_simple += time()
  ''' nothing surprising, just verifying it works! '''
  ''' for small lists it does not make a difference '''
  print "Optimized find:",  t_optimize/1000.
  print "Simple find:",  t_simple/1000.
  print "Simple/Optimized", t_simple/t_optimize
  a = ListExtended(*range(2, 20000, 1), lookup=True, hashing=True)
  b = ListExtended(*range(4, 40000, 2))
  t_optimize = 0.
  for n in xrange(1000):
    t_optimize -= time()
    a.find(12)
    t_optimize += time()

  t_simple = 0.
  for n in xrange(1000):
    t_simple -= time()
    b.find(17)
    t_simple += time()
  ''' for larger lists it _does_ make a difference '''
  print "Optimized find:",  t_optimize/1000.
  print "Simple find:",  t_simple/1000.
  print "Simple/Optimized", t_simple/t_optimize
	from collections import MutableSequence
	import marshal
	from operator import add, div, mul, neg, mod, sub
	from operator import ge, gt, le, lt, eq
	from itertools import imap

	'''
	Extended list class
	- fast lookups by maintaining a dictionary with the values
	- numeric operations between 2 instances are possible (like Octave matrices)
	'''
	class ListExtended(MutableSequence):
	'''
	*args : initialize list with these elements
	**kwargs :
	- hashing : create hash of the list objects for the lookup table (boolean)
	- lookup : maintain a dictionary for fast element lookups
	'''
	def __init__(self, args, *kwargs):
	self.list = args[:]
	self.optimize = 'lookup' in kwargs and kwargs['lookup']
	self.hashing = 'hashing' in kwargs and kwargs['hashing']
	self.__rehash()

	def __len__(self):
	return len(self.list)

	def __repr__(self):
	return marshal.dumps(self.list)

	def __str__(self):
	return str(self.list)

	def __unicode__(self):
	return unicode(self.list)

	def __gt__(self, other):
	return self.__binary_operation(other, gt)

	def __lt__(self, other):
	return self.__binary_operation(other, lt)

	def __ge__(self, other):
	return self.__binary_operation(other, ge)

	def __le__(self, other):
	return self.__binary_operation(other, le)

	def __eq__(self, other):
	return self.__binary_operation(other, eq)

	def __getitem__(self, index):
	return self.list[index]

	def __remove_lookup(self, index):
	if self.optimize:
	key = self.list[index]
	if self.hashing:
	key = hash(marshal.dumps(key))
	del self.lookup[key]

	def __set_lookup(self, index, value):
	if self.optimize:
	if self.hashing:
	value = hash(marshal.dumps(value))
	self.lookup[value] = index

	def __delitem__(self, index):
	self.__remove_lookup(index)
	del self.list[index]

	def __setitem__(self, index, value):
	self.__remove_lookup(index)
	self.list[index] = value
	self.__set_lookup(index, value)

	def find(self, item):
	exists = False
	if self.optimize:
	if self.hashing:
	item = hash(marshal.dumps(item))
	exists = item in self.lookup
	if exists:
	index = self.lookup[item]
	else:
	index = None
	else:
	try:
	index = self.list.index(item)
	exists = True
	except ValueError:
	index = None
	return (exists, index)

	def __contains__(self, item):
	exists = False
	if self.optimize:
	if self.hashing:
	item = hash(marshal.dumps(item))
	exists = item in self.lookup
	else:
	try:
	index = self.list.index(item)
	except ValueError:
	exists = False
	return exists

	def __iter__(self):
	for item in self.list:
	yield item

	def __reversed__(self):
	L = len(self.list)
	for index in xrange(L - 1, -1, -1):
	yield self.list[index]

	''' useful to treat lists as numeric arrays '''
	def __binary_operation(self, other, operation):
	if len(other) != len(self.list):
	return None
	L = len(self.list)
	def op(index):
	return operation(self.list[index], other[index])
	return map(op, xrange(L))

	def __unary_operation(self, operation):
	return map(operation, self.list)

	def __add__(self, other):
	return self.__binary_operation(other, add)

	def __mul__(self, other):
	return self.__binary_operation(other, mul)

	def __div__(self, other):
	return self.__binary_operation(other, div)

	def __mod__(self, other):
	return self.__binary_operation(other, mod)

	def __sub__(self, other):
	return self.__binary_operation(other, sub)

	def __neg__(self):
	return self.__unary_operation(neg)

	def __rehash(self):
	self.lookup = {}
	if self.optimize:
	if self.hashing:
	L = imap(hash, imap(marshal.dumps, self.list))
	index = 0
	for item in L:
	self.lookup[item] = index
	index += 1
	else:
	L = len(self.list)
	self.lookup = dict([ (self.list[item], item) for item in L ])

	def insert(self, index, value):
	self.list.insert(index, value)
	''' needs rehashing - slow operation '''
	self.__rehash()

	if __name__ == "__main__":
	from time import time
	a = ListExtended(*range(2, 20, 1), lookup=True, hashing=True)
	b = ListExtended(*range(4, 40, 2))
	print 'a', a
	print 'b', b
	print 'a + b = ', a + b
	print 'a * b = ', a * b
	print '-a = ', -a
	print 'b - a = ', b-a
	print 'b/a = ', b/a
	t_optimize = 0.
	for n in xrange(1000):
	t_optimize -= time()
	a.find(12)
	t_optimize += time()

	t_simple = 0.
	for n in xrange(1000):
	t_simple -= time()
	b.find(17)
	t_simple += time()
	''' nothing surprising, just verifying it works! '''
	''' for small lists it does not make a difference '''
	print "Optimized find:", t_optimize/1000.
	print "Simple find:", t_simple/1000.
	print "Simple/Optimized", t_simple/t_optimize
	a = ListExtended(*range(2, 20000, 1), lookup=True, hashing=True)
	b = ListExtended(*range(4, 40000, 2))
	t_optimize = 0.
	for n in xrange(1000):
	t_optimize -= time()
	a.find(12)
	t_optimize += time()

	t_simple = 0.
	for n in xrange(1000):
	t_simple -= time()
	b.find(17)
	t_simple += time()
	''' for larger lists it _does_ make a difference '''
	print "Optimized find:", t_optimize/1000.
	print "Simple find:", t_simple/1000.
	print "Simple/Optimized", t_simple/t_optimize
No results found