Created
December 27, 2015 13:10
-
-
Save ourway/7fa9aefce098d6377a6d to your computer and use it in GitHub Desktop.
Dictionary implementation in pure Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A Python dict implementation. | |
""" | |
import collections | |
MINSIZE = 8 | |
PERTURB_SHIFT = 5 | |
dummy = "<dummy key>" | |
class Entry(object): | |
""" | |
A hash table entry. | |
Attributes: | |
* key - The key for this entry. | |
* hash - The has of the key. | |
* value - The value associated with the key. | |
""" | |
__slots__ = ("key", "value", "hash") | |
def __init__(self): | |
self.key = None | |
self.value = None | |
self.hash = 0 | |
def __repr__(self): | |
return "<Entry: key={0} value={1}>".format(self.key, self.value) | |
class Dict(object): | |
""" | |
A mapping interface implemented as a hash table. | |
Attributes: | |
* used - The number of entires used in the table. | |
* filled - used + number of entries with a dummy key. | |
* table - List of entries; contains the actual dict data. | |
* mask - Length of table - 1. Used to fetch values. | |
""" | |
__slots__ = ("filled", "used", "mask", "table") | |
def __init__(self, arg=None, **kwargs): | |
self.clear() | |
self._update(arg, kwargs) | |
@classmethod | |
def fromkeys(cls, keys, value=0): | |
""" | |
Return a new dictionary from a sequence of keys. | |
""" | |
d = cls() | |
for key in keys: | |
d[key] = value | |
return d | |
def clear(self): | |
""" | |
Clear the dictionary of all data. | |
""" | |
self.filled = 0 | |
self.used = 0 | |
self.mask = MINSIZE - 1 | |
self.table = [] | |
# Initialize the table to a clean slate of entries. | |
for i in range(MINSIZE): | |
self.table.append(Entry()) | |
def pop(self, *args): | |
""" | |
Remove and return the value for a key. | |
""" | |
have_default = len(args) == 2 | |
try: | |
v = self[args[0]] | |
except KeyError: | |
if have_default: | |
return args[1] | |
raise | |
else: | |
del self[args[0]] | |
return v | |
def popitem(self): | |
""" | |
Remove and return any key-value pair from the dictionary. | |
""" | |
if self.used == 0: | |
raise KeyError("empty dictionary") | |
entry0 = self.table[0] | |
entry = entry0 | |
i = 0 | |
if entry0.value is None: | |
# The first entry in the table's hash is abused to hold the index to | |
# the next place to look for a value to pop. | |
i = entry0.hash | |
if i > self.mask or i < i: | |
i = 1 | |
entry = self.table[i] | |
while entry.value is None: | |
i += 1 | |
if i > self.mask: | |
i = 1 | |
entry = self.table[i] | |
res = entry.key, entry.value | |
self._del(entry) | |
# Set the next place to start. | |
entry0.hash = i + 1 | |
return res | |
def setdefault(self, key, default=0): | |
""" | |
If key is in the dictionary, return it. Otherwise, set it to the default | |
value. | |
""" | |
val = self._lookup(key).value | |
if val is None: | |
self[key] = default | |
return default | |
return val | |
def _lookup(self, key): | |
""" | |
Find the entry for a key. | |
""" | |
key_hash = hash(key) | |
i = key_hash & self.mask | |
entry = self.table[i] | |
if entry.key is None or entry is key: | |
return entry | |
free = None | |
if entry.key is dummy: | |
free = entry | |
elif entry.hash == key_hash and key == entry.key: | |
return entry | |
perturb = key_hash | |
while True: | |
i = (i << 2) + i + perturb + 1; | |
entry = self.table[i & self.mask] | |
if entry.key is None: | |
return entry if free is None else free | |
if entry.key is key or \ | |
(entry.hash == key_hash and key == entry.key): | |
return entry | |
elif entry.key is dummy and free is None: | |
free = dummy | |
perturb >>= PERTURB_SHIFT | |
assert False, "not reached" | |
def _resize(self, minused): | |
""" | |
Resize the dictionary to at least minused. | |
""" | |
newsize = MINSIZE | |
# Find the smalled value for newsize. | |
while newsize <= minused and newsize > 0: | |
newsize <<= 1 | |
oldtable = self.table | |
# Create a new table newsize long. | |
newtable = [] | |
while len(newtable) < newsize: | |
newtable.append(Entry()) | |
# Replace the old table. | |
self.table = newtable | |
self.used = 0 | |
self.filled = 0 | |
# Copy the old data into the new table. | |
for entry in oldtable: | |
if entry.value is not None: | |
self._insert_into_clean(entry) | |
elif entry.key is dummy: | |
entry.key = None | |
self.mask = newsize - 1 | |
def _insert_into_clean(self, entry): | |
""" | |
Insert an item in a clean dict. This is a helper for resizing. | |
""" | |
i = entry.hash & self.mask | |
new_entry = self.table[i] | |
perturb = entry.hash | |
while new_entry.key is not None: | |
i = (i << 2) + i + perturb + 1 | |
new_entry = self.table[i & self.mask] | |
perturb >>= PERTURB_SHIFT | |
new_entry.key = entry.key | |
new_entry.value = entry.value | |
new_entry.hash = entry.hash | |
self.used += 1 | |
self.filled += 1 | |
def _insert(self, key, value): | |
""" | |
Add a new value to the dictionary or replace an old one. | |
""" | |
entry = self._lookup(key) | |
if entry.value is None: | |
self.used += 1 | |
if entry.key is not dummy: | |
self.filled += 1 | |
entry.key = key | |
entry.hash = hash(key) | |
entry.value = value | |
def _del(self, entry): | |
""" | |
Mark an entry as free with the dummy key. | |
""" | |
entry.key = dummy | |
entry.value = None | |
self.used -= 1 | |
def __getitem__(self, key): | |
value = self._lookup(key).value | |
if value is None: | |
# Check if we're a subclass. | |
if type(self) is not Dict: | |
# Try to call the __missing__ method. | |
missing = getattr(self, "__missing__") | |
if missing is not None: | |
return missing(key) | |
raise KeyError("no such key: {0!r}".format(key)) | |
return value | |
def __setitem__(self, key, what): | |
# None is used as a marker for empty entries, so it can't be in a | |
# dictionary. | |
assert what is not None and key is not None, \ | |
"key and value must not be None" | |
old_used = self.used | |
self._insert(key, what) | |
# Maybe resize the dict. | |
if not (self.used > old_used and | |
self.filled*3 >= (self.mask + 1)*2): | |
return | |
# Large dictionaries (< 5000) are only doubled in size. | |
factor = 2 if self.used > 5000 else 4 | |
self._resize(factor*self.used) | |
def __delitem__(self, key): | |
entry = self._lookup(key) | |
if entry.value is None: | |
raise KeyError("no such key: {0!r}".format(key)) | |
self._del(entry) | |
def __contains__(self, key): | |
""" | |
Check if a key is in the dictionary. | |
""" | |
return self._lookup(key).value is not None | |
def __eq__(self, other): | |
if not isinstance(other, Dict): | |
try: | |
# Try to coerce the other to a Dict, so we can compare it. | |
other = Dict(other) | |
except TypeError: | |
return NotImplemented | |
if self.used != other.used: | |
# They're not the same size. | |
return False | |
# Look through the table and compare every entry, breaking out early if | |
# we find a difference. | |
for entry in self.table: | |
if entry.value is not None: | |
try: | |
bval = other[entry.key] | |
except KeyError: | |
return False | |
if not bval == entry.value: | |
return False | |
return True | |
def __ne__(self, other): | |
return not self == other | |
def keys(self): | |
""" | |
Return a list of keys in the dictionary. | |
""" | |
return [entry.key for entry in self.table if entry.value is not None] | |
def values(self): | |
""" | |
Return a list of values in the dictionary. | |
""" | |
return [entry.value for entry in self.table if entry.value is not None] | |
def items(self): | |
""" | |
Return a list of key-value pairs. | |
""" | |
return [(entry.key, entry.value) for entry in self.table | |
if entry.value is not None] | |
def __iter__(self): | |
return DictKeysIterator(self) | |
def itervalues(self): | |
""" | |
Return an iterator over the values in the dictionary. | |
""" | |
return DictValuesIterator(self) | |
def iterkeys(self): | |
""" | |
Return an iterator over the keys in the dictionary. | |
""" | |
return DictKeysIterator(self) | |
def iteritems(self): | |
""" | |
Return an iterator over key-value pairs. | |
""" | |
return DictItemsIterator(self) | |
def _merge(self, mapping): | |
""" | |
Update the dictionary from a mapping. | |
""" | |
for key in mapping.keys(): | |
self[key] = mapping[key] | |
def _from_sequence(self, seq): | |
for double in seq: | |
if len(double) != 2: | |
raise ValueError("{0!r} doesn't have a length of 2".format( | |
double)) | |
self[double[0]] = double[1] | |
def _update(self, arg, kwargs): | |
if arg: | |
if isinstance(arg, collections.Mapping): | |
self._merge(arg) | |
else: | |
self._from_sequence(arg) | |
if kwargs: | |
self._merge(kwargs) | |
def update(self, arg=None, **kwargs): | |
""" | |
Update the dictionary from a mapping or sequence containing key-value | |
pairs. Any existing values are overwritten. | |
""" | |
self._update(arg, kwargs) | |
def get(self, key, default=0): | |
""" | |
Return the value for key if it exists otherwise the default. | |
""" | |
try: | |
return self[key] | |
except KeyError: | |
return default | |
def __len__(self): | |
return self.used | |
def __repr__(self): | |
r = ["{0!r} : {1!r}".format(k, v) for k, v in self.iteritems()] | |
return "Dict({" + ", ".join(r) + "})" | |
collections.Mapping.register(Dict) | |
class DictIterator(object): | |
def __init__(self, d): | |
self.d = d | |
self.used = self.d.used | |
self.len = self.d.used | |
self.pos = 0 | |
def __iter__(self): | |
return self | |
def next(self): | |
# Check if the dictionary has been mutated under us. | |
if self.used != self.d.used: | |
# Make this state permanent. | |
self.used = -1 | |
raise RuntimeError("dictionary size changed during interation") | |
i = self.pos | |
while i <= self.d.mask and self.d.table[i].value is None: | |
i += 1 | |
self.pos = i + 1 | |
if i > self.d.mask: | |
# We're done. | |
raise StopIteration | |
self.len -= 1 | |
return self._extract(self.d.table[i]) | |
__next__ = next | |
def _extract(self, entry): | |
return getattr(entry, self.kind) | |
def __len__(self): | |
return self.len | |
class DictKeysIterator(DictIterator): | |
kind = "key" | |
class DictValuesIterator(DictIterator): | |
kind = "value" | |
class DictItemsIterator(DictIterator): | |
def _extract(self, entry): | |
return entry.key, entry.value |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment