Last active
November 25, 2015 17:07
-
-
Save LeslieK/11196050 to your computer and use it in GitHub Desktop.
Routines that examine the internals of a CPython dictionary for python 3.3 (modification of Brandon Rhodes' code _dictinfo.py) (See comment at bottom.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Routines that examine the internals of a CPython dictionary using python 3.3.""" | |
from ctypes import Structure, c_ulong, POINTER, cast, py_object, CFUNCTYPE, pointer, c_ssize_t, c_void_p | |
#from math import log | |
UMAX = 2 ** 32 | |
def cbin(n): | |
"""Return `n` as a clean 32-bit binary number, without a leading '0b'.""" | |
if n < 0: | |
n = UMAX + n | |
return '{0:0>32}'.format(bin(n)[2:]) | |
# Create a singleton with which the dictionary routines below can | |
# represent null C pointers. | |
class NULL(object): | |
def __repr__(self): | |
return 'NULL' | |
NULL = NULL() | |
# Create Structures representing the dictionary object and entries, and | |
# give them useful methods making them easier to use. | |
class PyDictKeyEntry(Structure): | |
"""An entry in a dictionary.""" | |
_fields_ = [ | |
('me_hash', c_ulong), | |
('me_key', py_object), | |
('me_value', py_object), | |
] | |
class PyDictObject(Structure): # an incomplete type | |
"""A dictionary object.""" | |
def __len__(self): | |
"""Return the number of dictionary entry slots.""" | |
return self.ma_keys.contents.dk_size | |
def slot_of(self, key): | |
"""Find and return the slot at which `key` is stored.""" | |
kobj = self.ma_keys.contents # PyDictKeysObject | |
for i in range(len(self)): | |
entry = kobj.dk_entries[i] | |
try: | |
entry.me_key | |
except ValueError: | |
continue | |
k = entry.me_key | |
if k is key or (k == key): | |
return i | |
raise KeyError('cannot find key %r' % (key,)) | |
def slot_map(self): | |
"""Return a mapping of keys to their integer slot numbers.""" | |
m = {} | |
kobj = self.ma_keys.contents # PyDictKeysObject | |
for i in range(len(self)): | |
entry = kobj.dk_entries[i] # an entry | |
try: | |
entry.me_value | |
except: | |
continue # me_value is NULL ptr | |
m[entry.me_key] = i | |
return m | |
LOOKUPFUNC = CFUNCTYPE(POINTER(PyDictKeyEntry), POINTER(PyDictObject), py_object, c_ulong, POINTER(POINTER(py_object))) | |
class PyDictKeysObject(Structure): | |
"""A key object""" | |
_fields_ = [ | |
('dk_refcnt', c_ssize_t), | |
('dk_size', c_ssize_t), | |
('dk_lookup', LOOKUPFUNC), | |
('dk_usable', c_ssize_t), | |
('dk_entries', PyDictKeyEntry * 1), | |
] | |
PyDictKeysObject._dk_entries = PyDictKeysObject.dk_entries | |
PyDictKeysObject.dk_entries = property(lambda s: cast(s._dk_entries, POINTER(PyDictKeyEntry * s.dk_size))[0]) | |
PyDictObject._fields_ = [ | |
('ob_refcnt', c_ssize_t), | |
('ob_type', c_void_p), | |
('ma_used', c_ssize_t), | |
('ma_keys', POINTER(PyDictKeysObject)), | |
('ma_values', POINTER(py_object)), # points to array of ptrs | |
] | |
def dictobject(d): | |
"""Return the PyDictObject lying behind the Python dict `d`.""" | |
if not isinstance(d, dict): | |
raise TypeError('cannot create a dictobject from %r' % (d,)) | |
return cast(id(d), POINTER(PyDictObject)).contents | |
# Retrieve the secret dummy object used internally by dictionaries to represent a | |
# previously occupied slot. | |
def getDummy(): | |
"""get the global dummy object""" | |
dummy = None | |
d = {0: 0} | |
del d[0] | |
dummy = dictobject(d).ma_keys.contents.dk_entries[0].me_key | |
del d | |
return dummy | |
# | |
dummy = getDummy() # <dummy key> of <dummy key> type | |
def _probe_steps(dummydict, key, final_slot): | |
"""Find the slots searched to put `key` in `final_slot` of `dummydict`. | |
The `dummydict` should be a dictionary in which `key` once resided | |
in position `final_slot`, but whose entries have all been deleted, | |
leaving dummy entries. This routine will repeatedly try to insert | |
`key` into the dictionary, and each time that it does not land at | |
`final_slot` an obstacle is placed where it has landed instead, | |
until finally the obstacles make `key` land in the `final_slot`. | |
A list of the slots searched is returned. The last element of this | |
list will always be `final_slot`. | |
""" | |
o = dictobject(dummydict) # PyDictObject | |
# Compute the first slot rather than do an expensive search. | |
ko = o.ma_keys.contents # PyDictKeysObject | |
mask = ko.dk_size - 1 | |
slot = hash(key) & mask | |
slots = [int(slot)] # since slot often arrives as a long | |
# Keep adding obstacles until `key` winds up in `final_slot`. | |
while slots[-1] != final_slot: | |
if slot == key: # make sure the integer `slot` is not `key` itself | |
slot += len(o) # don't get why we add this to slot ??? | |
dummydict[slot] = None # add the obstacle | |
dummydict[key] = None # add the key | |
slot = o.slot_of(key) | |
slots.append(slot) | |
del dummydict[key] | |
# Return the sequence of slots that we searched. | |
return slots | |
def probe_steps(keys, key): | |
"""Return the search sequence for `key` for a dict built with `keys`. | |
`keys` - Dictionary keys, in order of their insertion, including `key`. | |
`key` - The key whose collision path we want to explore. | |
""" | |
# Create a dictionary with the given `keys` and figure out at which | |
# slot the target `key` wound up. | |
d = dict.fromkeys(keys) | |
o = dictobject(d) | |
final_slot = o.slot_of(key) | |
# Empty the dictionary so that it contains only dummy entries, then | |
# pass it to the internal _probe_steps() routine. | |
for k in list(d): | |
del d[k] | |
return _probe_steps(d, key, final_slot) | |
def probe_all_steps(keys): | |
"""Return the search sequence for each key in a dict built with `keys`. | |
`keys` - Dictionary keys, in order of their insertion, including `key`. | |
The return value looks like ``{key: [slot, slot, slot], ...}``. | |
""" | |
# Create a dictionary with the given `keys` and find out in which | |
# slot each key wound up. | |
d = dict.fromkeys(keys) | |
o = dictobject(d) | |
m = o.slot_map() | |
# For each key in the dictionary, find its probe list. | |
for key, final_slot in m.items(): | |
for k in list(d): | |
del d[k] # empty the dictionary | |
m[key] = _probe_steps(d, key, final_slot) | |
return m | |
if __name__ == '__main__': | |
# Two tiny tests. | |
steps = probe_steps([2, 3, 10, 18, 1], 10) | |
assert steps == [2, 5] | |
stepmap = probe_all_steps([2, 3, 10, 18, 1]) | |
assert stepmap == {1: [1], 2: [2], 3: [3], 10: [2, 5], 18: [2, 5, 3, 0]} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Need to improve these 2 lines:
PyDictKeysObject._dk_entries = PyDictKeysObject.dk_entries
PyDictKeysObject.dk_entries = property(lambda s: cast(s._dk_entries, POINTER(PyDictKeyEntry * s.dk_size))[0])
since the second line is called each time while iterating through the keys in a dictionary.
The idea is to get the size of dk_entries before casting a pointer to it.