Skip to content

Instantly share code, notes, and snippets.

@LeslieK
Last active November 25, 2015 17:07
Show Gist options
  • Save LeslieK/11196050 to your computer and use it in GitHub Desktop.
Save LeslieK/11196050 to your computer and use it in GitHub Desktop.
Routines that examine the internals of a CPython dictionary for python 3.3 (modification of Brandon Rhodes' code _dictinfo.py) (See comment at bottom.)
"""Routines that examine the internals of a CPython dictionary using python 3.3."""
from ctypes import Structure, c_ulong, POINTER, cast, py_object, CFUNCTYPE, pointer, c_ssize_t, c_void_p
#from math import log
UMAX = 2 ** 32
def cbin(n):
"""Return `n` as a clean 32-bit binary number, without a leading '0b'."""
if n < 0:
n = UMAX + n
return '{0:0>32}'.format(bin(n)[2:])
# Create a singleton with which the dictionary routines below can
# represent null C pointers.
class NULL(object):
def __repr__(self):
return 'NULL'
NULL = NULL()
# Create Structures representing the dictionary object and entries, and
# give them useful methods making them easier to use.
class PyDictKeyEntry(Structure):
"""An entry in a dictionary."""
_fields_ = [
('me_hash', c_ulong),
('me_key', py_object),
('me_value', py_object),
]
class PyDictObject(Structure): # an incomplete type
"""A dictionary object."""
def __len__(self):
"""Return the number of dictionary entry slots."""
return self.ma_keys.contents.dk_size
def slot_of(self, key):
"""Find and return the slot at which `key` is stored."""
kobj = self.ma_keys.contents # PyDictKeysObject
for i in range(len(self)):
entry = kobj.dk_entries[i]
try:
entry.me_key
except ValueError:
continue
k = entry.me_key
if k is key or (k == key):
return i
raise KeyError('cannot find key %r' % (key,))
def slot_map(self):
"""Return a mapping of keys to their integer slot numbers."""
m = {}
kobj = self.ma_keys.contents # PyDictKeysObject
for i in range(len(self)):
entry = kobj.dk_entries[i] # an entry
try:
entry.me_value
except:
continue # me_value is NULL ptr
m[entry.me_key] = i
return m
LOOKUPFUNC = CFUNCTYPE(POINTER(PyDictKeyEntry), POINTER(PyDictObject), py_object, c_ulong, POINTER(POINTER(py_object)))
class PyDictKeysObject(Structure):
"""A key object"""
_fields_ = [
('dk_refcnt', c_ssize_t),
('dk_size', c_ssize_t),
('dk_lookup', LOOKUPFUNC),
('dk_usable', c_ssize_t),
('dk_entries', PyDictKeyEntry * 1),
]
PyDictKeysObject._dk_entries = PyDictKeysObject.dk_entries
PyDictKeysObject.dk_entries = property(lambda s: cast(s._dk_entries, POINTER(PyDictKeyEntry * s.dk_size))[0])
PyDictObject._fields_ = [
('ob_refcnt', c_ssize_t),
('ob_type', c_void_p),
('ma_used', c_ssize_t),
('ma_keys', POINTER(PyDictKeysObject)),
('ma_values', POINTER(py_object)), # points to array of ptrs
]
def dictobject(d):
"""Return the PyDictObject lying behind the Python dict `d`."""
if not isinstance(d, dict):
raise TypeError('cannot create a dictobject from %r' % (d,))
return cast(id(d), POINTER(PyDictObject)).contents
# Retrieve the secret dummy object used internally by dictionaries to represent a
# previously occupied slot.
def getDummy():
"""get the global dummy object"""
dummy = None
d = {0: 0}
del d[0]
dummy = dictobject(d).ma_keys.contents.dk_entries[0].me_key
del d
return dummy
#
dummy = getDummy() # <dummy key> of <dummy key> type
def _probe_steps(dummydict, key, final_slot):
"""Find the slots searched to put `key` in `final_slot` of `dummydict`.
The `dummydict` should be a dictionary in which `key` once resided
in position `final_slot`, but whose entries have all been deleted,
leaving dummy entries. This routine will repeatedly try to insert
`key` into the dictionary, and each time that it does not land at
`final_slot` an obstacle is placed where it has landed instead,
until finally the obstacles make `key` land in the `final_slot`.
A list of the slots searched is returned. The last element of this
list will always be `final_slot`.
"""
o = dictobject(dummydict) # PyDictObject
# Compute the first slot rather than do an expensive search.
ko = o.ma_keys.contents # PyDictKeysObject
mask = ko.dk_size - 1
slot = hash(key) & mask
slots = [int(slot)] # since slot often arrives as a long
# Keep adding obstacles until `key` winds up in `final_slot`.
while slots[-1] != final_slot:
if slot == key: # make sure the integer `slot` is not `key` itself
slot += len(o) # don't get why we add this to slot ???
dummydict[slot] = None # add the obstacle
dummydict[key] = None # add the key
slot = o.slot_of(key)
slots.append(slot)
del dummydict[key]
# Return the sequence of slots that we searched.
return slots
def probe_steps(keys, key):
"""Return the search sequence for `key` for a dict built with `keys`.
`keys` - Dictionary keys, in order of their insertion, including `key`.
`key` - The key whose collision path we want to explore.
"""
# Create a dictionary with the given `keys` and figure out at which
# slot the target `key` wound up.
d = dict.fromkeys(keys)
o = dictobject(d)
final_slot = o.slot_of(key)
# Empty the dictionary so that it contains only dummy entries, then
# pass it to the internal _probe_steps() routine.
for k in list(d):
del d[k]
return _probe_steps(d, key, final_slot)
def probe_all_steps(keys):
"""Return the search sequence for each key in a dict built with `keys`.
`keys` - Dictionary keys, in order of their insertion, including `key`.
The return value looks like ``{key: [slot, slot, slot], ...}``.
"""
# Create a dictionary with the given `keys` and find out in which
# slot each key wound up.
d = dict.fromkeys(keys)
o = dictobject(d)
m = o.slot_map()
# For each key in the dictionary, find its probe list.
for key, final_slot in m.items():
for k in list(d):
del d[k] # empty the dictionary
m[key] = _probe_steps(d, key, final_slot)
return m
if __name__ == '__main__':
# Two tiny tests.
steps = probe_steps([2, 3, 10, 18, 1], 10)
assert steps == [2, 5]
stepmap = probe_all_steps([2, 3, 10, 18, 1])
assert stepmap == {1: [1], 2: [2], 3: [3], 10: [2, 5], 18: [2, 5, 3, 0]}
@LeslieK
Copy link
Author

LeslieK commented Apr 28, 2014

Need to improve these 2 lines:
PyDictKeysObject._dk_entries = PyDictKeysObject.dk_entries
PyDictKeysObject.dk_entries = property(lambda s: cast(s._dk_entries, POINTER(PyDictKeyEntry * s.dk_size))[0])

since the second line is called each time while iterating through the keys in a dictionary.

The idea is to get the size of dk_entries before casting a pointer to it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment