dogpile and raw redis
""" | |
The following are some code snippets on how i optimized a system using Dogpile cache into native Redis, while still leveraging dogpile. | |
This is not fully functional code, it's just snippets to explain and help the next person: | |
General Overview: | |
* The cached data is a computed value of multiple permissions for an A2B relation. | |
* The Redis key is `'calculated-a|%s' % id_a`. | |
* The payload is stored in a field, via `id_b`. | |
* The TTL exists on the KEY. all hash fields should expire when the key does. setting a new field should NOT prolong the TTL (the risk of stale data is too high). | |
* This maps to: | |
** `HSET calculated-a|id_a id_b payload` | |
** `EXPIRE calculated-a|id_a expiry` | |
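For example, a redis-cli session against this layout (with hypothetical ids 123/456) would look like:

    HSET calculated-a|123 456 <packed-payload>
    EXPIRE calculated-a|123 300
    TTL calculated-a|123       (counts down; a later HSET must NOT reset it)
    HGET calculated-a|123 456  (returns the packed payload)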
The original implementation:

* Separate keys and normal dogpile.cache for expiry/etc.
** `SET calculated-a|{id_a}|{id_b} payload`
* A large dictionary was stored (25+ k/v pairs).

The new implementation:

* The dictionary is serialized to its `values` only. The values are all booleans, so this is a huge savings.
* `CacheRegion.get_or_create` never caches, because `should_cache_fn` *always* returns `False`.
** Instead, we access the raw Redis client via the backend and populate Redis ourselves.
** The population occurs via a script, and raw data is used (not a dogpile `CachedValue`).
* A custom deserializer, `Serializer_loads`, pulls the data from Redis and fakes it into a dogpile-compatible value.
* Expiry now happens entirely within Redis.

Why was dogpile abused like this?

1. The application already used dogpile; not having to change any client/integration code was a high priority.
2. dogpile.cache's lock implementation is great. It was getting ugly trying to reimplement the pattern using dogpile.core.
3. The payload size was around 4KB. It is now down to under 70 bytes.
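The savings can be measured directly against the toy dict below (a sketch; the 4KB figure came from the real 25+ key payload, not this example):

    full = msgpack.packb(calculate_dict(), use_bin_type=True)
    packed = msgpack.packb(pack_dictionary(calculate_dict()), use_bin_type=True)
    # len(packed) < len(full); the values-only form drops every key string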
Why a Redis hash?

1. There are situations where all data for an `id_a` should be deleted (e.g. `id_a` is a userid in certain stores; it is cleared on logout).
2. Against current usage metrics, putting a TTL on a single hash key is preferable to managing 20 normal keys.
3. This is the simplest form of deployment. Another version stashed a similar A2B intersection in two hashes:

    HSET a2b-a|{id_a} {id_b} {value}
    HSET a2b-b|{id_b} {id_a} {value}

In those versions, more complicated scripts are needed to set values/expiries.
When a manual expire is needed, the HKEYS are consulted to expire related data, such as the symmetrical stash.
For example, this will query all the b-ids correlated to an a-id, delete the a-id's field from each b-id record, then delete the entire a-id record:

    local b_ids = redis.call('hkeys', 'a2b-a|' .. ARGV[1])
    for i, b_id in ipairs(b_ids) do
        redis.call('hdel', 'a2b-b|' .. b_id, ARGV[1])
    end
    redis.call('del', 'a2b-a|' .. ARGV[1])
    return 1
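Registering and invoking that cleanup via redis-py would look like this (a sketch; `cleanup_lua` is a hypothetical variable holding the source above):

    cleanup = redis_client.register_script(cleanup_lua)
    cleanup(keys=[], args=[id_a])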
""" | |
from dogpile.cache.api import CachedValue | |
import msgpack | |
def msgpack_dumps(payload): | |
return msgpack.packb(payload, use_bin_type=True) | |
def msgpack_loads(payload): | |
return msgpack.unpackb(payload, encoding="utf-8") | |
CACHE_REGION = ...  # the application's configured dogpile cache region
"""
set the backend's dumps to `Serializer_dumps`
set the backend's loads to `Serializer_loads`
"""
EXPIRY_SECONDS = 300

# ordered keys of the calculated dict; must match sorted(calculate_dict().keys())
payload_template = ('a', 'bb', 'ccc', 'dddd', 'eeeee', 'ffffff', 'ggggggg', 'hhhhhhhh')
def calculate_dict(id_a=None, id_b=None):
    """custom function to compute the permissions dict for an A2B relation"""
    calculated = {'a': True,
                  'bb': False,
                  'ccc': True,
                  'dddd': False,
                  'eeeee': True,
                  'ffffff': False,
                  'ggggggg': True,
                  'hhhhhhhh': False,
                  }
    return calculated
# lua script. sets a hash field/value; only sets the ttl if the key didn't
# already exist, so adding fields never prolongs the expiry.
# (registered and invoked in `_populate_CalculatedDict_a2b` below.)
hset_with_expiry = '''\
local exists = redis.call('exists', KEYS[1])
redis.call('hset', KEYS[1], ARGV[1], ARGV[2])
if exists == 0 then
    redis.call('expire', KEYS[1], ARGV[3])
end
return 1'''  # 1 KEY; ARGV: field, value, timeout
def pack_dictionary(payload):
    """
    turns a dictionary into a list of values, ordered by key.
    the values must be dictionaries or msgpack-serializable scalars
    (booleans, in this case).
    """
    _rval = []
    for (k, v) in sorted(payload.items()):
        if isinstance(v, dict):
            _rval.append(pack_dictionary(v))
        else:
            _rval.append(v)
    return _rval
def unpack_dictionary(payload, template=None, subdicts=None):
    """
    `payload` is the output of `pack_dictionary`
    `template` is a template of dictionary keys; entries for nested
        dictionaries are `(key, subtemplate)` tuples
    `subdicts` are the known keys that hold dictionaries.
    this serializer/deserializer is NOT universal; it is tailored to this
    data structure. if you only have a single-level dict, `zip()` is way faster.
    """
    output = {}
    if subdicts:
        for (idx, i) in enumerate(template):
            if isinstance(i, tuple) and i[0] in subdicts:
                # nested dict: recurse with the sub-template
                output[i[0]] = unpack_dictionary(payload[idx], i[1], subdicts)
            else:
                output[i] = payload[idx]
    else:
        for (idx, i) in enumerate(template):
            output[i] = payload[idx]
    return output
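# a round-trip sketch for the flat case (uses the toy dict above; for a
# single-level dict the template is just the sorted keys):
#
#   packed = pack_dictionary(calculate_dict())        # [True, False, True, ...]
#   restored = unpack_dictionary(packed, payload_template)
#   assert restored == calculate_dict()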
def Serializer_loads(value):
    """
    this is used as a custom loader for redis.
    after pulling the data out of msgpack, it is deserialized into a dict
    and wrapped in a dogpile-compatible `CachedValue`.
    """
    if value:
        value = msgpack_loads(value)
        deserialized = unpack_dictionary(value, payload_template)
        return CachedValue(deserialized,
                           {"ct": time.time(),  # faked creation time; redis owns the real expiry
                            "v": 1,  # mirrors dogpile.cache.region.value_version
                            })
    return NO_VALUE
def Serializer_dumps(value):
    # writes happen via the lua script, never through dogpile's serializer
    raise ValueError('never run this!')
class CalculatedDictCache(object):
    """
    the cached values are handled via python classes
    """
    # we'll cache redis-client script objects onto this class as needed
    _script__hset_with_expiry = None
    def get_CalculatedDict_a2b(self, id_a, id_b):
        """
        pull the calculated dict for an (id_a, id_b) pair through dogpile,
        recalculating and repopulating redis on a miss.
        """
        # the key
        key_calculated_a = 'calculated-a|%s' % id_a
        # we store redis hkeys in a tuple via `dogpile_backend_redis_advanced_hstore`
        key = (key_calculated_a, id_b)
        self.query_args = (id_a, id_b)
        # run get_or_create.
        # note that we NEVER CACHE through dogpile itself
        cached = CACHE_REGION.get_or_create(
            key,
            self._populate_CalculatedDict_a2b,
            should_cache_fn=lambda x: False,  # never cache!
        )
        return cached
    def _populate_CalculatedDict_a2b(self):
        (id_a, id_b) = self.query_args
        # generate the actual dict
        calculated_dict = calculate_dict(id_a=id_a, id_b=id_b)
        key_calculated_a = 'calculated-a|%s' % id_a
        # serialize the dictionary into ONLY the values
        serialized = pack_dictionary(calculated_dict)
        # pack into msgpack format
        value_string = msgpack_dumps(serialized)
        # this uses the raw redis client.
        # `actual_backend` was precomputed for the region; it is an actual backend, not a proxy.
        # a PR exists to make it part of dogpile
        redis_client = CACHE_REGION.actual_backend.client
        # register/cache the script onto the class if needed
        if CalculatedDictCache._script__hset_with_expiry is None:
            CalculatedDictCache._script__hset_with_expiry = redis_client.register_script(hset_with_expiry)
        # run our cache population
        result = CalculatedDictCache._script__hset_with_expiry(
            keys=[key_calculated_a],
            args=[id_b, value_string, EXPIRY_SECONDS],
            client=redis_client,
        )
        return calculated_dict
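# a usage sketch (hypothetical ids; assumes CACHE_REGION was configured with the
# hstore backend and the Serializer_* functions above):
#
#   cache = CalculatedDictCache()
#   permissions = cache.get_CalculatedDict_a2b(123, 456)  # -> dict of booleans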