Quantifies the memory optimizations of Redis 2.2, and memory costs of zsets versus sets.
""" | |
Copyright Michael Parker 2011. | |
Program to quantify the memory usage of: | |
a. The compact hash and set implementations in Redis 2.2 described at | |
http://redis.io/topics/memory-optimization and | |
http://redis.io/presentation/Pnoordhuis_whats_new_in_2_2.pdf versus their | |
counterparts that do not contain integers. For the comparisons to be | |
meaningful, be sure to have hash-max-zipmap-entries and | |
set-max-intset-entries present in your redis.conf file. | |
b. zsets compared to sets | |
If your redis.conf file is in the same directory as your redis server | |
executable, simply run: | |
python memory_benchmark.py /path/to/redis/server | |
Note that the default number of elements to add per set, 500, is just below the | |
set-max-intset-entries directive default value of 512. To observe the gains of | |
this encoding, double the set_size argument and halve num_sets, so you are still | |
adding the same number of elements across all sets but now exceed the | |
set-max-intset-entries threshold. (If you are not using the default of 512, | |
adjust these two arguments accordingly.) | |
""" | |

import argparse
import os
import subprocess
import time
import uuid

import redis

def set_up_server(parser_args):
    if parser_args.conf_path:
        args = [parser_args.server_path, parser_args.conf_path]
    else:
        # Default to a redis.conf alongside the server executable.
        server_path = parser_args.server_path
        dirname = os.path.dirname(server_path)
        conf_path = os.path.join(dirname, 'redis.conf')
        args = [server_path, conf_path]
    # Suppress output from the redis server.
    server = subprocess.Popen(args, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    return server

def get_client(port):
    return redis.Redis(port=port)

def get_memory_used(client):
    info = client.info()
    return int(info['used_memory'])

def tear_down_server(server):
    server.terminate()
    server.wait()

def approximate_size(size):
    # Adapted from http://diveintopython3.org/strings.html.
    multiple = 1024.0
    for suffix in ['KB', 'MB', 'GB', 'TB', 'PB']:
        size /= multiple
        if size < multiple:
            return '%.2f %s' % (size, suffix)
    raise ValueError('number too large')
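
# For example, approximate_size(1536) returns '1.50 KB' and
# approximate_size(3 * 1024 ** 2) returns '3.00 MB'.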

def profile(runner, args):
    server = set_up_server(args)
    client = get_client(args.port)
    # The client library will raise ConnectionErrors until the server is
    # ready to accept connections.
    while True:
        try:
            start_memory = get_memory_used(client)
        except redis.exceptions.ConnectionError:
            # Not ready to accept connections yet, so wait briefly and retry.
            time.sleep(0.1)
        else:
            break
    start_time = time.time()
    runner(client, args)
    end_time = time.time()
    end_memory = get_memory_used(client)
    diff_time = end_time - start_time
    diff_memory = end_memory - start_memory
    diff_memory_readable = approximate_size(diff_memory)
    print '%s: diff_time=%.2f s, diff_memory=%d bytes (%s)' % (
        runner.func_name, diff_time, diff_memory, diff_memory_readable)
    tear_down_server(server)
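
# Each call to profile prints one summary line; the numbers here are
# illustrative only:
#
#   set_ints: diff_time=14.21 s, diff_memory=1843200 bytes (1.76 MB)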

def set_runner(fill, client, args):
    for i in xrange(args.num_sets):
        key = 's:%d' % i
        # Fill each set through a single pipeline to avoid paying a network
        # round trip per element.
        pipeline = client.pipeline()
        fill(pipeline, key)
        pipeline.execute()

def set_ints(client, args):
    # Every member is an integer, so while a set holds at most
    # set-max-intset-entries elements it uses the compact intset encoding.
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            pipeline.sadd(key, i * 5)
    set_runner(fill, client, args)

def set_uuids(client, args):
    # uuid4().hex is a 32-character string, so these sets can never use the
    # intset encoding.
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            u = uuid.uuid4()
            pipeline.sadd(key, u.hex)
    set_runner(fill, client, args)

def zset_ints(client, args):
    # zadd uses the (key, member, score) argument order of the redis-py
    # client current when this was written. The integer division i / 10
    # gives each run of 10 members the same score.
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            pipeline.zadd(key, i * 5, i / 10)
    set_runner(fill, client, args)

def zset_uuids(client, args):
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            u = uuid.uuid4()
            pipeline.zadd(key, u.hex, i / 10)
    set_runner(fill, client, args)

def standard_map_add(client, args):
    # For a relevant timing comparison to compact_map_add, don't pipeline.
    for i in xrange(args.hash_size):
        uuid_bytes = uuid.uuid4().bytes
        key = 'id:%s' % uuid_bytes
        client.set(key, 'foo')

def compact_map_add(client, args):
    threshold = 500
    for i in xrange(args.hash_size):
        uuid_bytes = uuid.uuid4().bytes
        # The first byte of the UUID names the hash; the remaining bytes are
        # the field within it.
        key = 'id:%s' % uuid_bytes[0]
        # Add the value to the hash, optimistically assuming we will not need
        # to split it.
        pipeline = client.pipeline()
        pipeline.hset(key, uuid_bytes[1:], 'foo')
        pipeline.hlen(key)
        hash_length = pipeline.execute()[1]
        if hash_length == threshold:
            # The hash must be split to remain compact; get its values to move.
            h = client.hgetall(key)
            mappings = {}
            # Group values sharing the same prefix of two characters.
            for remainder in h:
                next_key = key + remainder[0]
                next_remainder = remainder[1:]
                pairs = mappings.get(next_key, None)
                if pairs is None:
                    pairs = []
                    mappings[next_key] = pairs
                pairs.append((next_remainder, h[remainder]))
            pipeline = client.pipeline()
            # Add each group of values to the hash named after their common
            # prefix.
            for next_key in mappings:
                pairs = mappings[next_key]
                new_values = dict(pairs)
                pipeline.hmset(next_key, new_values)
            # Delete the hash; any values added since retrieving it will also
            # be added to a hash with a prefix of two characters.
            pipeline.delete(key)
            pipeline.execute()
        elif hash_length > threshold:
            # In a concurrent setting, some other client found hash_length
            # equal to threshold and is now splitting this hash, after which
            # it will delete it, losing the value just added. Therefore also
            # add the value to a hash with a prefix of two characters so it
            # is not lost.
            next_key = key + uuid_bytes[1]
            client.hset(next_key, uuid_bytes[2:], 'foo')
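
# A worked sketch of the scheme above (byte values hypothetical): all UUIDs
# whose first byte is '\xab' start out in the hash 'id:\xab', keyed by their
# remaining 15 bytes. Once that hash reaches 500 fields, each field moves to
# the hash named by its first two bytes, e.g. 'id:\xab\xcd', keyed by its
# remaining 14 bytes, keeping each hash small enough for the compact zipmap
# encoding.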

def run():
    parser = argparse.ArgumentParser(description='Perform memory benchmarks')
    parser.add_argument('--num_sets', type=int, default=1000,
                        help='number of sets to create')
    parser.add_argument('--set_size', type=int, default=500,
                        help='number of elements per set')
    parser.add_argument('--hash_size', type=int, default=2000000,
                        help='number of keys to add to the hash')
    parser.add_argument('--port', type=int, default=6379,
                        help='port of the Redis server')
    parser.add_argument('--conf_path', help='path to the redis.conf file')
    parser.add_argument('server_path', help='path to the redis server')
    args = parser.parse_args()

    # Profile sets versus zsets, and the impact of set-max-intset-entries.
    profile(set_ints, args)
    profile(set_uuids, args)
    profile(zset_ints, args)
    profile(zset_uuids, args)
    # Profile the impact of hash-max-zipmap-entries.
    profile(standard_map_add, args)
    profile(compact_map_add, args)

if __name__ == '__main__':
    # Defining any variables here would create global variables.
    run()