Quantifies the memory optimizations of Redis 2.2, and memory costs of zsets versus sets.
""" | |
Copyright Michael Parker 2011. | |
Program to quantify the memory usage of: | |
a. The compact hash and set implementations in Redis 2.2 described at | |
http://redis.io/topics/memory-optimization and | |
http://redis.io/presentation/Pnoordhuis_whats_new_in_2_2.pdf versus their | |
counterparts that do not contain integers. For the comparisons to be | |
meaningful, be sure to have hash-max-zipmap-entries and | |
set-max-intset-entries present in your redis.conf file. | |
b. zsets compared to sets | |
If your redis.conf file is in the same directory as your redis server | |
executable, simply run: | |
python memory_benchmark.py /path/to/redis/server | |
Note that the default number of elements to add per set, 500, is just below the | |
set-max-intset-entries directive default value of 512. To observe the gains of | |
this encoding, double the set_size argument and halve num_sets, so you are still | |
adding the same number of elements across all sets but now exceed the | |
set-max-intset-entries threshold. (If you are not using the default of 512, | |
adjust these two arguments accordingly.) | |
""" | |

import argparse
import os
import subprocess
import time
import uuid

import redis

def set_up_server(parser_args):
    if parser_args.conf_path:
        args = [parser_args.server_path, parser_args.conf_path]
    else:
        # Default to a redis.conf alongside the server executable.
        server_path = parser_args.server_path
        dirname = os.path.dirname(server_path)
        conf_path = os.path.join(dirname, 'redis.conf')
        args = [server_path, conf_path]
    # Suppress output from the redis server.
    server = subprocess.Popen(args, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    return server

def get_client(port):
    return redis.Redis(port=port)

def get_memory_used(client):
    info = client.info()
    return int(info['used_memory'])

def tear_down_server(server):
    server.terminate()
    server.wait()

def approximate_size(size):
    # Adapted from http://diveintopython3.org/strings.html.
    multiple = 1024.0
    for suffix in ['KB', 'MB', 'GB', 'TB', 'PB']:
        size /= multiple
        if size < multiple:
            return '%.2f %s' % (size, suffix)
    raise ValueError('number too large')
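
# For example, approximate_size(1536) returns '1.50 KB' and
# approximate_size(3 * 1024 ** 2) returns '3.00 MB'.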

def profile(runner, args):
    server = set_up_server(args)
    client = get_client(args.port)
    # The client library will raise ConnectionErrors until the server is
    # ready to accept connections.
    while True:
        try:
            start_memory = get_memory_used(client)
        except redis.exceptions.ConnectionError:
            # Not ready to accept connections yet, so wait briefly and retry.
            time.sleep(0.1)
        else:
            break
    start_time = time.time()
    runner(client, args)
    end_time = time.time()
    end_memory = get_memory_used(client)
    diff_time = end_time - start_time
    diff_memory = end_memory - start_memory
    diff_memory_readable = approximate_size(diff_memory)
    print '%s: diff_time=%.2f s, diff_memory=%d bytes (%s)' % (
        runner.func_name, diff_time, diff_memory, diff_memory_readable)
    tear_down_server(server)
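
# Each call to profile prints one summary line; the numbers here are
# illustrative only:
#
#   set_ints: diff_time=14.21 s, diff_memory=1843200 bytes (1.76 MB)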

def set_runner(fill, client, args):
    for i in xrange(args.num_sets):
        key = 's:%d' % i
        # Fill each set through a single pipeline to avoid paying a network
        # round trip per element.
        pipeline = client.pipeline()
        fill(pipeline, key)
        pipeline.execute()

def set_ints(client, args):
    # Every member is an integer, so while a set holds at most
    # set-max-intset-entries elements it uses the compact intset encoding.
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            pipeline.sadd(key, i * 5)
    set_runner(fill, client, args)

def set_uuids(client, args):
    # uuid4().hex is a 32-character string, so these sets can never use the
    # intset encoding.
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            u = uuid.uuid4()
            pipeline.sadd(key, u.hex)
    set_runner(fill, client, args)

def zset_ints(client, args):
    # zadd uses the (key, member, score) argument order of the redis-py
    # client current when this was written. The integer division i / 10
    # gives each run of 10 members the same score.
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            pipeline.zadd(key, i * 5, i / 10)
    set_runner(fill, client, args)

def zset_uuids(client, args):
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            u = uuid.uuid4()
            pipeline.zadd(key, u.hex, i / 10)
    set_runner(fill, client, args)

def standard_map_add(client, args):
    # For a relevant timing comparison to compact_map_add, don't pipeline.
    for i in xrange(args.hash_size):
        uuid_bytes = uuid.uuid4().bytes
        key = 'id:%s' % uuid_bytes
        client.set(key, 'foo')

def compact_map_add(client, args):
    threshold = 500
    for i in xrange(args.hash_size):
        uuid_bytes = uuid.uuid4().bytes
        # The first byte of the UUID names the hash; the remaining bytes are
        # the field within it.
        key = 'id:%s' % uuid_bytes[0]
        # Add the value to the hash, optimistically assuming we will not need
        # to split it.
        pipeline = client.pipeline()
        pipeline.hset(key, uuid_bytes[1:], 'foo')
        pipeline.hlen(key)
        hash_length = pipeline.execute()[1]
        if hash_length == threshold:
            # The hash must be split to remain compact; get its values to move.
            h = client.hgetall(key)
            mappings = {}
            # Group values sharing the same prefix of two characters.
            for remainder in h:
                next_key = key + remainder[0]
                next_remainder = remainder[1:]
                pairs = mappings.get(next_key, None)
                if pairs is None:
                    pairs = []
                    mappings[next_key] = pairs
                pairs.append((next_remainder, h[remainder]))
            pipeline = client.pipeline()
            # Add each group of values to the hash named after their common
            # prefix.
            for next_key in mappings:
                pairs = mappings[next_key]
                new_values = dict(pairs)
                pipeline.hmset(next_key, new_values)
            # Delete the hash; any values added since retrieving it will also
            # be added to a hash with a prefix of two characters.
            pipeline.delete(key)
            pipeline.execute()
        elif hash_length > threshold:
            # In a concurrent setting, some other client found hash_length
            # equal to threshold and is now splitting this hash, after which
            # it will delete it, losing the value just added. Therefore also
            # add the value to a hash with a prefix of two characters so it
            # is not lost.
            next_key = key + uuid_bytes[1]
            client.hset(next_key, uuid_bytes[2:], 'foo')
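
# A worked sketch of the scheme above (byte values hypothetical): all UUIDs
# whose first byte is '\xab' start out in the hash 'id:\xab', keyed by their
# remaining 15 bytes. Once that hash reaches 500 fields, each field moves to
# the hash named by its first two bytes, e.g. 'id:\xab\xcd', keyed by its
# remaining 14 bytes, keeping each hash small enough for the compact zipmap
# encoding.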

def run():
    parser = argparse.ArgumentParser(description='Perform memory benchmarks')
    parser.add_argument('--num_sets', type=int, default=1000,
                        help='number of sets to create')
    parser.add_argument('--set_size', type=int, default=500,
                        help='number of elements per set')
    parser.add_argument('--hash_size', type=int, default=2000000,
                        help='number of keys to add to the hash')
    parser.add_argument('--port', type=int, default=6379,
                        help='port of the Redis server')
    parser.add_argument('--conf_path', help='path to the redis.conf file')
    parser.add_argument('server_path', help='path to the redis server')
    args = parser.parse_args()

    # Profile sets versus zsets, and the impact of set-max-intset-entries.
    profile(set_ints, args)
    profile(set_uuids, args)
    profile(zset_ints, args)
    profile(zset_uuids, args)
    # Profile the impact of hash-max-zipmap-entries.
    profile(standard_map_add, args)
    profile(compact_map_add, args)

if __name__ == '__main__':
    # Defining any variables here would create global variables.
    run()