By sampling keys from your Redis databases, this script tries to identify which types of keys occupy the most memory.
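To use it, fill in the HOST, PORT and PWD constants near the top of the script, make sure the redis gem is available, and run the file with ruby (e.g. ruby redis-sample.rb; that file name is just an example).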
#!/usr/bin/env ruby
# Evaluates a sample of keys/values from each redis database, computing statistics for each key pattern:
#   keys:    number of keys matching the given pattern
#   size:    approximation of the associated memory occupied (based on size/length of value)
#   percent: the proportion of this 'size' relative to the sample's total
#
# Copyright Weplay, Inc. 2010. Available for use under the MIT license.
#
# Changes in this fork (abesto) by Zoltán Nagy <[email protected]>
#   Add HOST, PORT, PWD constants
#   Use MULTI/EXEC to speed up the queries
#   Use KEYS * instead of RANDOMKEY calls if sample size > 0.7 * db key count
#   Use DEBUG OBJECT to measure size (key serializedlength), output as human-readable size (assuming serializedlength is in bytes)
#   Replace invalid UTF-8 keys with 'binary key'
#   Match patterns where the prefix is variable as well as where the postfix is
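#
# Per-database results are printed as YAML: a list of [pattern, {keys, size, percent}]
# pairs, sorted by size in descending order.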
require 'rubygems'
require 'redis'
require 'yaml'

SAMPLE_SIZE = 10_000 # number of keys to sample from each db before computing stats
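# Connection details of the Redis instance to profile; fill these in before running.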
HOST = '?'
PORT = '?'
PWD  = '?'
# http://codereview.stackexchange.com/questions/9107/printing-human-readable-number-of-bytes
def as_size(s)
  prefix = %W(TiB GiB MiB KiB B)
  s = s.to_f
  i = prefix.length - 1
  while s > 512 && i > 0
    s /= 1024
    i -= 1
  end
  ((s > 9 || s.modulo(1) < 0.1 ? '%d' : '%.1f') % s) + ' ' + prefix[i]
end

# Memory footprint: size/length of value.
def redis_size_query(db, k)
  db.debug('object', k)
end
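# DEBUG OBJECT replies look roughly like
#   "Value at:0x... refcount:1 encoding:raw serializedlength:42 lru:... lru_seconds_idle:..."
# (the exact fields may vary by Redis version); only serializedlength is extracted.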
REGEX = /serializedlength:([0-9]*)/

def redis_size_parse(str)
  Integer(REGEX.match(str)[1])
end
def redis_db_profile(db_name, keys)
  db = Redis.new(:port => PORT, :host => HOST)
  db.auth PWD
  db.select db_name
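  # Sampling strategy: if the sample would cover most of the db (SAMPLE_SIZE > 70% of
  # its key count), just fetch every key with KEYS *; otherwise issue SAMPLE_SIZE
  # RANDOMKEY calls, batched in MULTI/EXEC so they go out in a single round trip.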
  if SAMPLE_SIZE > keys * 0.7
    keys = db.keys '*'
  else
    keys = db.multi do
      SAMPLE_SIZE.times { |i| db.randomkey }
    end
  end
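  # Group keys into patterns by collapsing numeric id segments into '#'.
  # gsub raises on keys that are not valid UTF-8; those are grouped under 'binary key'.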
  key_patterns = keys.group_by{ |key|
    begin
      key.gsub(/[:_]\d+\b/, ':#').gsub(/\b\d+[:_]/, '#:')
    rescue
      'binary key'
    end
  }
  puts "Got #{keys.length} keys in #{key_patterns.length} patterns"
  debugs = db.multi do
    keys.map{ |k| redis_size_query(db, k) }
  end
  raw_data = Hash[keys.zip(debugs)]
  data = key_patterns.map{ |pattern, keys|
    [pattern, {'keys' => keys.size, 'size' => (keys.map{ |key|
      redis_size_parse(raw_data[key])
    }).inject(:+)}]
  }.sort_by{ |a| a.last['size'] }.reverse
  size_sum = data.inject(0){ |sum, d| sum += d.last['size'] }
  puts "Full size of measured keys: #{as_size size_sum}"
  data.each { |d| d.last['percent'] = '%.2f%%' % (d.last['size'].to_f * 100 / size_sum); d.last['size'] = as_size d.last['size'] }
end
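# List the databases on the server by parsing the "dbN:keys=M,..." lines from INFO.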
redis_cli = "redis-cli -p #{PORT} -h #{HOST} -a #{PWD}"
dbs = `#{redis_cli} info | grep ^db[0-9]`.split.map{ |line|
  matches = /^db(\d+):keys=(\d+)/.match line
  [matches[1], Integer(matches[2])]
}
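# Profile each database and print the per-pattern stats as YAML
# (y is Kernel#y, provided by require 'yaml' on the Rubies this script targets).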
dbs.each do |name, keys|
  puts "\nProfiling \"#{name}\"...\n#{'-'*20}"
  y redis_db_profile(name, keys)
end

puts "\nOverall statistics:\n#{'-'*20}"
puts `#{redis_cli} info | grep memory`