Created
April 13, 2014 15:48
-
-
Save yoav-steinberg/10589500 to your computer and use it in GitHub Desktop.
rediff - the redis diff script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urlparse | |
import argparse | |
import redis | |
import sys | |
from multiprocessing import Pool | |
import signal | |
def parse_redis_url(s):
    """Parse a Redis URL into connection parameters.

    Accepts ``redis://[:password@]host[:port][/db]``. When *s* carries no
    scheme, ``redis://`` is prepended before parsing.

    Returns a ``(host, port, password, db)`` tuple; the port defaults to
    6379 and the db to 0 when absent or not an integer. Returns ``None``
    (after printing a message) when the scheme is anything but ``redis``.
    """
    url = urlparse.urlparse(s)
    if not url.scheme:
        s = 'redis://' + s
        url = urlparse.urlparse(s)
    if url.scheme != 'redis':
        print('Invalid scheme %s for %s' % (url.scheme, s))
        return None
    # The parsed path keeps its slash ('/3'), which int() rejects. Strip it so
    # an explicit database number is honoured instead of always becoming 0.
    try:
        db = int(url.path.strip('/'))
    except ValueError:
        db = 0
    return (url.hostname, url.port or 6379, url.password, db)
def compare_string(src, dst, key):
    """Return True when the string stored at *key* is equal in *src* and *dst*.

    Both arguments only need a ``get(key)`` method (Redis clients in this
    script).
    """
    source_value = src.get(key)
    target_value = dst.get(key)
    return source_value == target_value
def compare_hash(src, dst, key):
    """Return True when the hash at *key* holds the same fields and values in both.

    Fetches the full hash from *src* and from *dst* with ``hgetall`` and
    compares the resulting mappings.
    """
    source_fields = src.hgetall(key)
    target_fields = dst.hgetall(key)
    # Mapping equality checks both conditions at once: identical field names
    # and an identical value under every field.
    return source_fields == target_fields
def compare_list(src, dst, key):
    """Return True when the list at *key* has the same elements, in order, in both.

    The whole list (indices 0 through -1) is read from each side with
    ``lrange``.
    """
    source_items = src.lrange(key, 0, -1)
    target_items = dst.lrange(key, 0, -1)
    return source_items == target_items
def compare_set(src, dst, key):
    """Return True when the set at *key* has the same members in *src* and *dst*.

    Members are read from each side with ``smembers``.
    """
    source_members = src.smembers(key)
    target_members = dst.smembers(key)
    return source_members == target_members
def compare_zset(src, dst, key):
    """Return True when the sorted set at *key* matches in *src* and *dst*.

    The full range (indices 0 through -1) is read from each side with
    ``zrange(..., withscores=True)``, so the comparison covers whatever
    that call returns for members and scores.
    """
    source_entries = src.zrange(key, 0, -1, withscores=True)
    target_entries = dst.zrange(key, 0, -1, withscores=True)
    return source_entries == target_entries
def compare(key):
    """Compare one key between the module-level ``src`` and ``dst`` clients.

    Looks up the key's type on ``src`` and dispatches to the matching
    comparison function in the module-level ``cmp_funcs`` table.

    Returns False (after printing the key) when the values differ, and True
    when they match. Also returns True when the key is skipped: either its
    type has no comparison function, or Redis answered with a
    ``ResponseError``.
    """
    try:
        key_type = src.type(key)
        cmp_func = cmp_funcs.get(key_type)
        if cmp_func is None:
            # A type outside the dispatch table (e.g. 'none' for a key that
            # disappeared since it was scanned) used to raise KeyError and
            # abort the whole batch; report it and skip instead.
            print("Unsupported type '%s' for key: '%s', skipping" % (key_type, key))
            return True
        res = cmp_func(src, dst, key)
        if not res:
            print("key '%s' differs" % key)
        return res
    except redis.ResponseError as e:
        print("Error '%s' when comparing key: '%s', skipping" % (e, key))
        return True
def init_worker():
    """Make the calling process ignore SIGINT.

    Passed to the worker pool as its initializer in this script.
    """
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)
if __name__ == '__main__':
    # Diff every key of --src against --dst, comparing each SCAN batch in a
    # pool of worker processes. src, dst and cmp_funcs stay module-level on
    # purpose: compare() looks them up as globals.
    argparser = argparse.ArgumentParser(description='Redis diff tool.')
    argparser.add_argument('--src', required=True)
    argparser.add_argument('--dst', required=True)
    args = argparser.parse_args()

    src_params = parse_redis_url(args.src)
    dst_params = parse_redis_url(args.dst)
    if src_params is None or dst_params is None:
        # parse_redis_url has already printed why the URL was rejected.
        sys.exit(1)

    host, port, password, db = src_params
    src = redis.Redis(host=host, port=port, password=password, db=db)
    host, port, password, db = dst_params
    dst = redis.Redis(host=host, port=port, password=password, db=db)

    src_key_count = src.dbsize()
    dst_key_count = dst.dbsize()
    if dst_key_count != src_key_count:
        print("Source and destination key counts differ (%d : %d)" % (src_key_count, dst_key_count))

    # Dispatch table: Redis type name -> compare_<type> function of this module.
    cmp_funcs = {}
    for t in ['string', 'hash', 'list', 'set', 'zset']:
        cmp_funcs[t] = globals().get("compare_" + t)

    pool = Pool(50, init_worker)
    it = 0
    scanned = 0
    diffs = 0
    try:
        while True:
            it, keys = src.execute_command('scan', int(it), 'count', 1000)
            # Compare the batch BEFORE testing the cursor: the reply that
            # carries cursor 0 still holds the last keys of the iteration.
            # Use multiprocess pool to compare lots of keys simultaneously
            res = pool.map(compare, keys)
            diffs += sum(1 for same in res if not same)
            scanned += len(keys)
            # An empty source reports 0 keys; skip the percentage rather
            # than divide by zero.
            if src_key_count:
                sys.stdout.write("\rprogress: {:3.2f}".format(100.0*float(scanned)/src_key_count))
                sys.stdout.flush()
            if int(it) == 0:
                break
        # Normal completion: let the workers finish and exit cleanly.
        pool.close()
        pool.join()
        print('Done. Found %d diffs' % diffs)
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        pool.terminate()
        pool.join()
Will this work for caches behind vpc?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In general it can be, but it has several drawbacks. For example, it can't be used for big collections, because it will block your Redis thread. Also, it may have different binary formats across different major Redis versions (but I am not sure).
We have compared about 1 TB of data between a Redis 6.0 cluster installed on Intel machines and ElastiCache (engine: Redis 6.2) installed on ARM machines, and found only 4 cases where the compare-through-dumps approach gave us a false negative. So I am pretty sure that within one major version of Redis the data can be compared pretty well.