copy redis
# there are a few different ways to copy data from one redis instance to another
# from what i've read, you can set up your new instance as a replica of the old
# and let redis replication do its thing
# or, you can copy the underlying dump file (dump.rdb) from one instance's file system to the other's
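# (for reference, a rough sketch of the replication approach, assuming redis >= 5
#  and network access to both instances; redis < 5 uses SLAVEOF instead of REPLICAOF:
#    redis-cli -h new-host REPLICAOF old-host 6379
#    ... wait until INFO replication shows master_link_status:up ...
#    redis-cli -h new-host REPLICAOF NO ONE
#  )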
# however, sometimes you don't manage your own redis instances, or maybe you just don't want to bother
# with any of that. you could just copy the data.
# this script does that.
# there were some other scripts online already:
# - https://github.com/jeremyfa/node-redis-dump
# - https://github.com/yaauie/redis-copy
# the first one ran out of memory. it's a reported issue. the owner says 🤷🏽‍♂️ sorry, find another tool.
# the second one was really slow.
# so, i wrote this one.
# it uses redis pipelines, dump, restore, and larger-than-default batch sizes to reduce network latency effects.
# it also sets a default ttl on each key it copies over. for my use case these are all good things, but you can
# adjust the ttl and batch sizes pretty easily below.
# I run python 3.6
import redis
# copy data FROM source
source_redis_url = 'redis://user:pass@host:port'
# copy data TO destination
destination_redis_url = 'redis://user2:pass2@host2:port2'
# redis scan count default is 10.
# this variable is used for a couple of related things:
# 1. how many keys are requested from redis on each scan
# 2. how many commands are dispatched to redis in each pipeline execution
# generally speaking, i've found i can significantly reduce network latency effects
# by batching things this way, e.g., pay the cost of a round trip across the network every
# N commands instead of every 1 command.
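# (back-of-envelope with assumed numbers: copying 1,000,000 keys over a 1 ms round trip
#  costs roughly 2,000,000 ms (~33 minutes) one dump + one restore at a time, vs about
#  3,000 round trips (~3 seconds) in batches of 1000, counting one scan, one dump
#  pipeline, and one restore pipeline per batch, and ignoring server-side work.)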
scan_count = 1000
# 72 hour expiration (ttl param is in ms). set this to 0 for no ttl expiration on your keys.
default_ttl = 60 * 60 * 72 * 1000  # = 259,200,000 ms
# decode_responses must stay False: dump/restore payloads are binary
source_conn = redis.StrictRedis.from_url(source_redis_url, decode_responses=False)
destination_conn = redis.StrictRedis.from_url(destination_redis_url, decode_responses=False)
# pipelines dump calls to keys
# returns zipped list of keys and serialized dump results
# [[key1, serialized_result1], [key2, serialized_result2],...]
def dump_keys(conn, keys):
    pipe = conn.pipeline()
    for key in keys:
        pipe.dump(key)
    results = pipe.execute()
    # materialize so the result matches the comment above and can be iterated more than once
    zipped_results = list(zip(keys, results))
    return zipped_results
# pipelines restore calls
# takes in zipped list of keys and serialized dump values
# restores serialized value at key, with replacement, and a default ttl of 72 hours
def restore_dump_values(conn, zipped_results):
    pipe = conn.pipeline()
    for key, value in zipped_results:
        # a key deleted or expired between scan and dump comes back as None; skip it
        if value is None:
            continue
        pipe.restore(name=key, ttl=default_ttl, value=value, replace=True)
    result = pipe.execute()
    return result
def dump_and_restore(source_conn, destination_conn, keys):
    # dump all the keys
    zipped_results = dump_keys(conn=source_conn, keys=keys)
    # restore the serialized results
    restore_result = restore_dump_values(conn=destination_conn, zipped_results=zipped_results)
    return restore_result
total_run = 0
total_success = 0
keys = []
for key in source_conn.scan_iter(count=scan_count):
    keys.append(key)
    bucket_count = len(keys)
    if bucket_count >= scan_count:
        res = dump_and_restore(source_conn=source_conn, destination_conn=destination_conn, keys=keys)
        total_run = total_run + len(res)
        successes = [result for result in res if result == b'OK']
        total_success += len(successes)
        keys = []
        print(f"total_run {total_run}")
        print(f"total_success {total_success}")
# flush whatever is left over from the last partial batch
res = dump_and_restore(source_conn=source_conn, destination_conn=destination_conn, keys=keys)
total_run = total_run + len(res)
successes = [result for result in res if result == b'OK']
total_success += len(successes)
print(f"total_run {total_run}")
print(f"total_success {total_success}")