-
-
Save Luckodjo/9250b9905c5417b7c9b7c428f31a6289 to your computer and use it in GitHub Desktop.
A Python script that copies data from a single index, or from a series of daily indices, from one Elasticsearch cluster to another.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
__author__ = "Evantha Manikpura" | |
import datetime | |
import argparse | |
from elasticsearch import Elasticsearch | |
# Module-level state shared by init() and re_index(). Both stay None until
# init() has been called.
ES_WRITER = None  # client for the destination cluster, assigned by init()
ES_QUERY = None  # reindex request body, assigned by init()
def init(source_ip, dest_ip):
    """Populate the module-level ES_WRITER client and ES_QUERY request body.

    ES_WRITER becomes an ``Elasticsearch`` client built from ``dest_ip`` with
    ``timeout=480``. ES_QUERY becomes a reindex body whose source is the
    remote host ``http://<source_ip>:9200``.

    :param source_ip: address of the cluster to copy from; concatenated into
        the remote host URL, so it must be a string
    :param dest_ip: address of the cluster to write to; passed to the
        ``Elasticsearch`` constructor unchanged
    """
    global ES_WRITER, ES_QUERY

    # elasticsearch host to write data to
    ES_WRITER = Elasticsearch(dest_ip, timeout=480)

    remote_source = {
        "host": "http://" + source_ip + ":9200",
        "socket_timeout": "7m",
        "connect_timeout": "10s"
    }

    # The two index names are placeholders: re_index() overwrites both
    # entries for every index before it submits the request.
    ES_QUERY = {
        "source": {
            "remote": remote_source,
            "index": "bo3xlcert-2017.09.07"
        },
        "dest": {
            "index": "bo3xlcert-2017.09.07"
        }
    }
def generate_date_range(num_of_days, base=None, date_format=None):
    """Return formatted dates counting backwards one day at a time from ``base``.

    The first entry is ``base`` itself, the second is the day before, and so
    on, for ``num_of_days`` entries. A ``num_of_days`` of zero or less gives
    an empty list.

    :param num_of_days: how many consecutive days to produce
    :param base: ``datetime.datetime`` to start from; defaults to
        ``datetime.datetime.today()``
    :param date_format: ``strftime`` pattern for each entry. When omitted, the
        module-level ``DATE_FORMAT`` is used if it exists, otherwise
        ``'%Y.%m.%d'``.
    :return: list of date strings, newest first
    """
    if base is None:
        base = datetime.datetime.today()
    if date_format is None:
        # DATE_FORMAT is only bound when the file runs as a script, so a bare
        # reference raises NameError when this function is used via import.
        date_format = globals().get('DATE_FORMAT', '%Y.%m.%d')
    return [
        (base - datetime.timedelta(days=offset)).date().strftime(date_format)
        for offset in range(num_of_days)
    ]
def get_indices_list(index_pattern, num_of_days, base_date=None):
    """Build the list of index names to process.

    Each date string returned by ``generate_date_range`` yields one name of
    the form ``<index_pattern>-<date>``. When no dates are returned, the
    result is the bare ``index_pattern`` on its own.

    :param index_pattern: base index name
    :param num_of_days: number of days, forwarded to ``generate_date_range``
    :param base_date: starting date, forwarded to ``generate_date_range``
    :return: list of index names
    """
    day_stamps = generate_date_range(num_of_days, base_date)
    if not day_stamps:
        # No dated suffixes requested: fall back to the single, undated index.
        return [index_pattern]
    return [index_pattern + '-' + stamp for stamp in day_stamps]
def re_index(index_pattern, num_of_days, base_date):
    """Submit one reindex request per index returned by ``get_indices_list``.

    For every index name, the shared ``ES_QUERY`` body is pointed at that
    name on both the source and destination side, then passed to
    ``ES_WRITER.reindex``. ``init`` must have run first: both globals are
    ``None`` until it does.

    An exception raised by a reindex call is printed and the loop moves on,
    so one failing index does not stop the remaining ones.

    :param index_pattern: base index name
    :param num_of_days: number of days, forwarded to ``get_indices_list``
    :param base_date: starting date, forwarded to ``get_indices_list``
    """
    for index in get_indices_list(index_pattern, num_of_days, base_date):
        ES_QUERY['source']['index'] = index
        ES_QUERY['dest']['index'] = index
        # Parenthesised single-argument print and ``except ... as`` parse the
        # same way on Python 2 and Python 3.
        print('reindexing | index: %s | status: started' % index)
        try:
            # Passed by keyword: newer elasticsearch-py releases reject
            # positional arguments, older ones accept ``body=`` as well.
            ES_WRITER.reindex(body=ES_QUERY)
            print('reindexing | index: %s | status: completed' % index)
        except Exception as e:
            print('reindexing | index: %s | status: unsucessfull | reason: %s' % (index, e))
# Date layout of the daily index suffix, e.g. 2017.09.07. Defined at module
# level so that generate_date_range() can resolve it when this file is
# imported, not only when it is executed as a script.
DATE_FORMAT = '%Y.%m.%d'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--source-ip", type=str, required=True, help="source elasticsearch ip to copy data from")
    parser.add_argument("--dest-ip", type=str, required=True, help="destination elasticsearch ip to write data to")
    parser.add_argument("-i", "--base-index", type=str, required=True, help="base index")
    parser.add_argument("-n", "--num-of-days", default=0, type=int, help="number of days to go back when copying")
    # "--base-date" matches the hyphenated style of the other options;
    # "--base_date" is kept so existing invocations continue to work.
    parser.add_argument("-d", "--base-date", "--base_date", dest="base_date", type=str,
                        help="starting date of the daily index")
    args = parser.parse_args()

    # Validate the date before creating any client, and report a malformed
    # value as a usage error instead of an uncaught ValueError traceback.
    base_date = None
    if args.base_date is not None:
        try:
            base_date = datetime.datetime.strptime(args.base_date, DATE_FORMAT)
        except ValueError:
            parser.error("invalid base date %r: expected format YYYY.MM.DD" % args.base_date)

    init(args.source_ip, args.dest_ip)
    re_index(args.base_index, args.num_of_days, base_date)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment