Skip to content

Instantly share code, notes, and snippets.

@evantha
Last active November 16, 2022 14:40
Show Gist options
  • Save evantha/7649fb8af32e3ee9d09b1134576ed357 to your computer and use it in GitHub Desktop.
Save evantha/7649fb8af32e3ee9d09b1134576ed357 to your computer and use it in GitHub Desktop.
A Python script that copies data from a single index, or from a series of daily indices, from one Elasticsearch cluster to another.
#!/usr/bin/env python
__author__ = "Evantha Manikpura"
import datetime
import argparse
from elasticsearch import Elasticsearch
ES_WRITER = None
ES_QUERY = None
def init(source_ip, dest_ip):
    """Set up the module-level client and reindex request template.

    Populates two globals:

    * ``ES_WRITER`` -- an ``Elasticsearch`` client built from ``dest_ip``
      (the host data is written to) with a 480 second timeout.
    * ``ES_QUERY`` -- the reindex request body. Its ``source.remote.host``
      points at ``source_ip`` on port 9200 over plain HTTP.

    The index names stored in the template are placeholders; ``re_index``
    overwrites both ``source.index`` and ``dest.index`` before each request.

    :param source_ip: address of the cluster to copy data from
    :param dest_ip: address of the cluster to write data to
    """
    global ES_WRITER, ES_QUERY

    # The destination client is created first, exactly as before, so a
    # failure while building the query leaves ES_WRITER already assigned.
    ES_WRITER = Elasticsearch(dest_ip, timeout=480)

    remote_source = {
        "host": "http://" + source_ip + ":9200",
        "socket_timeout": "7m",
        "connect_timeout": "10s",
    }
    ES_QUERY = {
        "source": {
            "remote": remote_source,
            "index": "bo3xlcert-2017.09.07",
        },
        "dest": {
            "index": "bo3xlcert-2017.09.07",
        },
    }
# Date suffix used in daily index names, e.g. "2017.09.07". It lives at module
# level so that generate_date_range() also works when this file is imported
# instead of being executed as a script.
DATE_FORMAT = '%Y.%m.%d'


def generate_date_range(num_of_days, base=None):
    """Return formatted dates going backwards one day at a time from ``base``.

    :param num_of_days: how many dates to produce; zero or a negative value
        yields an empty list
    :param base: ``datetime.datetime`` to start from (inclusive); defaults to
        ``datetime.datetime.today()``
    :return: list of strings formatted with ``DATE_FORMAT``, newest first
    """
    if base is None:
        base = datetime.datetime.today()
    return [
        (base - datetime.timedelta(days=offset)).date().strftime(DATE_FORMAT)
        for offset in range(num_of_days)
    ]
def get_indices_list(index_pattern, num_of_days, base_date=None):
    """Build the list of index names to process.

    Each date string returned by ``generate_date_range(num_of_days,
    base_date)`` is appended to ``index_pattern`` with a ``-`` separator.
    When that date list is empty, the bare ``index_pattern`` is returned as
    the only entry.

    :param index_pattern: base index name
    :param num_of_days: number of daily indices to generate
    :param base_date: starting date, forwarded to ``generate_date_range``
    :return: list of index names
    """
    dated_names = [
        index_pattern + '-' + day_suffix
        for day_suffix in generate_date_range(num_of_days, base_date)
    ]
    if dated_names:
        return dated_names
    # No dates requested: fall back to the single, undated index.
    return [index_pattern]
def re_index(index_pattern, num_of_days, base_date):
    """Reindex every index produced by ``get_indices_list`` one at a time.

    For each index name, the shared ``ES_QUERY`` template is pointed at that
    name on both the source and destination side and submitted through
    ``ES_WRITER.reindex``. A status line is printed before and after each
    request. A failure is reported and the loop moves on to the next index.

    ``init`` must have been called first so that ``ES_WRITER`` and
    ``ES_QUERY`` are populated.

    :param index_pattern: base index name
    :param num_of_days: number of daily indices to copy
    :param base_date: starting date of the daily indices, or ``None``
    """
    for index in get_indices_list(index_pattern, num_of_days, base_date):
        # The same index name is used on both clusters.
        ES_QUERY['source']['index'] = index
        ES_QUERY['dest']['index'] = index
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('reindexing | index: %s | status: started' % index)
        try:
            # Passed by keyword so the call does not rely on the client
            # accepting the request body positionally.
            ES_WRITER.reindex(body=ES_QUERY)
        except Exception as e:
            # Best effort: report the failure and continue with the rest.
            print('reindexing | index: %s | status: unsucessfull | reason: %s' % (index, e))
        else:
            print('reindexing | index: %s | status: completed' % index)
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, set up the clients and
    # copy the requested indices.
    DATE_FORMAT = '%Y.%m.%d'

    parser = argparse.ArgumentParser()
    parser.add_argument("--source-ip", type=str, required=True, help="source elasticsearch ip to copy data from")
    parser.add_argument("--dest-ip", type=str, required=True, help="destination elasticsearch ip to write data to")
    parser.add_argument("-i", "--base-index", type=str, required=True, help="base index")
    parser.add_argument("-n", "--num-of-days", default=0, type=int, help="number of days to go back when copying")
    # "--base-date" matches the hyphenated style of the other options; the
    # original "--base_date" spelling is kept so existing invocations work.
    parser.add_argument("-d", "--base-date", "--base_date", dest="base_date", type=str,
                        help="starting date of the daily index")
    args = parser.parse_args()

    # Validate the date before creating any client so a malformed value is
    # reported as a usage error rather than a traceback.
    base_date = args.base_date
    if base_date is not None:
        try:
            base_date = datetime.datetime.strptime(base_date, DATE_FORMAT)
        except ValueError:
            parser.error("base date %r does not match the format %s" % (base_date, DATE_FORMAT))

    init(args.source_ip, args.dest_ip)
    re_index(args.base_index, args.num_of_days, base_date)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment