thanthos · February 26, 2016 06:26
diff --git a/reindex.sh b/reindex.sh
 #Reindexing an Elasticsearch index can be a pain when you have limited resources and need it running at the same time.
 #Hence it is advisable to size up the quantity and break it down into chunks based on time. 
 #Look to Kibana. The break down is already done for you even as you perform your search. 
 #Just pop up the request and the aggregation query is there. 
 #Using this, you can tally your document count according to time to verify your activities. 

 #I need to do this due to resource constraints: the Logstash input plugin sometimes hits an error and restarts.
 #When it restarts, the query gets executed again. The logstash-input-elasticsearch plugin then starts a new search.
 #Any previous scroll ID is discarded. This is something you do not want happening.
 #You can end up with more documents in the target than in the source. Thus breaking the work down into chunks limits the corruption and makes remediation easier.
 #This script automates the process of executing the Logstash configs one after another. Doing it manually would be costly in terms of time.

 #So the strategy is like this: 
 #1) Create a Logstash config template with ${START} and ${END} tags, which the script replaces with the actual time values using sed.
 #2) Create an input.dat file with 2 values per line: the START and END epoch times.
 #3) The script loops through the input, creates the actual Logstash config file for each line, and executes it.

 #In my experience, with approx. 1GB of memory you should process approx. 30K documents per iteration.
 #Dependencies: Logstash (preferably in the PATH), Cygwin (for Windows), sed. Everything is assumed to happen in the current directory.
 #Lastly, use a diff tool to compare the source and target aggregation results to verify the process.



 #Start input.dat content.
 #input.dat content format.
 #1455174000000 1455184800000
 #1455206400000 1455217200000
 #1455271200000 1455282000000
 #1455476400000 1455487200000
 #
 #Make sure it ends with a newline.
 #End input.dat content

 #Start Sample Config template
 #You can break up the data into segments any way you see fit, besides using time.
 #-----------------------------------------------
 #input {
 #	elasticsearch {
 #		scan => false
 #		query => '
 #{ "query": {
 #    "filtered": {
 #      "query": {"query_string": {"query": "*"}},
 #      "filter": {
 #	"bool": {
 #        "must": [
 #            {"range": {"@timestamp": {"gte": ${START},"lte": ${END},"format": "epoch_millis"}}}  
 #          ],
 #        "must_not": []
 #       	}
 #      }}
 #  }
 #}'
 #	docinfo => true
 #	}
 #}
 #filter {
 #	metrics {
 #	  meter => "events"
 #	  add_tag => "metric"
 #	}
 #}
 #output {
 #	if "metric" in [tags] {
 #        	stdout { }
 #    	}
 #}
 # End Template

 #Start Script file that will read the segmentation, generate the config file and run it. 
 #!/bin/sh
 # Filename prefix shared by every generated Logstash config file.
 PREFIX='rindex'

 # Render reindex.preconfig for one segment and run Logstash on the result.
 # Globals:   PREFIX (read) - filename prefix of the generated config
 # Arguments: $1 - value substituted for ${START} (also used in the filename)
 #            $2 - value substituted for ${END}
 # Outputs:   writes "${PREFIX}_$1.conf" in the current directory
 # Returns:   2 if an argument is missing or empty, sed's status if rendering
 #            fails, otherwise the exit status of logstash
 run() {
 	if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
 		printf 'run: expected START and END values, got: %s\n' "$*" >&2
 		return 2
 	fi
 	local start="$1" end="$2"
 	local conf="${PREFIX}_${start}.conf"

 	# The values are spliced verbatim into the sed replacement text, so they
 	# must not contain '/', '&' or backslashes.
 	# Stop here if rendering fails so Logstash never runs on a broken config.
 	sed -e 's/\${START}/'"$start"'/' -e 's/\${END}/'"$end"'/' \
 		reindex.preconfig > "$conf" || return
 	logstash -f "$conf"
 }

 # Run one Logstash pass per segment listed in input.dat ("START END" per line).
 # -r keeps backslashes literal, and the "|| [ -n ... ]" clause still processes
 # a final line that has no trailing newline. The list is read on fd 3 so that
 # commands inside the loop cannot consume the remaining lines through stdin.
 while read -r SEG_START SEG_END SEG_REST <&3 || [ -n "$SEG_START" ]
 do
 	# Skip blank lines instead of rendering a config with empty bounds.
 	[ -n "$SEG_START" ] || continue
 	run "$SEG_START" "$SEG_END"
 done 3< input.dat
	#Reindexing an Elasticsearch index can be a pain when you have limited resources and need it running at the same time.
	#Hence it is advisable to size up the quantity and break it down into chunks based on time.
	#Look to Kibana. The break down is already done for you even as you perform your search.
	#Just pop up the request and the aggregation query is there.
	#Using this, you can tally your document count according to time to verify your activities.

	#I need to do this due to resource constraints: the Logstash input plugin sometimes hits an error and restarts.
	#When it restarts, the query gets executed again. The logstash-input-elasticsearch plugin then starts a new search.
	#Any previous scroll ID is discarded. This is something you do not want happening.
	#You can end up with more documents in the target than in the source. Thus breaking the work down into chunks limits the corruption and makes remediation easier.
	#This script automates the process of executing the Logstash configs one after another. Doing it manually would be costly in terms of time.

	#So the strategy is like this:
	#1) Create a Logstash config template with ${START} and ${END} tags, which the script replaces with the actual time values using sed.
	#2) Create an input.dat file with 2 values per line: the START and END epoch times.
	#3) The script loops through the input, creates the actual Logstash config file for each line, and executes it.

	#In my experience, with approx. 1GB of memory you should process approx. 30K documents per iteration.
	#Dependencies: Logstash (preferably in the PATH), Cygwin (for Windows), sed. Everything is assumed to happen in the current directory.
	#Lastly, use a diff tool to compare the source and target aggregation results to verify the process.



	#Start input.dat content.
	#input.dat content format.
	#1455174000000 1455184800000
	#1455206400000 1455217200000
	#1455271200000 1455282000000
	#1455476400000 1455487200000
	#
	#Make sure it ends with a newline.
	#End input.dat content

	#Start Sample Config template
	#You can break up the data into segments any way you see fit, besides using time.
	#-----------------------------------------------
	#input {
	# elasticsearch {
	# scan => false
	# query => '
	#{ "query": {
	# "filtered": {
	# "query": {"query_string": {"query": "*"}},
	# "filter": {
	# "bool": {
	# "must": [
	# {"range": {"@timestamp": {"gte": ${START},"lte": ${END},"format": "epoch_millis"}}}
	# ],
	# "must_not": []
	# }
	# }}
	# }
	#}'
	# docinfo => true
	# }
	#}
	#filter {
	# metrics {
	# meter => "events"
	# add_tag => "metric"
	# }
	#}
	#output {
	# if "metric" in [tags] {
	# stdout { }
	# }
	#}
	# End Template

	#Start Script file that will read the segmentation, generate the config file and run it.
	#!/bin/sh
	# Filename prefix shared by every generated Logstash config file.
	PREFIX='rindex'

	# Render reindex.preconfig for one segment and run Logstash on the result.
	# Globals:   PREFIX (read) - filename prefix of the generated config
	# Arguments: $1 - value substituted for ${START} (also used in the filename)
	#            $2 - value substituted for ${END}
	# Outputs:   writes "${PREFIX}_$1.conf" in the current directory
	# Returns:   2 if an argument is missing or empty, sed's status if rendering
	#            fails, otherwise the exit status of logstash
	run() {
		if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
			printf 'run: expected START and END values, got: %s\n' "$*" >&2
			return 2
		fi
		local start="$1" end="$2"
		local conf="${PREFIX}_${start}.conf"

		# The values are spliced verbatim into the sed replacement text, so they
		# must not contain '/', '&' or backslashes.
		# Stop here if rendering fails so Logstash never runs on a broken config.
		sed -e 's/\${START}/'"$start"'/' -e 's/\${END}/'"$end"'/' \
			reindex.preconfig > "$conf" || return
		logstash -f "$conf"
	}

	# Run one Logstash pass per segment listed in input.dat ("START END" per line).
	# -r keeps backslashes literal, and the "|| [ -n ... ]" clause still processes
	# a final line that has no trailing newline. The list is read on fd 3 so that
	# commands inside the loop cannot consume the remaining lines through stdin.
	while read -r SEG_START SEG_END SEG_REST <&3 || [ -n "$SEG_START" ]
	do
		# Skip blank lines instead of rendering a config with empty bounds.
		[ -n "$SEG_START" ] || continue
		run "$SEG_START" "$SEG_END"
	done 3< input.dat