Last active
April 8, 2018 21:44
-
-
Save portante/1203459a9f5f98f4d08dd1958d078d2d to your computer and use it in GitHub Desktop.
Very hacky way to avoid index creation becoming a bottleneck in OpenShift Aggregated Logging when we have large numbers of indices.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Pre-create tomorrow's Elasticsearch indices so that index creation does not
# become a bottleneck in OpenShift Aggregated Logging at the day rollover.

# Clean up the scratch directory on any exit path.
# The ${TMPDIR:?} guard aborts instead of running `rm -rf` on an empty path
# (e.g. if mktemp failed).
function finish {
  rm -rf -- "${TMPDIR:?}"
}
trap finish EXIT
TMPDIR=$(mktemp -d)
# Date stamps used in index names (project.<name>.<uid>.YYYY.MM.DD).
#TODAY='2018.03.27'
TODAY=$(date "+%Y.%m.%d")
# Regex-escaped form of TODAY for the sed expression below. Derived from
# $TODAY (rather than a second `date` call) so the two can never disagree
# if the script happens to run across midnight.
#TODAYSED='2018\.03\.27'
TODAYSED=${TODAY//./\\.}
# Tomorrow's date; anchored at noon so a DST transition cannot skip a day.
#TOMOR='2018.03.28'
TOMOR=$(date --date="12:00 tomorrow" "+%Y.%m.%d")
# Pick one Elasticsearch pod to exec into for all queries.
es_pod=$(basename "$(oc get pods -n logging -l component=es -o name | head -n 1)")
# all.lis: every healthy, open, per-project index, sorted by name (field 3).
oc exec -n logging -c elasticsearch "$es_pod" -- /elasticsearch/persistent/scripts/indices.sh \
  | grep -E '^green open' | grep -F 'project.' | sort -k 3 > "$TMPDIR/all.lis"
# today.lis: today's indices that actually hold documents (doc count, field 6, > 0) —
# i.e. the projects that are actively logging.
grep -F "$TODAY" "$TMPDIR/all.lis" | awk '{if ($6 > 0) { print $3 }}' > "$TMPDIR/today.lis"
# tomor.lis: tomorrow's indices that already exist.
grep -F "$TOMOR" "$TMPDIR/all.lis" | awk '{ print $3 }' > "$TMPDIR/tomor.lis"
# tomor-new.lis: the names tomorrow's indices would have for each active project.
sed "s/\.${TODAYSED}/.${TOMOR}/" "$TMPDIR/today.lis" > "$TMPDIR/tomor-new.lis"
# create.lis: names appearing only once across the two lists — wanted for
# tomorrow but not yet created (a count of 2 means it already exists).
cat "$TMPDIR/tomor.lis" "$TMPDIR/tomor-new.lis" | sort | uniq -c \
  | grep -v " 2 project\." | awk '{print $2}' | grep -F "$TOMOR" > "$TMPDIR/create.lis"
# Count the indices that need creating; nothing to do if the list is empty.
total=$(wc -l < "$TMPDIR/create.lis")
if [ "$total" -eq 0 ]; then
    exit 0
fi
echo "Creating ${total} new indices..."
# Fetch 'task_max_waiting_in_queue_millis' from the cluster health API:
# how long the oldest pending master task has been queued, in milliseconds.
# (print(...) form works under both Python 2 and Python 3.)
function get_max_task_wait {
  oc exec -n logging -c elasticsearch "$es_pod" -- es_util --query="_cluster/health" < /dev/null \
    | python -c 'import sys, json; print(json.load(sys.stdin)["task_max_waiting_in_queue_millis"])'
}
# Throttle index creation: if the master task queue is badly backed up
# (oldest task waiting > 30s), poll every 5s until it drains to <= 1s.
function wait_for_low_task_queue {
  local mtwt
  mtwt=$(get_max_task_wait)
  if [ "$mtwt" -gt 30000 ]; then
    while [ "$mtwt" -gt 1000 ]; do
      echo " Waiting for 'task_max_waiting_in_queue_millis' to drop to one second or under"
      sleep 5
      mtwt=$(get_max_task_wait)
    done
  fi
}
# Create each missing index, pausing whenever the master task queue backs up.
cnt=0
while read -r idx; do
  wait_for_low_task_queue
  cnt=$((cnt + 1))
  echo " creating ($cnt of $total) $idx ..."
  oc exec -n logging -c elasticsearch "$es_pod" -- /elasticsearch/persistent/scripts/create-index.sh "$idx" < /dev/null
done < "$TMPDIR/create.lis"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I ran this on a large cluster with ~7,000 primary shards, and ~14,000 total shards, and found that running it 3-6 times a day kept the procedure from being intrusive, and the indexing rate smooth across the rollover to the new day's indices.