-
-
Save aelkz/f96a15f1a645073e9c37654faa9089f1 to your computer and use it in GitHub Desktop.
Back up and restore an Elasticsearch index (shamelessly copied from http://tech.superhappykittymeow.com/?p=296)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Herein we back up our indexes! This script should run at around 6pm, after logstash
# rotates to a new ES index and there's no new data coming in to the old one. We grab the
# index metadata, compress the data files, create a restore script, and push it all up to S3.
#
# Every step is checked: a failure aborts the run with a message on stderr and a non-zero
# exit status, so a partial or corrupt backup is never uploaded.

set -u

# Report a fatal error on stderr and stop.
die() {
  printf '\nERROR: %s\n' "$*" >&2
  exit 1
}

# Take both date formats from a single `date` call so they cannot disagree if the script
# happens to run across midnight.
read -r TODAY YEARMONTH < <(date +"%Y.%m.%d %Y-%m")
[[ -n "$TODAY" && -n "$YEARMONTH" ]] || die "could not determine today's date"

ES_URL="http://localhost:9200"
INDEXNAME="logstash-$TODAY" # this had better match the index name in ES
INDEXDIR="/usr/local/elasticsearch/data/logstash/nodes/0/indices"
# Kept as an array so the command and its fixed arguments are passed as separate words.
BACKUPCMD=(/usr/local/backupTools/s3cmd --config=/usr/local/backupTools/s3cfg put)
BACKUPDIR="/mnt/es-backups"
S3TARGET="s3://backups/elasticsearch/$YEARMONTH/$INDEXNAME"
RESTORE_SCRIPT="$BACKUPDIR/$INDEXNAME-restore.sh"

# Scratch files live in a private directory that is removed on every exit path, so a
# failed run cannot leave stale metadata behind for the next one.
WORKDIR=$(mktemp -d) || die "could not create a temporary directory"
trap 'rm -rf -- "$WORKDIR"' EXIT
MAPPING="$WORKDIR/mapping"
MAPPOST="$WORKDIR/mappost"

# create mapping file with index settings. this metadata is required by ES to use index file data
echo -n "Backing up metadata… "
# -f turns HTTP errors (e.g. the index does not exist) into a non-zero exit status.
curl -s -f -XGET -o "$MAPPING" "$ES_URL/$INDEXNAME/_mapping?pretty=true" \
  || die "could not fetch the mapping for $INDEXNAME"
# Prepend hardcoded settings metadata to the index-specific metadata. The first two lines
# of the mapping response are stripped so the remainder can be spliced under "mappings".
{
  echo '{"settings":{"number_of_shards":5,"number_of_replicas":1},"mappings":{'
  sed '1,2d' "$MAPPING"
} > "$MAPPOST" || die "could not assemble the index metadata"
echo "DONE!"

# now lets tar up our data files. these are huge, so lets be nice
echo -n "Backing up data files (this may take some time)… "
[[ -d "$INDEXDIR/$INDEXNAME" ]] || die "index directory $INDEXDIR/$INDEXNAME not found"
mkdir -p "$BACKUPDIR" || die "could not create $BACKUPDIR"
# -C makes tar change into the index directory itself, so archive members stay relative
# to it and there is no separate, unchecked cd.
nice -n 19 tar czf "$BACKUPDIR/$INDEXNAME.tar.gz" -C "$INDEXDIR" "$INDEXNAME" \
  || die "could not archive $INDEXNAME"
echo "DONE!"

echo -n "Creating restore script… "
# time to create our restore script! oh god scripts creating scripts, this never ends well…
# The outer here-document is unquoted, so variables and the mapping are expanded NOW and
# written into the restore script as literal text. The mapping is placed in a quoted inner
# here-document so quotes inside the JSON cannot break the generated script.
cat << EOF > "$RESTORE_SCRIPT" || die "could not write $RESTORE_SCRIPT"
#!/bin/bash
# this script requires $INDEXNAME.tar.gz and will restore it into elasticsearch
# it is ESSENTIAL that the index you are restoring does NOT exist in ES. delete it
# if it does BEFORE trying to restore data.
# create index and mapping
echo -n "Creating index and mappings… "
curl -XPUT '$ES_URL/$INDEXNAME/' --data-binary @- > /dev/null 2>&1 << 'MAPPING_EOF'
$(cat "$MAPPOST")
MAPPING_EOF
echo "DONE!"
# extract our data files into place
echo -n "Restoring index (this may take a while)… "
cd "$INDEXDIR" || exit 1
tar xzf "$BACKUPDIR/$INDEXNAME.tar.gz" || exit 1
echo "DONE!"
# restart ES to allow it to open the new dir and file data
echo -n "Restarting Elasticsearch… "
/etc/init.d/es restart
echo "DONE!"
EOF
echo "DONE!" # restore script done

# push both tar.gz and restore script to s3
echo -n "Saving to S3 (this may take some time)… "
"${BACKUPCMD[@]}" "$BACKUPDIR/$INDEXNAME.tar.gz" "$S3TARGET.tar.gz" \
  || die "could not upload $INDEXNAME.tar.gz"
"${BACKUPCMD[@]}" "$RESTORE_SCRIPT" "$S3TARGET-restore.sh" \
  || die "could not upload $INDEXNAME-restore.sh"
echo "DONE!"
# tmp files are cleaned up by the EXIT trap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Performs 'rotation' of ES indices. Maintains only 8 indices (1 week) of logstash logs; this
# script is to be run at midnight daily and removes the oldest one (as well as any 1970s-era
# log indices, as these are a product of timestamp fail). Please note the amount of
# error-checking in this script, as ES would rather delete everything than nothing…

ES_URL="http://localhost:9200"
MAX_INDICES=8 # number of log indices to keep

# Report a fatal error on stderr and stop.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print the names of all log indices, one per line, sorted and de-duplicated.
# Returns non-zero when the status request itself fails, so callers can tell
# "no indices" apart from "could not ask".
list_log_indices() {
  local status
  status=$(curl -s -f "$ES_URL/_status?pretty=true") || return 1
  # The name is the 4th double-quote-delimited field of lines mentioning both "index"
  # and "log"; lines without such a field are dropped so no empty name is ever emitted.
  printf '%s\n' "$status" \
    | grep index \
    | grep log \
    | awk -F\" '$4 != "" {print $4}' \
    | LC_ALL=C sort -u
}

# Delete a single index by name ($1).
# Refuses empty names and names that could address more than one index, because a
# DELETE against a bare or wildcard path would remove far more than intended.
delete_index() {
  local name=$1
  case "$name" in
    '' | _all | *[*,/]*)
      printf 'Refusing to delete suspicious index name "%s"\n' "$name" >&2
      return 1
      ;;
  esac
  curl -s -XDELETE "$ES_URL/$name/"
}

# Before we do anything, let's get rid of any nasty 1970s-era indices we have floating around
indices=$(list_log_indices) || die "could not read the index list from $ES_URL"
while IFS= read -r line; do
  [[ -n "$line" ]] || continue
  echo "Indices with screwed-up timestamps found; removing"
  echo -n "Deleting index $line: "
  delete_index "$line" && echo "DONE!"
done < <(printf '%s\n' "$indices" | grep 1970)

# Get list of indices (again, since the purge above may have changed it); should we rotate?
indices=$(list_log_indices) || die "could not read the index list from $ES_URL"
INDEXCOUNT=$(printf '%s\n' "$indices" | grep -c .)

if (( INDEXCOUNT <= MAX_INDICES )); then
  echo "$MAX_INDICES or fewer indices, bailing with no action"
  exit 0
fi

echo "More than $MAX_INDICES indices, time to do some cleaning"
# Let's do some cleaning! Names sort chronologically, so the first entry is the oldest.
# Only one index is removed per run.
OLDESTLOG=$(printf '%s\n' "$indices" | head -n1)
echo -n "Deleting oldest index, $OLDESTLOG: "
delete_index "$OLDESTLOG" || die "could not delete index '$OLDESTLOG'"
echo "DONE!"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment