Created
February 29, 2012 10:42
-
-
Save nherment/1939828 to your computer and use it in GitHub Desktop.
Back up and restore an Elasticsearch index (shamelessly copied from http://tech.superhappykittymeow.com/?p=296)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# herein we backup our indexes! this script should run at like 6pm or something, after logstash
# rotates to a new ES index and theres no new data coming in to the old one. we grab metadatas,
# compress the data files, create a restore script, and push it all up to S3.
#
# Steps:
#   1. fetch today's index mapping from ES and prepend the hardcoded settings
#   2. tar+gzip the index data directory into $BACKUPDIR
#   3. generate a self-contained restore script (the mapping JSON is embedded in it)
#   4. upload the archive and the restore script to S3
#
# Any failing step aborts the run with a non-zero exit status so that a broken
# backup is never reported as "DONE!".
set -euo pipefail

# Print an error on stderr and abort.
die() {
  printf '\nERROR: %s\n' "$*" >&2
  exit 1
}

# Take both date stamps from a single `date` call so they cannot disagree
# if the script happens to run across midnight.
read -r TODAY YEARMONTH < <(date +"%Y.%m.%d %Y-%m")
INDEXNAME="logstash-$TODAY" # this had better match the index name in ES
INDEXDIR="/usr/local/elasticsearch/data/logstash/nodes/0/indices/"
# Kept as an array so the command and its arguments are never re-split.
BACKUPCMD=(/usr/local/backupTools/s3cmd --config=/usr/local/backupTools/s3cfg put)
BACKUPDIR="/mnt/es-backups/"
S3TARGET="s3://backups/elasticsearch/$YEARMONTH/$INDEXNAME"

ARCHIVE="${BACKUPDIR%/}/$INDEXNAME.tar.gz"
RESTORESCRIPT="${BACKUPDIR%/}/$INDEXNAME-restore.sh"

# Private scratch directory instead of fixed /tmp names; removed on every exit path.
WORKDIR=$(mktemp -d) || die "could not create a temporary directory"
trap 'rm -rf -- "$WORKDIR"' EXIT

# create mapping file with index settings. this metadata is required by ES to use index file data
printf '%s' "Backing up metadata… "
curl --silent --show-error --fail -XGET -o "$WORKDIR/mapping" \
  "http://localhost:9200/$INDEXNAME/_mapping?pretty=true" \
  || die "could not fetch the mapping for $INDEXNAME"
[[ -s "$WORKDIR/mapping" ]] || die "mapping for $INDEXNAME is empty"
{
  # prepend hardcoded settings metadata to index-specific metadata
  echo '{"settings":{"number_of_shards":5,"number_of_replicas":1},"mappings":{'
  # strip the first two lines of the metadata (they are replaced by the line above)
  sed '1,2d' "$WORKDIR/mapping"
} > "$WORKDIR/mappost"
echo "DONE!"

# now lets tar up our data files. these are huge, so lets be nice
printf '%s' "Backing up data files (this may take some time)… "
mkdir -p -- "$BACKUPDIR"
[[ -d "${INDEXDIR%/}/$INDEXNAME" ]] || die "index directory ${INDEXDIR%/}/$INDEXNAME not found"
# -C replaces the unchecked `cd`, so tar can never archive from the wrong directory.
nice -n 19 tar czf "$ARCHIVE" -C "$INDEXDIR" "$INDEXNAME" \
  || die "could not archive $INDEXNAME"
echo "DONE!"

printf '%s' "Creating restore script… "
# time to create our restore script! oh god scripts creating scripts, this never ends well…
# The outer heredoc is unquoted on purpose: $INDEXNAME, $INDEXDIR, $ARCHIVE and the
# mapping JSON are expanded NOW and baked into the generated script. The JSON is fed
# to curl through a quoted inner heredoc so quotes inside it cannot break the script.
# `>` (not `>>`) so a re-run on the same day replaces the script instead of appending.
cat > "$RESTORESCRIPT" << EOF
#!/bin/bash
# this script requires $INDEXNAME.tar.gz and will restore it into elasticsearch
# it is ESSENTIAL that the index you are restoring does NOT exist in ES. delete it
# if it does BEFORE trying to restore data.

# create index and mapping
echo -n "Creating index and mappings… "
curl -XPUT 'http://localhost:9200/$INDEXNAME/' --data-binary @- > /dev/null 2>&1 << 'MAPPING_JSON'
$(cat "$WORKDIR/mappost")
MAPPING_JSON
echo "DONE!"

# extract our data files into place
echo -n "Restoring index (this may take a while)… "
cd '$INDEXDIR' || exit 1
tar xzf '$ARCHIVE' || exit 1
echo "DONE!"

# restart ES to allow it to open the new dir and file data
echo -n "Restarting Elasticsearch… "
/etc/init.d/es restart
echo "DONE!"
EOF
echo "DONE!" # restore script done

# push both tar.gz and restore script to s3
printf '%s' "Saving to S3 (this may take some time)… "
"${BACKUPCMD[@]}" "$ARCHIVE" "$S3TARGET.tar.gz" \
  || die "upload of $ARCHIVE failed"
"${BACKUPCMD[@]}" "$RESTORESCRIPT" "$S3TARGET-restore.sh" \
  || die "upload of $RESTORESCRIPT failed"
echo "DONE!"

# tmp files are cleaned up by the EXIT trap
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Performs 'rotation' of ES indices. Maintains only 8 indices (1 week) of logstash logs; this script
# is to be run at midnight daily and removes the oldest one (as well as any 1970s-era log indices,
# as these are a product of timestamp fail). Please note the insane amount of error-checking
# in this script, as ES would rather delete everything than nothing…
#
# Index names are read from the _status endpoint: every line mentioning both
# "index" and "log" is taken, and the 4th double-quote-delimited field is the name.
set -u

ES_URL="http://localhost:9200"
MAXINDICES=8 # indices to keep; the oldest one is removed once there are more

# Print an error on stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print the de-duplicated log index names, one per line, sorted by name
# (for logstash-YYYY.MM.DD names that means oldest first).
# Returns non-zero when ES cannot be queried, so callers can tell
# "ES is down" apart from "there are no indices".
list_log_indices() {
  local status
  status=$(curl -s --fail "$ES_URL/_status?pretty=true") || return 1
  printf '%s\n' "$status" \
    | grep index \
    | grep log \
    | awk -F\" '$4 != "" {print $4}' \
    | LC_ALL=C sort -u
  return 0
}

# Delete one index by name.
# An empty name would turn the request into "DELETE $ES_URL//", i.e. a delete
# against the cluster root, so anything that is not a plain name is refused.
delete_index() {
  local name=$1
  [[ -n "$name" ]] || die "refusing to delete: empty index name"
  [[ "$name" != */* && "$name" != *[[:space:]]* ]] \
    || die "refusing to delete: suspicious index name '$name'"
  curl -s -XDELETE "$ES_URL/$name/"
}

indices=$(list_log_indices) || die "could not query $ES_URL/_status"

# Before we do anything, let's get rid of any nasty 1970s-era indices we have floating around
stale=$(grep 1970 <<< "$indices" || true)
if [[ -n "$stale" ]]; then
  echo "Indices with screwed-up timestamps found; removing"
  while IFS= read -r line; do
    echo -n "Deleting index $line: "
    delete_index "$line"
    echo "DONE!"
  done <<< "$stale"
  # Re-read the list so the count below reflects what is left.
  indices=$(list_log_indices) || die "could not query $ES_URL/_status"
fi

# Get list of indices; should we rotate?
INDEXCOUNT=0
OLDESTLOG=""
while IFS= read -r name; do
  if [[ -n "$name" ]]; then
    # The list is sorted, so the first name seen is the oldest.
    if [[ -z "$OLDESTLOG" ]]; then
      OLDESTLOG=$name
    fi
    INDEXCOUNT=$((INDEXCOUNT + 1))
  fi
done <<< "$indices"

if (( INDEXCOUNT <= MAXINDICES )); then
  echo "$MAXINDICES or fewer indices, bailing with no action"
  exit 0
fi

echo "More than $MAXINDICES indices, time to do some cleaning"
# Let's do some cleaning!
echo -n "Deleting oldest index, $OLDESTLOG: "
delete_index "$OLDESTLOG"
echo "DONE!"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
There is a disclaimer on the original post.
I still get comments/questions regarding this process I hacked together many moons ago. I must request that anybody who’s looking for a way to back up Elasticsearch indices STOP and do not follow the process described — it was for ES 0.00000000001, written back in 2011. You should not do what I suggest here! I’m saving this purely for historical purposes.
What you should do instead is save your events in flat text — in Logstash, output to both your ES index for searching via Kibana or whatnot, and also output your event to a flat file, likely periodic (per-day or month or whatever). Back up and archive these text files, since they compress quite well. When you want to restore data from a period, just re-process it through Logstash — CPU is cheap nowadays with cloud instances! The data is the important part — processed or not, if you have the data in an easily stored format, you can re-process it.