Last active
December 30, 2019 09:43
-
-
Save nhp/3ec4f74f9b357f0bba20b704f07d6555 to your computer and use it in GitHub Desktop.
AWS Glacier upload from bash (requires a correctly configured AWS CLI); credits to https://github.com/dradtke
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# This script takes a path to a file and uploads it to Amazon
# Glacier. It does this in several steps:
#
#   1. Split the file up into 1MiB chunks.
#   2. Initiate a multipart upload.
#   3. Upload each part individually.
#   4. Calculate the file's tree hash and finish the upload.
#
# See: http://amzn.to/1RjTwYk
#
# Author: Damien Radtke <damienradtke at gmail dot com>
# License: WTFPL

# Name of the Glacier vault to upload to. Either replace the "..." default
# below, or export VAULT_NAME in the environment before running the script.
VAULT_NAME="${VAULT_NAME:-...}"

# 1 MiB in bytes; the tree hash algorithm requires chunks of this
# size.
CHUNK_SIZE=1048576
# The file to upload is the first (and only) positional argument.
if [[ -z "${1}" ]]; then
  echo "No file provided."
  exit 1
fi

# Resolve to an absolute path now: the script changes into a scratch
# directory below, which would invalidate a relative path.
ARCHIVE="$(realpath -- "${1}")"
if [[ ! -f "${ARCHIVE}" || ! -r "${ARCHIVE}" ]]; then
  echo "Cannot read file: ${1}" >&2
  exit 1
fi

# Size in bytes; the arithmetic expansion strips any padding that wc emits.
ARCHIVE_SIZE=$(( $(wc -c < "${ARCHIVE}") ))

# All chunk and hash files are created inside a private scratch directory.
TEMP="$(mktemp -d)"
if [[ -z "${TEMP}" || ! -d "${TEMP}" ]]; then
  echo "Could not create a temporary directory." >&2
  exit 1
fi
if ! cd "${TEMP}"; then
  echo "Could not enter temporary directory: ${TEMP}" >&2
  rm -rf -- "${TEMP}"
  exit 1
fi
# Clean up at exit.
#
# Steps out of the scratch directory and deletes it. Registered on EXIT so
# that it runs on every exit path of the script.
#
# Globals:
#   TEMP (read) - path of the scratch directory to remove
# Outputs:
#   Writes "Cleaning up." to stdout.
function cleanup {
  echo "Cleaning up."
  # Leave the scratch directory before removing it; fall back to / when the
  # previous directory is unknown or no longer exists.
  cd ~- 2>/dev/null || cd /
  # Never hand an empty path to rm -rf.
  if [[ -n "${TEMP}" ]]; then
    rm -rf -- "${TEMP}"
  fi
}
trap cleanup EXIT
echo "Initiating multipart upload..."

# Split the archive into chunks. split's default two-letter suffix runs out
# after 676 chunks (aa..zz); four letters allow 26^4 = 456,976 chunk names
# while keeping the names fixed-width so that glob order equals byte order.
if ! split --bytes="${CHUNK_SIZE}" --suffix-length=4 "${ARCHIVE}" chunk; then
  echo "Could not split ${ARCHIVE} into chunks." >&2
  exit 1
fi

# Count the chunks with a glob instead of parsing ls output.
CHUNK_FILES=(chunk*)
if [[ ! -e "${CHUNK_FILES[0]}" ]]; then
  echo "No chunks were produced from ${ARCHIVE}; is the file empty?" >&2
  exit 1
fi
NUM_CHUNKS=${#CHUNK_FILES[@]}

# Initiate upload. The output is captured without a pipeline so that $?
# below is the exit status of aws itself, not of a trailing filter.
UPLOAD_ID=$(aws glacier initiate-multipart-upload \
  --account-id=- \
  --vault-name="${VAULT_NAME}" \
  --archive-description="$(basename -- "${ARCHIVE}")" \
  --part-size="${CHUNK_SIZE}" \
  --query=uploadId)
RETVAL=$?
if [[ ${RETVAL} -ne 0 ]]; then
  echo "initiate-multipart-upload failed with status code: ${RETVAL}"
  exit 1
fi

# --query prints the ID as a JSON string; drop the surrounding quotes.
UPLOAD_ID=${UPLOAD_ID//\"/}
if [[ -z "${UPLOAD_ID}" ]]; then
  echo "initiate-multipart-upload returned no upload ID." >&2
  exit 1
fi
echo "Upload ID: ${UPLOAD_ID}"
# Abort the upload if forced to exit.
#
# Asks Glacier to discard the in-progress multipart upload.
#
# Globals:
#   VAULT_NAME (read) - vault that owns the upload
#   UPLOAD_ID  (read) - ID of the multipart upload to abort
# Outputs:
#   Writes "Aborting upload." to stdout, followed by any aws output.
# Returns:
#   The exit status of the aws command.
function abort_upload {
  echo "Aborting upload."
  aws glacier abort-multipart-upload \
    --account-id=- \
    --vault-name="${VAULT_NAME}" \
    --upload-id="${UPLOAD_ID}"
}
# The handler must exit explicitly: bash otherwise resumes the script after
# the trap returns and would keep working on an upload that was just aborted.
trap 'abort_upload; exit 1' SIGINT SIGTERM
# Loop through the chunks: upload each one as a part of the multipart
# upload, then store its binary SHA-256 digest in a matching hash* file for
# the tree hash calculation.

# Optional cap on consecutive retries per chunk. The default of 0 keeps
# retrying forever.
MAX_RETRIES="${MAX_RETRIES:-0}"

# Expand the glob once; without any match bash leaves the pattern as a
# literal string, which must not be uploaded.
PENDING_CHUNKS=(chunk*)
if [[ ! -e "${PENDING_CHUNKS[0]}" ]]; then
  echo "No chunks found to upload."
  abort_upload
  exit 1
fi

INDEX=0
for CHUNK in "${PENDING_CHUNKS[@]}"; do
  # Calculate the byte range for this chunk; the last chunk may be short,
  # so its end is clamped to the final byte of the archive.
  START=$((INDEX * CHUNK_SIZE))
  END=$((START + CHUNK_SIZE - 1))
  if (( END > ARCHIVE_SIZE - 1 )); then
    END=$((ARCHIVE_SIZE - 1))
  fi

  # Increment the index (from here on it is the 1-based chunk number).
  INDEX=$((INDEX + 1))

  ATTEMPTS=0
  while true; do
    echo "Uploading chunk ${INDEX} / ${NUM_CHUNKS}..."
    aws glacier upload-multipart-part \
      --account-id=- \
      --vault-name="${VAULT_NAME}" \
      --upload-id="${UPLOAD_ID}" \
      --body="${CHUNK}" \
      --range="bytes ${START}-${END}/*" \
      >/dev/null
    RETVAL=$?
    if [[ ${RETVAL} -eq 0 ]]; then
      # Upload succeeded, on to the next one.
      break
    elif [[ ${RETVAL} -eq 130 ]]; then
      # Received a SIGINT.
      exit 1
    elif [[ ${RETVAL} -eq 255 ]]; then
      # Most likely a timeout, just let it try again.
      ATTEMPTS=$((ATTEMPTS + 1))
      if (( MAX_RETRIES > 0 && ATTEMPTS >= MAX_RETRIES )); then
        echo "Chunk ${INDEX} failed ${ATTEMPTS} times, giving up."
        abort_upload
        exit 1
      fi
      echo "Chunk ${INDEX} ran into an error, retrying..."
      sleep 1
    else
      echo "upload-multipart-part failed with status code: ${RETVAL}"
      abort_upload
      exit 1
    fi
  done

  # Strip the "chunk" prefix so that hash files sort like their chunks.
  openssl dgst -sha256 -binary "${CHUNK}" > "hash${CHUNK:5}"
done
# Calculate tree hash.
# ("And now for the tricky bit.")

# Reduce per-chunk SHA-256 digests to a single tree hash.
#
# Adjacent digests are concatenated and hashed again, level by level, until
# one digest remains; a digest without a partner moves up a level unchanged.
# With a single input the tree hash is that digest itself (it is NOT hashed
# a second time).
#
# The input files are combined in place, and a scratch file named
# "temphash" is created in the current directory.
#
# Arguments:
#   $@ - files holding binary SHA-256 digests, in chunk order
# Outputs:
#   The tree hash as lowercase hex on stdout (no trailing newline).
# Returns:
#   0 on success, 1 when no digest file was given or hashing failed.
function compute_tree_hash {
  local -a level=("$@")
  local -a next=()
  local i

  if (( ${#level[@]} == 0 )) || [[ ! -f "${level[0]}" ]]; then
    echo "No chunk hashes found." >&2
    return 1
  fi

  while (( ${#level[@]} > 1 )); do
    next=()
    for (( i = 0; i < ${#level[@]}; i += 2 )); do
      if (( i + 1 < ${#level[@]} )); then
        cat -- "${level[i]}" "${level[i+1]}" \
          | openssl dgst -sha256 -binary > temphash || return 1
        rm -- "${level[i+1]}"
        # The combined digest takes the name of the first file of the pair,
        # which preserves the ordering for the next level.
        mv -- temphash "${level[i]}"
      fi
      next+=("${level[i]}")
    done
    level=("${next[@]}")
  done

  # Hex-encode the root digest ourselves rather than parsing openssl's
  # human-readable output.
  od -An -v -tx1 < "${level[0]}" | tr -d ' \n'
}

echo "Calculating tree hash..."
TREE_HASH=$(compute_tree_hash hash*)
if [[ -z "${TREE_HASH}" ]]; then
  # Not fatal here: complete-multipart-upload will fail on the empty
  # checksum, and that failure path aborts the upload.
  echo "Could not calculate the tree hash." >&2
fi
# Finish the multipart upload: Glacier is given the tree hash and the total
# archive size along with the upload ID.
echo "Finalizing..."
aws glacier complete-multipart-upload \
  --account-id=- \
  --vault-name="${VAULT_NAME}" \
  --upload-id="${UPLOAD_ID}" \
  --checksum="${TREE_HASH}" \
  --archive-size="${ARCHIVE_SIZE}"
RETVAL=$?
if [[ ${RETVAL} -ne 0 ]]; then
  echo "complete-multipart-upload failed with status code: ${RETVAL}"
  # Explicitly abort the upload that could not be completed.
  echo "Aborting upload ${UPLOAD_ID}"
  aws glacier abort-multipart-upload \
    --account-id=- \
    --vault-name="${VAULT_NAME}" \
    --upload-id="${UPLOAD_ID}"
  exit 1
fi
echo "Done."
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I solved this by adding the '--suffix-length=6' option to the `split` command.
With the default two-letter suffix, `split` stops after 676 chunks (aa to zz).