Last active
December 30, 2019 09:43
-
-
Save nhp/3ec4f74f9b357f0bba20b704f07d6555 to your computer and use it in GitHub Desktop.
AWS Glacier upload from bash (requires a correctly configured AWS CLI); credits to https://github.com/dradtke
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# This script takes a path to a file and uploads it to Amazon
# Glacier. It does this in several steps:
#
# 1. Split the file up into 1 MiB chunks.
# 2. Initiate a multipart upload.
# 3. Upload each part individually.
# 4. Calculate the file's tree hash and finish the upload.
#
# See: http://amzn.to/1RjTwYk
#
# Author: Damien Radtke <damienradtke at gmail dot com>
# License: WTFPL
# Set this to the name of the Glacier vault to upload to.
VAULT_NAME=...
# 1 MiB in bytes; the tree hash algorithm requires chunks of this
# size.
CHUNK_SIZE=1048576

# The only argument is the path of the file to upload.
if [[ -z "${1}" ]]; then
  echo "No file provided."
  exit 1
fi
# Fail early with a clear message instead of letting realpath/wc/split
# trip over a missing, unreadable, or non-regular file later on.
if [[ ! -f "${1}" || ! -r "${1}" ]]; then
  echo "Not a readable file: ${1}"
  exit 1
fi

# Resolve to an absolute path because the script changes directory below.
# The argument is quoted so paths with spaces or glob characters survive.
if ! ARCHIVE="$(realpath -- "${1}")"; then
  echo "Could not resolve the path of: ${1}"
  exit 1
fi
ARCHIVE_SIZE=$(wc --bytes < "${ARCHIVE}")

# All chunk and hash files are created inside a private scratch directory.
# Stop if it cannot be created or entered; otherwise those files would be
# written into whatever directory the script was started from.
if ! TEMP=$(mktemp --directory); then
  echo "Could not create a temporary directory."
  exit 1
fi
if ! cd "${TEMP}"; then
  echo "Could not enter the temporary directory: ${TEMP}"
  exit 1
fi
# Runs on every exit path: announce the cleanup, step back to the directory
# the script was in before it entered the scratch directory, then delete the
# scratch directory (TEMP) and everything in it.
cleanup() {
  echo "Cleaning up."
  cd "${OLDPWD}"
  rm -rf "${TEMP}"
}
trap 'cleanup' EXIT
echo "Initiating multipart upload..."

# Split the archive into chunks.
# split's default two-letter suffix runs out after 676 files (aa..zz), which
# makes it fail on archives larger than 676 chunks; a six-letter suffix
# leaves ample room. All suffixes have the same length, so the chunk names
# still sort in archive order.
if ! split --bytes="${CHUNK_SIZE}" --suffix-length=6 -- "${ARCHIVE}" chunk; then
  echo "Could not split ${ARCHIVE} into chunks."
  exit 1
fi

# Count the chunks with a glob instead of parsing ls output. When nothing
# matches, the pattern stays literal, so check that the first entry exists.
CHUNK_FILES=(chunk*)
if [[ ! -e "${CHUNK_FILES[0]}" ]]; then
  echo "No chunks were produced; is the archive empty?"
  exit 1
fi
NUM_CHUNKS=${#CHUNK_FILES[@]}

# Glacier accepts at most 10,000 parts per multipart upload. Refuse up front
# rather than failing after thousands of parts have already been sent.
if (( NUM_CHUNKS > 10000 )); then
  echo "Archive needs ${NUM_CHUNKS} parts, but Glacier allows at most 10000 per upload."
  exit 1
fi

# Initiate upload.
# The aws call is not piped into another command, so RETVAL is the status of
# aws itself; with a trailing pipe it would be the status of the last stage.
UPLOAD_ID=$(aws glacier initiate-multipart-upload \
  --account-id=- \
  --vault-name="${VAULT_NAME}" \
  --archive-description="$(basename -- "${ARCHIVE}")" \
  --part-size="${CHUNK_SIZE}" \
  --query=uploadId)
RETVAL=$?
if [[ ${RETVAL} -ne 0 ]]; then
  echo "initiate-multipart-upload failed with status code: ${RETVAL}"
  exit 1
fi
# Strip the double quotes that surround the ID in JSON output.
UPLOAD_ID=${UPLOAD_ID//\"/}
if [[ -z "${UPLOAD_ID}" ]]; then
  echo "initiate-multipart-upload did not return an upload ID."
  exit 1
fi
echo "Upload ID: ${UPLOAD_ID}"
# Abort the upload if forced to exit.
#
# Signal handler for SIGINT and SIGTERM: tells Glacier to discard the
# in-progress multipart upload, then terminates the script with status 1.
# The explicit exit matters: when a trap handler merely returns, bash resumes
# the script where it was interrupted, which here would mean continuing to
# upload parts to (or trying to complete) an upload that was just aborted.
#
# Globals read: VAULT_NAME, UPLOAD_ID
function abort_upload {
  echo "Aborting upload."
  aws glacier abort-multipart-upload \
    --account-id=- \
    --vault-name="${VAULT_NAME}" \
    --upload-id="${UPLOAD_ID}"
  exit 1
}
trap abort_upload SIGINT SIGTERM
# Loop through the chunks.
#
# Each chunk file is uploaded as one part covering its byte range of the
# archive, and its SHA-256 digest is saved in a matching "hash<suffix>" file
# for the tree hash calculation. A part that fails with status 255 is retried
# with a growing delay, but only UPLOAD_MAX_ATTEMPTS times (default 10, can be
# overridden from the environment) so that a permanent error cannot make the
# script loop forever.
UPLOAD_MAX_ATTEMPTS=${UPLOAD_MAX_ATTEMPTS:-10}
INDEX=0
for CHUNK in chunk*; do
  # An unmatched glob stays literal; skip it instead of uploading "chunk*".
  [[ -e "${CHUNK}" ]] || continue

  # Calculate the byte range for this chunk.
  START=$((INDEX*CHUNK_SIZE))
  END=$((((INDEX+1)*CHUNK_SIZE)-1))
  # The last chunk may be short: never point past the end of the archive.
  END=$((END>(ARCHIVE_SIZE-1)?ARCHIVE_SIZE-1:END))

  # Increment the index.
  INDEX=$((INDEX+1))

  ATTEMPT=1
  RETRY_DELAY=1
  while true; do
    echo "Uploading chunk ${INDEX} / ${NUM_CHUNKS}..."
    aws glacier upload-multipart-part \
      --account-id=- \
      --vault-name="${VAULT_NAME}" \
      --upload-id="${UPLOAD_ID}" \
      --body="${CHUNK}" \
      --range="bytes ${START}-${END}/*" \
      >/dev/null
    RETVAL=$?
    if [[ ${RETVAL} -eq 0 ]]; then
      # Upload succeeded, on to the next one.
      break
    elif [[ ${RETVAL} -eq 130 ]]; then
      # Received a SIGINT; the signal trap takes care of aborting the upload.
      exit 1
    elif [[ ${RETVAL} -eq 255 && ${ATTEMPT} -lt ${UPLOAD_MAX_ATTEMPTS} ]]; then
      # Most likely a timeout, just let it try again after a pause that
      # doubles on every failure, up to one minute.
      echo "Chunk ${INDEX} ran into an error, retrying..."
      sleep "${RETRY_DELAY}"
      ATTEMPT=$((ATTEMPT+1))
      RETRY_DELAY=$((RETRY_DELAY*2>60?60:RETRY_DELAY*2))
    else
      # Either a different error, or status 255 with no attempts left.
      echo "upload-multipart-part failed with status code: ${RETVAL}"
      abort_upload
      exit 1
    fi
  done

  # Keep the binary digest of this chunk; ${CHUNK:5} is the part of the file
  # name after the "chunk" prefix, so hash files sort like their chunks.
  if ! openssl dgst -sha256 -binary "${CHUNK}" > "hash${CHUNK:5}"; then
    echo "Could not hash ${CHUNK}."
    abort_upload
    exit 1
  fi
done
# Calculate tree hash.
# ("And now for the tricky bit.")

# Prints the hex tree hash built from the binary SHA-256 digests stored in
# the hash* files of the current directory, taken in sorted name order.
#
# Adjacent digests are concatenated and hashed pairwise, level by level,
# until one digest is left; a digest without a partner is carried up to the
# next level unchanged. A lone digest is already the tree hash and is printed
# as-is rather than being hashed a second time. With no hash files at all the
# SHA-256 of empty input is printed.
#
# The hash* files are consumed: each pair is replaced by one combined file
# named after the first file of the pair, which keeps the sort order intact.
#
# Outputs: 64 hex characters on stdout, no trailing newline.
# Returns: non-zero if a digest could not be computed or stored.
compute_tree_hash() {
  local -a level
  local i
  while true; do
    # Expand the glob directly instead of parsing ls output.
    level=(hash*)
    if [[ ! -e "${level[0]}" ]]; then
      # The pattern matched nothing and was left as a literal string.
      openssl dgst -sha256 -binary < /dev/null | od -An -tx1 | tr -d ' \n'
      return
    fi
    if (( ${#level[@]} == 1 )); then
      # One digest left: this is the root, just convert it to hex.
      od -An -tx1 < "${level[0]}" | tr -d ' \n'
      return
    fi
    for (( i = 0; i + 1 < ${#level[@]}; i += 2 )); do
      # "temphash" does not match hash*, so it never joins a level by itself.
      cat -- "${level[i]}" "${level[i+1]}" \
        | openssl dgst -sha256 -binary > temphash || return 1
      rm -- "${level[i]}" "${level[i+1]}" || return 1
      mv -- temphash "${level[i]}" || return 1
    done
  done
}

echo "Calculating tree hash..."
TREE_HASH=$(compute_tree_hash)
# A SHA-256 digest in hex is always 64 characters; anything else means a
# tool in the pipeline failed.
if [[ $? -ne 0 || ${#TREE_HASH} -ne 64 ]]; then
  echo "Could not calculate the tree hash."
  abort_upload
  exit 1
fi
# Finish the multipart upload by sending Glacier the tree hash and the total
# archive size. On success the script ends with status 0; otherwise the
# upload is aborted and the script ends with status 1.
echo "Finalizing..."
RETVAL=0
aws glacier complete-multipart-upload \
  --account-id=- \
  --vault-name="${VAULT_NAME}" \
  --upload-id="${UPLOAD_ID}" \
  --checksum="${TREE_HASH}" \
  --archive-size=${ARCHIVE_SIZE} || RETVAL=$?

if (( RETVAL == 0 )); then
  echo "Done."
  exit 0
fi

# Completion was rejected: report the status and discard the uploaded parts.
echo "complete-multipart-upload failed with status code: ${RETVAL}"
echo "Aborting upload ${UPLOAD_ID}"
aws glacier abort-multipart-upload \
  --account-id=- \
  --vault-name="${VAULT_NAME}" \
  --upload-id="${UPLOAD_ID}"
exit 1
Do you have any idea how to solve this?
I solved this by adding the '--suffix-length=6' option to the 'split' command.
With its default two-letter suffix, the split
command stops after 676 chunks (aa to zz).
# Split the archive into chunks.
split --bytes=${CHUNK_SIZE} "${ARCHIVE}" chunk
to
# Split the archive into chunks.
split --bytes=${CHUNK_SIZE} --suffix-length=6 "${ARCHIVE}" chunk
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you !! Great script !!
However, I get an error while completing the multipart upload. The error is:
I have 87 files of 4 GB each (except the last one, which is 2.1 GB). I checked after finalizing:
Do you have any idea how to solve this?
Even after re-splitting the original files into 1 MiB parts to compute the tree hash, I get the same error. I just added this part at the end, before calculating the tree hash:
Thanks in advance,