Skip to content

Instantly share code, notes, and snippets.

@ATpoint
Last active January 16, 2024 20:02
Show Gist options
  • Save ATpoint/c4eb9d8f324ded3239a07d6b92fe05c6 to your computer and use it in GitHub Desktop.
Save ATpoint/c4eb9d8f324ded3239a07d6b92fe05c6 to your computer and use it in GitHub Desktop.
Tar or zip a directory and validate integrity by comparison of file list md5sums
#!/bin/bash
# BundleAndValidate <FOLDER_TO_BUNDLE> <tar/tarReplaceLink/zip> <Destination>
#
# Tar a folder and validate integrity of the tarball. For this, compare the file listing of the input folder
# with the file listing of the tarball via md5sum. If identical assume tarball is fine.
# Option tar makes a tarball, tarReplaceLink uses tar -h to replace symlinks with the files they point to, and zip makes a zipball.
# Don't be surprised that it can take some time before you see the tarball at destination after submitting the job.
# The first step is a full listing of the input folder, and with many folders and files that can take a while.
# Uses mbuffer internally to buffer incoming tar stream.
# USAGE: $1 is the input directory, $2 is tar, tarReplaceLink or zip, $3 is the (existing) destination directory
#######################################
# Bundle a directory into a tar or zip archive and validate the archive by
# comparing its member listing against a listing of the input directory.
#
# Both listings are normalized (trailing "/" and leading "/", "./", "../"
# removed, then byte-wise sorted) before their md5sums are compared, so that
# differences in how find, tar and unzip print the same path do not matter.
#
# Arguments:
#   $1 - directory to bundle
#   $2 - mode: tar | tarReplaceLink (tar -h, archive the link targets) | zip
#   $3 - existing destination directory
# Outputs (all inside $3, prefixed with the basename of $1):
#   <base>.tar or <base>.zip    the archive
#   <base>___list_original      normalized listing of the input directory
#   <base>___list_received      normalized listing read back from the archive
#   <base>___list_received.md5  md5sum of the received listing
#   <base>___list_success       marker: both listings are identical
#   <base>___list_fail          marker: archiving failed or the listings differ
# Returns:
#   0 on success or when a success marker already exists, non-zero otherwise.
#   Argument errors and archiving errors terminate the shell via exit.
# NOTE: `set -euo pipefail` and `exit` act on the calling shell, so call this
# from a script or `bash -c`, not from an interactive session.
# NOTE(review): paths containing newlines, or characters that tar escapes when
# listing, can make the listings differ and yield a false "fail".
#######################################
function BundleAndValidate {
  set -euo pipefail

  # Check the number of arguments
  if [ "$#" -ne 3 ]; then
    echo "[USAGE]: ./BundleAndValidate <directory> <tar or tarReplaceLink or zip> <destination>" >&2
    exit 1
  fi

  local src="${1}" mode="${2}" dest="${3}"

  # Check if the first argument is a directory
  if [ ! -d "${src}" ]; then
    echo "[ERROR]: The 1st argument must be a directory!" >&2
    exit 1
  fi

  # Check that the mode is one of the three supported values
  case "${mode}" in
    tar | tarReplaceLink | zip) ;;
    *)
      echo "[ERROR]: The 2nd argument can only be tar, tarReplaceLink or zip!" >&2
      exit 1
      ;;
  esac

  # Check that the destination is an existing directory (quoted, so paths
  # with spaces or glob characters are tested correctly)
  if [ ! -d "${dest}" ]; then
    echo "[ERROR]: The 3rd argument must be an existing directory for the output!" >&2
    exit 1
  fi

  # Only require the tools that the selected mode actually runs
  if [ "${mode}" = "zip" ]; then
    command -v zip >/dev/null 2>&1 || { echo 'zip not in PATH!' >&2; exit 1; }
    command -v unzip >/dev/null 2>&1 || { echo 'unzip not in PATH!' >&2; exit 1; }
  else
    command -v mbuffer >/dev/null 2>&1 || { echo 'mbuffer not in PATH!' >&2; exit 1; }
  fi

  local base
  base=$(basename "${src}")
  local prefix="${dest}/${base}"
  local list_original="${prefix}___list_original"
  local list_received="${prefix}___list_received"

  # Skip if success exists. `return` rather than `exit`, so a caller that
  # loops over several directories is not terminated by the first skip.
  if [ -f "${prefix}___list_success" ]; then
    echo "[Info] ${src} exists at Destination -- skipping!"
    return 0
  fi

  # A fail marker left by an earlier attempt must not outlive a good re-run
  rm -f "${prefix}___list_fail"

  echo "[Start ${src} ==> $(date +"%d-%m-%y ::: %T")]"

  # tar -h and zip (without -y) follow symlinks, so the reference listing has
  # to follow them as well; plain tar stores the links themselves.
  local -a lister=(find)
  if [ "${mode}" != "tar" ]; then
    lister+=(-L)
  fi

  # Strip trailing slashes first (so "./" becomes "."), then the leading
  # "/", "./" and "../" components that the archivers drop or rewrite.
  local -a normalize=(sed -E -e 's:/+$::' -e 's:^(\.{0,2}/)+::')

  "${lister[@]}" "${src}" | "${normalize[@]}" | LC_ALL=C sort > "${list_original}"

  # Create the archive and read its member list back. Failures are collected
  # in `status` instead of tripping `set -e`, so a fail marker can be written.
  local status=0
  case "${mode}" in
    zip)
      # zip would otherwise update a leftover archive of an earlier attempt
      rm -f "${prefix}.zip"
      {
        zip -r -9 "${prefix}.zip" "${src}" \
          && unzip -Z1 "${prefix}.zip" \
          | "${normalize[@]}" | LC_ALL=C sort > "${list_received}"
      } || status=$?
      ;;
    *)
      local -a packer=(tar cf -)
      if [ "${mode}" = "tarReplaceLink" ]; then
        packer=(tar hcf -)
      fi
      # The stream is written to disk by tee and listed by the second tar in
      # one pass; pipefail surfaces a failure of any stage.
      "${packer[@]}" "${src}" \
        | mbuffer \
        | tee "${prefix}.tar" \
        | tar tf - \
        | "${normalize[@]}" \
        | LC_ALL=C sort > "${list_received}" || status=$?
      ;;
  esac

  if [ "${status}" -ne 0 ]; then
    echo "[ERROR]: Creating or listing the archive of ${src} failed!" >&2
    touch "${prefix}___list_fail"
    exit "${status}"
  fi

  # Compare md5sums of the two file lists, touch a file "success" or "fail"
  md5sum "${list_received}" > "${list_received}.md5"
  local sum_original sum_received
  sum_original=$(md5sum < "${list_original}")
  sum_received=$(md5sum < "${list_received}")

  local result=0
  if [ "${sum_original}" = "${sum_received}" ]; then
    touch "${prefix}___list_success"
  else
    echo "[ERROR]: File list of the archive differs from the file list of ${src}!" >&2
    touch "${prefix}___list_fail"
    result=1
  fi

  echo "[Finish ${src} ==> $(date +"%d-%m-%y ::: %T")]"
  return "${result}"
}
export -f BundleAndValidate
# Script entry point. With anything other than exactly three arguments the
# function is invoked bare, so that it prints its usage text and exits.
[ "$#" -eq 3 ] || BundleAndValidate
BundleAndValidate "${1}" "${2}" "${3}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment