Last active
December 30, 2018 17:33
-
-
Save ross-spencer/4bd776a221a26c71ed0d9ee96bc12a34 to your computer and use it in GitHub Desktop.
Bash script to run the various coreutils hash utilities, code copyright Artefactual Systems Inc. GPLv3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Script to run the various coreutils checksum utilities suite. Tools include | |
# sha1sum, sha256sum etc. Easily extensible to the other algorithms in the | |
# same suite, for example, b2sum for Blake2 hash comparison. | |
# | |
# The script can be run standalone outside of Archivematica using a transfer | |
# style layout e.g. | |
# | |
# transfer/ | |
# ├── metadata | |
# │ ├── checksum.md5 | |
# │ ├── checksum.sha1 | |
# │ ├── checksum.sha256 | |
# │ └── checksum.sha512 | |
# └── objects | |
# ├── file1.dat | |
# ├── file1.dat | |
# ├── file3.dat | |
# └── file4.dat | |
# | |
# And the following commands: | |
# | |
# ./{script_name}.sh "absolute path to trnasfer" {date, e.g. 20170605T220452Z}\ | |
# {uuid} {uuid} | |
# | |
# And the output will be send to stdout and stderr. | |
# | |
# For debugging users are recommended to use the `set -eux` flags following the | |
# bash shebang. `set -u` is used by default. | |
# | |
# Dependencies (coreutils (sha1sum, sha256sum, head, etc.); ack) | |
set -u | |
# Variables provided on execution by Archivematica. | |
target="$1" | |
date="$2" | |
eventID="$3" | |
transferUUID="$4" | |
# Cumulative exit_code variable. Assumption being that we won't simply exit if | |
# there is a comparison issue. We'll attempt to run a comparison for each | |
# hash file a user provides. If three files have errors, exit_code will be '3'. | |
# If no files have errors, the exit code will be '0'. | |
exit_code=0 | |
metadata_folder="${target}/metadata/" | |
objects_folder="${target}/objects"; | |
# TOOLMAP for our hash files and command variables. Can be easily extended | |
# for all algorithms provided with coreutils: https://perma.cc/BC92-PUX5 | |
declare -A TOOLMAP | |
TOOLMAP["${metadata_folder}checksum.md5"]="md5sum" | |
TOOLMAP["${metadata_folder}checksum.sha1"]="sha1sum" | |
TOOLMAP["${metadata_folder}checksum.sha256"]="sha256sum" | |
TOOLMAP["${metadata_folder}checksum.sha512"]="sha512sum" | |
echo "Transfer metadata folder:" "${metadata_folder}"; | |
echo "Transfer objects folder:" "${objects_folder}"; | |
# Store current working directory to reset at the end of the script and then | |
# change to the transfer directory. | |
tmp_pwd=$(pwd); | |
cd "${target}"; | |
# Count the number of lines in the given hash file and the number of objects | |
# in the objects/ folder. | |
# | |
# ARG1 ($1): Checksum file | |
# ARG2 ARG ($2): Objects folder | |
# | |
function count_and_compare_lines() | |
{ | |
local checksum_lines=$(cat "${1}" | wc -l) | |
local file_count=$(find "${2}" -type f | wc -l) | |
if [ "${checksum_lines}" -eq "${file_count}" ]; | |
then | |
echo true; | |
else | |
msg="${3}: Comparison failed with %d checksum lines and %d transfer files\n"; | |
printf "${msg}" \ | |
"${checksum_lines}" "${file_count}" >&2; | |
echo false; | |
fi | |
} | |
# Retrieve the file extension to prettify the output of the script. If the | |
# second argument passed to the function is false, then don't output the | |
# extension, output the filename on its own (minus its path structure). | |
# | |
# ARG1 ($1): File path | |
# ARG2 ($2): true for ext, false for filename e.g. checksum.sha256 | |
# | |
function get_file_extension() | |
{ | |
local filename=$(basename -- "${1}"); | |
if [ "${2}" == false ]; | |
then | |
echo "${filename}" | |
else | |
local extension="${filename##*.}"; | |
echo $extension; | |
fi; | |
} | |
# Output the version information from the hash command currently being called. | |
# The version is the first line output from the command's --version flag. | |
# | |
# ARG1 ($1): Command name | |
# | |
function output_hashsum_version() | |
{ | |
echo $("${1}" --version | head -n 1); | |
} | |
# Write out PREMIS information about the comparison happening here. | |
function write_premis() | |
{ | |
"`dirname $0`/createEventsForGroup.py" \ | |
--eventIdentifierUUID "${eventID}" \ | |
--groupUUID "${transferUUID}" \ | |
--groupType "transfer_id" \ | |
--eventType "fixity check" \ | |
--eventDateTime "$date" \ | |
--eventOutcome "${2}" \ | |
--eventDetail "$(output_hashsum_version "${1}")"; | |
} | |
# Retrieve an error string from a given array and output information about what | |
# we find to stderr. | |
# | |
# ARG1 ($1): Array: An array of strings output from the previous hash command. | |
# ARG2 ($2): String: String, e.g. "FAILED" to search for. | |
# ARG3 ($2): String: The hash algorithm currently being checked against. | |
# | |
function retrieve_error() | |
{ | |
echo "${3}:" "${1}" | >&2 ack -w "${2}"; | |
local err=$?; | |
exit_code=$(increment_exit_code "${err}" 0); | |
echo $exit_code; | |
} | |
# Increment the exit code per error found running the hash tools. | |
# | |
# ARG1 ($1): Integer, ideally an $? value | |
# ARG2 ($2): Integer, comparison value to increment counter if -eq true | |
# | |
function increment_exit_code() | |
{ | |
if [ "${1}" -eq "${2}" ]; | |
then | |
local var=$(( "${exit_code}" + 1 )); | |
echo "${var}" | |
else | |
echo "${exit_code}"; | |
fi | |
} | |
# Loop through keys and values the associative array TOOLMAP. | |
for K in "${!TOOLMAP[@]}"; | |
do | |
ext=$(get_file_extension "${K}" true); | |
if [ -f "${K}" ]; | |
then | |
>&2 printf "Comparing transfer checksums with the %s file\n" "${ext}"; | |
ret=$(count_and_compare_lines "${K}" "${objects_folder}" "${ext}"); | |
err=0 | |
tool_err=0; | |
t=0; | |
if [ "${ret}" == false ]; | |
then | |
exit_code=$(increment_exit_code 0 0); | |
else | |
current_exit_code=$exit_code; | |
# Store our multi-line hash command results in an array. | |
declare out=$( "${TOOLMAP[$K]}" -c --strict "${K}" 2>&1); | |
exit_code=$(retrieve_error "${out}" "FAILED" "${ext}"); | |
exit_code=$(retrieve_error "${out}" "improperly formatted" "${ext}"); | |
exit_code=$(retrieve_error "${out}" "no properly formatted" "${ext}"); | |
# Write 'Pass' to PREMIS. PREMIS only written to METS if this | |
# script doesn't fail. | |
# write_premis "${TOOLMAP[$K]}" "Pass"; | |
if [ ! $current_exit_code -lt $exit_code ]; | |
then | |
echo "${ext} comparison was OK"; | |
fi; | |
fi | |
else | |
printf "Nothing to do for %s: File '%s' not provided \n" \ | |
"${ext}" $(get_file_extension "${K}" false); | |
fi; | |
done | |
# Reset to the original working directory and handle script exit. | |
cd "${tmp_pwd}"; | |
>&2 printf "Exiting with code: %d\n" "${exit_code}"; | |
if [ "${exit_code}" -eq 0 ]; | |
then | |
echo "Script exiting without error. Any checksum comparisons made were OK"; | |
fi | |
exit "${exit_code}"; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment