#!/bin/bash
## Transfer files using globus CLI
## Author: Samir Amin, @sbamin
# usage
#######################################
# Print the usage/help text of this script.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout; the script name shown in the usage and
#            example lines is derived from $0.
#######################################
show_help() {
# The EOF delimiter is left unquoted on purpose so that ${0##*/} expands to
# the name of the running script.
cat << EOF
Script to start and monitor file transfers using globus.
Read doc at https://docs.globus.org/cli/reference/transfer/ for command line options.
Currently supports only intra-institute transfers, i.e., GLOBUS EP id is identical for source and target path.
Default mode is single: This prefers directory level recursive copy, a single file path may not work.
Requires globus endpoint activation while transfer session is alive:
Read https://docs.globus.org/api/transfer/endpoint_activation/
On completion, globus task wait may quit without any informative msg but with the correct exit code,
i.e., exit 0 for successful download.
####
WARN: Overwrites files, dirs with identical names on the target path
WARN: Transfer skips md5 based verification on the target path, i.e., --sync-level mtime is hardcoded.
####
NB: /tier2/verhaak-lab/ is globus equivalent of verhaak-dev:/verhaak-temp/ path. The latter path will not work in globus.
Usage: ${0##*/} -i <sample_id> -g <globus ep> -s <path to source dir> -t <path to destination dir>
-h display this help and exit
-i sample id or label used with globus label directive (required)
-g globus EP id matching institute wide license: Defaults to GLOBUSEP env variable
-s globus compatible full path to source directory (required)
-t globus compatible full path to target destination (required)
-m transfer mode: BATCH (requires batchfile) or single directory/file (default)
-f absolute path to batchfile: required if -m BATCH ; Note that batchfile must have relative paths to -s and -t
-w run globus task wait (default: no). YES to enable. Useful for running transfer in an non-interactive session.
-d mirror image (Default: NO) - Use at your own risk! Not available with -m BATCH.
Example recursive transfer of files from tier2 to /fastscratch space for all of sampel1 files and subdirs.
${0##*/} -i sample1 -s /tier2/verhaak-lab/mydir/sample1 -t /fastscratch/foo/temp/sample1
Example batch mode transfer: Note that -s and -t are used to prepend to relative paths specified in batchfile.
${0##*/} -i mybatch -s /tier2/verhaak-lab/mydir -t /fastscratch/foo/temp -m BATCH -f /home/foo/batchmode/batch1.tsv
EOF
}
## Show usage and quit when fewer than three arguments are given or when the
## first argument is --help.
if [[ $# -lt 3 ]] || [[ $1 == "--help" ]]; then
    show_help
    exit 1
fi

## Parse command line options. Every flag listed in the optstring (and in the
## help text) gets a handler, so the variables checked further down
## (SAMPLEID, DEST, MYGLOBUSEP, MODE, BATCHFILE, TASKWAIT, DELMODE) are
## actually populated from the command line.
while getopts "i:g:s:t:m:f:w:d:h" opt; do
    case "$opt" in
        h) show_help; exit 0 ;;
        i) SAMPLEID=$OPTARG ;;
        g) MYGLOBUSEP=$OPTARG ;;
        s) SRC=$OPTARG ;;
        t) DEST=$OPTARG ;;
        m) MODE=$OPTARG ;;
        f) BATCHFILE=$OPTARG ;;
        w) TASKWAIT=$OPTARG ;;
        d) DELMODE=$OPTARG ;;
        ## Unknown option or missing option argument: usage goes to stderr and
        ## the script stops. The ';' matters, without it "exit 1" is merely
        ## passed to show_help as arguments and parsing continues.
        '?') show_help >&2; exit 1 ;;
    esac
done
## Required inputs: SAMPLEID (-i), SRC (-s) and DEST (-t) must all be non-empty.
if [[ -z "${SAMPLEID}" || -z "${SRC}" || -z "${DEST}" ]]; then
    ## printf with %s prints user supplied values literally; echo -e would
    ## expand backslash sequences that happen to occur inside a path or label.
    printf 'ERROR: Invalid required arguments\nOne or more of -i, -s, or -t is empty\nSAMPLEID: %s\nSRC: %s\nDEST: %s\nWork dir: %s\n\n' \
        "${SAMPLEID}" "${SRC}" "${DEST}" "$(pwd)" >&2
    ## Usage follows the error on stderr, as for an unknown option.
    show_help >&2
    exit 1
fi
## Defaults to GLOBUSEP env variable if -g is not specified or empty
MYGLOBUSEP="${MYGLOBUSEP:-${GLOBUSEP}}"

#### check globus command ####
## command -v fails when no globus executable, function or alias is found;
## its status is tested directly instead of going through $?.
if ! CHK_GLOBUS="$(command -v globus)"; then
    echo -e "\nERROR: globus command not found in the current enviornment\nSkipping transfer\n" >&2
    exit 1
fi

#### BE CAREFUL HERE ####
## The notes below concern the --delete (mirror) option that is handled in
## directory mode further down.
## enable --delete flag: will mirror destination to source dir and DELETE all
## other contents on destination.
## Override user prompt with env variable, GLOBUS_DELMODE set to YES
## DANGER ##
## Make sure you are certain of target path is what you wish and not a parent home or work dir else
## this will delete all of contents except those present in source directory.

## An endpoint id is mandatory: it comes from -g or, failing that, from the
## GLOBUSEP environment variable.
if [[ -z "${MYGLOBUSEP}" ]]; then
    echo -e "ERROR: Invalid GLOBUSEP defined at -g\\nGLOBUSEP: ${MYGLOBUSEP}\\nGLOBUSEP is required unless you are using VerhaakEnv\\nWork dir: $(pwd)\\n" >&2
    show_help
    exit 1
fi
#######################################
# Compose the command line of a recursive, directory-mode globus transfer as
# a single string that is meant to be run through eval.
# Every user supplied value is emitted with printf %q so that eval reads it
# back as exactly one word, even when it holds spaces or shell
# metacharacters. Plain values come out unchanged.
# Globals:   SAMPLEID, MYGLOBUSEP, SRC, DEST, GLOBUS_TASK_PREFIX (read)
# Arguments: $1 - "--delete" to add the --delete (mirror) flag; anything
#                 else, or nothing, leaves it out
# Outputs:   the command string on stdout, without a trailing newline
#######################################
_globus_dirmode_cmd() {
    local delete_opt=""
    if [[ "${1:-}" == "--delete" ]]; then
        delete_opt="--delete "
    fi
    printf 'globus transfer --label %q --recursive %s--no-verify-checksum --sync-level mtime %q %q >| %q' \
        "${SAMPLEID}_dirmode" \
        "${delete_opt}" \
        "${MYGLOBUSEP}:${SRC}" \
        "${MYGLOBUSEP}:${DEST}" \
        "${GLOBUS_TASK_PREFIX}.uuid.txt"
}

## Timestamped prefix shared by the task id file and the log files.
TSTAMP="$(date +%d%b%y_%H%M%S%Z)"
GLOBUS_TASK_PREFIX="$(printf "globus_%s_%s" "${SAMPLEID}" "${TSTAMP}")"

if [[ "${MODE}" != "BATCH" ]]; then
    echo -e "INFO: MODE is not BATCH but instead $MODE\nDefaults to recursive, directory based globus transfer\n"
    if [[ "$GLOBUS_DELMODE" == "YES" ]]; then
        ## Env variable GLOBUS_DELMODE=YES bypasses the interactive prompt.
        echo -e "\n#### WARN ####\nEnabling DELMODE to YES because you have set an env variable GLOBUS_DELMODE to ${GLOBUS_DELMODE}\n$SRC will be synced to $DEST\nTHIS WILL DELETE EXTRA CONTENTS ON DEST: $DEST\n"
        ## The announced grace period matches the actual sleep.
        echo "Ctrl C to abort in 10 seconds"
        sleep 10
        CMD_TRANSFER="$(_globus_dirmode_cmd --delete)"
    elif [[ "$DELMODE" == "YES" ]]; then
        echo -e "\n#### WARN ####\nEnabling DELMODE to $DELMODE\n$SRC will be synced to $DEST\nTHIS WILL DELETE EXTRA CONTENTS ON DEST: $DEST\n"
        ## read gives up after 5 seconds; USERFBK then stays empty.
        read -t 5 -erp "Are you sure? Type YES to consent and run globus transfer..." USERFBK
        if [[ "${USERFBK}" == "YES" ]]; then
            CMD_TRANSFER="$(_globus_dirmode_cmd --delete)"
        else
            ## Without consent there is no command to run: stop here rather
            ## than continue with an empty CMD_TRANSFER.
            echo -e "\nERROR: DELMODE was requested but not confirmed with YES within 5 seconds\nQuit transfer\n" >&2
            exit 1
        fi
    else
        CMD_TRANSFER="$(_globus_dirmode_cmd)"
    fi
else
    echo -e "INFO: MODE is BATCH, defaults to batch mode transfer\nSeeking batch file\n"
    ## -s: the batchfile must exist and be larger than zero bytes.
    if [[ ! -s "${BATCHFILE}" ]]; then
        echo -e "\nERROR: Batchfile at $BATCHFILE is not accessible or zero-byte size.\nQuit transfer\n" >&2
        show_help
        exit 1
    fi
    ## Batch mode feeds the batchfile on stdin; SRC and DEST get a trailing
    ## slash and serve as prefixes for the relative paths in the batchfile.
    CMD_TRANSFER="$(printf 'globus transfer --label %q --batch --no-verify-checksum --sync-level mtime %q %q < %q >| %q' \
        "${SAMPLEID}_batchmode" \
        "${MYGLOBUSEP}:${SRC}/" \
        "${MYGLOBUSEP}:${DEST}/" \
        "${BATCHFILE}" \
        "${GLOBUS_TASK_PREFIX}.uuid.txt")"
    echo -e "\n##### NOTE #####\nBATCH mode will use relative paths to copy files\nThat is it will prepend $SRC to source and $DEST to target paths given in batchfile\n\n"
fi
## Refuse to go on without a command: eval of an empty string succeeds and
## would be reported below as a started transfer.
if [[ -z "${CMD_TRANSFER}" ]]; then
    printf '\nERROR: No globus transfer command was composed\nQuit transfer\n' >&2
    exit 1
fi

## Show what is about to run and leave a short window to abort.
printf '\nINFO: %s\nCommand to run\n\n%s\n\nWork dir: %s\n\nCTRL C to abort!\n' "${TSTAMP}" "${CMD_TRANSFER}" "$(pwd)"
sleep 5

## begin globus transfer, capture task ID, and then execute wait command to wait until transfer is complete or fails.
## NOTE(review): CMD_TRANSFER is a string assembled from user supplied values
## and is executed with eval because it carries its own redirections. Shell
## metacharacters in those values are interpreted unless the string was
## composed with proper quoting.
eval "${CMD_TRANSFER}"
exitstat1=$?

if [[ "${exitstat1}" != 0 ]]; then
    TSTAMP="$(date +%d%b%y_%H%M%S%Z)"
    ## Report on stderr and keep a copy in the failed log. The log file hint
    ## names the file the transfer command redirects its output to.
    printf '\nERROR: %s\nglobus transfer for %s failed to start with exit code: %s\nCommand executed:\n%s\nGlobus Task ID: not assigned\nSee log file at %s/%s.uuid.txt\nEND\n' \
        "${TSTAMP}" "${SAMPLEID}" "${exitstat1}" "${CMD_TRANSFER}" "$(pwd)" "${GLOBUS_TASK_PREFIX}" \
        | tee "${GLOBUS_TASK_PREFIX}"_failed.log >&2
    exit "${exitstat1}"
fi

echo "Transfer has started"
sleep 1

## monitor globus transfer and exit accordingly
TSTAMP="$(date +%d%b%y_%H%M%S%Z)"
## The task id is the first 36 character run of [a-z0-9-] in the captured
## output; head keeps GLOBUS_TASKID a single line if there are more matches.
GLOBUS_TASKID="$(grep -Eo "[a-z0-9-]{36}" "${GLOBUS_TASK_PREFIX}".uuid.txt | head -n 1)"
if [[ -z "${GLOBUS_TASKID}" ]]; then
    printf '\nERROR: %s\nNo globus task id found in %s/%s.uuid.txt\nQuit\n' \
        "${TSTAMP}" "$(pwd)" "${GLOBUS_TASK_PREFIX}" >&2
    exit 1
fi

echo -e "\nTo check progress, run\n"
printf "globus task show %s" "${GLOBUS_TASKID}"
echo -e "\n\nTo cancel background transfer, run following command\nPS: CTRL C will NOT stop the transfer now.\n"
printf "globus task cancel %s" "${GLOBUS_TASKID}"
echo -e "\n"
#######################################
# Wait for the globus task to finish, report the outcome and exit with the
# status returned by `globus task wait`.
# Globals:   GLOBUS_TASKID, GLOBUS_TASK_PREFIX, SAMPLEID, CMD_TRANSFER (read)
#            TSTAMP, exitstat2 (written)
# Arguments: none
# Outputs:   progress notes and the final report on stdout; the report is
#            also appended to <GLOBUS_TASK_PREFIX>_failed.log or
#            <GLOBUS_TASK_PREFIX>_success.log
# Returns:   never returns; calls exit with the status of globus task wait
#######################################
_globus_wait_and_report() {
    ## start wait command
    echo -e "\n\nStarting globus task wait\nPolling at every 30 seconds\n"
    echo -e "\nPrint dot every 30 seconds until transfer is complete or exit due to an error.\n"

    ## "cmd || var=$?" records the real exit status (not just 0 or 1) and
    ## does not abort the script should errexit be active.
    exitstat2=0
    globus task wait --polling-interval 30 -H "${GLOBUS_TASKID}" || exitstat2=$?

    ## capture exit code and exit program accordingly
    TSTAMP="$(date +%d%b%y_%H%M%S%Z)"
    if [[ "${exitstat2}" != 0 ]]; then
        printf '\nERROR: %s\nglobus transfer for %s failed with exit code: %s\nCommand executed:\n%s\nGlobus Task ID: %s\nWork dir: %s\nEND\n' "${TSTAMP}" "${SAMPLEID}" "${exitstat2}" "${CMD_TRANSFER}" "${GLOBUS_TASKID}" "$(pwd)" |& tee -a "${GLOBUS_TASK_PREFIX}"_failed.log
    else
        printf '\nINFO: %s\nglobus transfer for %s completed with exit code: %s\nCommand executed: %s\nGlobus Task ID: %s\nWork dir: %s\nEND\n' "${TSTAMP}" "${SAMPLEID}" "${exitstat2}" "${CMD_TRANSFER}" "${GLOBUS_TASKID}" "$(pwd)" |& tee -a "${GLOBUS_TASK_PREFIX}"_success.log
    fi
    exit "${exitstat2}"
}

#### Enable globus task wait for non-interactive session and emit valid exit status.
if [[ "${TASKWAIT}" == "YES" ]]; then
    _globus_wait_and_report
fi
## END ##