Created
May 10, 2018 17:09
-
-
Save lelegard/be91bbc5e80a978d4d940f7f4724102e to your computer and use it in GitHub Desktop.
Script to recursively list, synchronize or delete directories on a remote site using SFTP only
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#-----------------------------------------------------------------------------
#
#  This script recursively lists, synchronizes or deletes a remote directory
#  using SFTP only. In listing mode, it generates a deterministic output
#  format, suitable for scripting.
#  Syntax: see showhelp() function below.
#
#-----------------------------------------------------------------------------
# | |
# Copyright (c) 2018, Thierry Lelegard | |
# All rights reserved. | |
# | |
# Redistribution and use in source and binary forms, with or without | |
# modification, are permitted provided that the following conditions are met: | |
# | |
# 1. Redistributions of source code must retain the above copyright notice, | |
# this list of conditions and the following disclaimer. | |
# 2. Redistributions in binary form must reproduce the above copyright | |
# notice, this list of conditions and the following disclaimer in the | |
# documentation and/or other materials provided with the distribution. | |
# | |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |
# THE POSSIBILITY OF SUCH DAMAGE. | |
# | |
#----------------------------------------------------------------------------- | |
# Base name of this script, used as prefix in usage and error messages.
# BASH_SOURCE is quoted so that an installation path containing spaces or
# wildcard characters is not word-split or expanded.
SCRIPT=$(basename -- "${BASH_SOURCE[0]}")
# Display the complete help text and terminate the script.
# Globals:  SCRIPT (read) - script name shown in the usage line.
# Outputs:  the help text, on standard error.
# Returns:  never returns; exits with status 1.
showhelp()
{
    cat >&2 <<EOF

Recursively list a remote directory using SFTP.

Usage: $SCRIPT [options] [user@]host [directory]

The output is fully deterministic to be easily parsed by a script. Each line
describes one file. Fields are separated with one space. The file name comes
last and may contain spaces. The output fields are:

  type mode owner group size date file/path

Options:

  -d
  --date
      Display the full dates instead of the number of seconds since epoch.
      Note that, in that case, the date field contains spaces and it is no
      longer possible to parse the output lines using 'cut'.

  --delete
      Recursively delete files and directories instead of listing them.

  --dry-run
      With --delete or --synchronize, display what would be done but don't do it.

  -e 'wildcard-spec'
  --exclude 'wildcard-spec'
      Exclude from the list any file or directory matching the wildcard
      specification.

  -h
  --help
      Display this help text.

  -m
  --mode
      Display the full mode string instead of the octal value.

  -n
  --name-only
      Display only file names, not full details.

  -s local-directory
  --synchronize local-directory
      Synchronize the content of 'local-directory' with the remote directory.
      New or newer local files are uploaded. Remote files without equivalent
      local file are not deleted.

  -r
  --reverse-directory
      Display each directory name after its contents instead of before.

  -v
  --verbose
      Display verbose information with --delete.

EOF
    exit 1
}
#-----------------------------------------------------------------------------
# Decode command line arguments
#-----------------------------------------------------------------------------

# Report a fatal error and terminate the script.
# Globals:   SCRIPT (read) - script name used as message prefix.
# Arguments: all arguments are joined with spaces to form the message.
# Outputs:   "<script>: <message>" on standard error.
# Returns:   never returns; exits with status 1.
error() { printf '%s\n' "$SCRIPT: $*" >&2; exit 1; }
# Display a one-line usage reminder and terminate the script.
# Globals:  SCRIPT (read) - script name shown in the usage line.
# Outputs:  the usage line, on standard error.
# Returns:  never returns; exits with status 1.
usage() { printf '%s\n' "usage: $SCRIPT [options] [user@]host [directory]" >&2; exit 1; }
# Decode the command line and set the global option variables.
#
# Globals (all written):
#   OPT_HOST       [user@]host to connect to (first positional argument).
#   OPT_ROOT       remote directory (second positional argument, optional).
#   OPT_SYNC       local directory given to --synchronize, empty otherwise.
#   OPT_PREFIX     value of the internal --prefix option; the listing code
#                  prepends it to displayed file names.
#   OPT_DRYRUN, OPT_RECURSE, OPT_DELETE, OPT_FULL_DATE, OPT_FULL_MODE,
#   OPT_NAME_ONLY, OPT_REV_DIR, OPT_VERBOSE
#                  "true" or "false"; they are later executed as commands.
#   OPT_EXCLUDE    array of wildcard patterns to exclude (always .DS_Store).
#   OPTIONS        string of options to pass to recursive invocations of this
#                  script. It is kept as a plain string because the rest of
#                  the script expands it unquoted. It is explicitly reset
#                  here so that a value inherited from the environment cannot
#                  inject options into recursive invocations.
# Arguments: the script command line.
# Returns:   0 on success; calls usage or showhelp (which exit) otherwise.
parse_args()
{
    local wild

    OPT_HOST=
    OPT_ROOT=
    OPT_SYNC=
    OPT_PREFIX=
    OPT_DRYRUN=false
    OPT_RECURSE=false
    OPT_DELETE=false
    OPT_FULL_DATE=false
    OPT_FULL_MODE=false
    OPT_NAME_ONLY=false
    OPT_REV_DIR=false
    OPT_VERBOSE=false
    OPT_EXCLUDE=(.DS_Store)
    OPTIONS=

    while [[ $# -gt 0 ]]; do
        case "$1" in
            -d|--date)
                OPT_FULL_DATE=true
                OPTIONS="$OPTIONS --date"
                ;;
            --delete)
                # Directories must be removed after their content.
                OPT_DELETE=true
                OPT_REV_DIR=true
                OPTIONS="$OPTIONS --delete"
                ;;
            --dry*)
                OPT_DRYRUN=true
                OPTIONS="$OPTIONS --dry-run"
                ;;
            -e|--exclude)
                shift; [[ $# -gt 0 ]] || usage
                # OPTIONS is expanded unquoted in recursive invocations, so
                # the quotes added below reach the child literally: strip
                # any single quote from the received value.
                wild="${1//\'/}"
                OPT_EXCLUDE+=("$wild")
                OPTIONS="$OPTIONS --exclude '$wild'"
                ;;
            -h|--help)
                showhelp
                ;;
            -m|--mode)
                OPT_FULL_MODE=true
                OPTIONS="$OPTIONS --mode"
                ;;
            -n|--name*)
                OPT_NAME_ONLY=true
                OPTIONS="$OPTIONS --name-only"
                ;;
            --prefix)
                # Internal option, not propagated through OPTIONS: each
                # recursion level passes its own value.
                shift; [[ $# -gt 0 ]] || usage
                OPT_PREFIX="$1"
                ;;
            --recurse)
                # Internal option marking a recursive invocation.
                OPT_RECURSE=true
                OPTIONS="$OPTIONS --recurse"
                ;;
            -r|--reverse*)
                OPT_REV_DIR=true
                OPTIONS="$OPTIONS --reverse-directory"
                ;;
            -s|--sync*)
                shift; [[ $# -gt 0 ]] || usage
                OPT_SYNC="$1"
                ;;
            -v|--verbose)
                OPT_VERBOSE=true
                OPTIONS="$OPTIONS --verbose"
                ;;
            -*)
                usage
                ;;
            *)
                # Positional arguments: host first, then remote directory.
                if [[ -z "$OPT_HOST" ]]; then
                    OPT_HOST="$1"
                elif [[ -z "$OPT_ROOT" ]]; then
                    OPT_ROOT="$1"
                else
                    usage
                fi
                ;;
        esac
        shift
    done
}

parse_args "$@"
# Host is mandatory.
if [[ -z "$OPT_HOST" ]]; then
    usage
fi

# Deleting and synchronizing are two distinct operations, refuse to mix them.
if $OPT_DELETE && [[ -n "$OPT_SYNC" ]]; then
    error "cannot --delete and --synchronize at the same time"
fi
#-----------------------------------------------------------------------------
# Basic functions
#-----------------------------------------------------------------------------

# Check prerequisites: locate the GNU versions of date, stat and sed.
# On Linux, the standard commands are used as they are. On other systems,
# the g-prefixed commands are searched (gdate, gstat, gsed).
# "command -v" is the portable shell builtin to locate a command; it prints
# nothing when the command is not found, leaving the variable empty.
case "$(uname -s)" in
    Darwin)
        GNUDATE=$(command -v gdate 2>/dev/null)
        GNUSTAT=$(command -v gstat 2>/dev/null)
        GNUSED=$(command -v gsed 2>/dev/null)
        if [[ -z "$GNUDATE" || -z "$GNUSTAT" ]]; then
            error "install GNU coreutils, for instance 'brew install coreutils'"
        fi
        if [[ -z "$GNUSED" ]]; then
            error "install GNU sed, for instance 'brew install gnu-sed'"
        fi
        ;;
    Linux)
        GNUDATE=date
        GNUSTAT=stat
        GNUSED=sed
        ;;
    *)
        GNUDATE=$(command -v gdate 2>/dev/null)
        GNUSTAT=$(command -v gstat 2>/dev/null)
        GNUSED=$(command -v gsed 2>/dev/null)
        if [[ -z "$GNUDATE" ]]; then
            error "GNU 'date' utility not found"
        fi
        if [[ -z "$GNUSTAT" ]]; then
            error "GNU 'stat' utility not found"
        fi
        if [[ -z "$GNUSED" ]]; then
            error "GNU 'sed' utility not found"
        fi
        ;;
esac
# Convert a date into number of seconds since epoch.
# Globals:   GNUDATE (read) - GNU date command.
# Arguments: $1 - date string, in any format accepted by GNU "date --date".
# Outputs:   the number of seconds on stdout; nothing if the date cannot be
#            parsed (the error message of the date command is discarded).
# Returns:   the exit status of the date command.
date_to_seconds() { "$GNUDATE" "--date=$1" +%s 2>/dev/null; }
# Size in bytes of a file.
# Globals:   GNUSTAT (read) - GNU stat command.
# Arguments: $1 - path of a local file.
# Outputs:   the size on stdout.
# The "--" keeps a file name starting with a dash from being read as an option.
file_size() { "$GNUSTAT" --format %s -- "$1"; }
# Modification date of a file in seconds since epoch.
# Globals:   GNUSTAT (read) - GNU stat command.
# Arguments: $1 - path of a local file.
# Outputs:   the modification time on stdout.
# The "--" keeps a file name starting with a dash from being read as an option.
file_date() { "$GNUSTAT" --format %Y -- "$1"; }
#-----------------------------------------------------------------------------
# Special case of --delete
#-----------------------------------------------------------------------------

# Top-level invocation with --delete (not a recursive call, not a dry run).
# A recursive invocation of this script in --delete mode prints one sftp
# command per remote file ("-rm") or directory ("-rmdir"), directories after
# their content. These commands, followed by the removal of the root directory
# itself, are piped into one single sftp batch session.
if $OPT_DELETE && ! $OPT_RECURSE && ! $OPT_DRYRUN; then
    [[ -n "$OPT_ROOT" ]] || error "Must provide a directory with --delete to avoid accidents"
    {
        # OPTIONS is deliberately unquoted: it is a space-separated option list.
        "${BASH_SOURCE[0]}" $OPTIONS "$OPT_HOST" "$OPT_ROOT" --recurse
        printf '%s\n' "-rmdir '$OPT_ROOT'"
    } |
    sftp -q -b - "$OPT_HOST" |
    {
        if $OPT_VERBOSE; then
            # Display the executed commands, without prompt and leading dash.
            "$GNUSED" -e 's/^sftp> *-*//'
        else
            # Hide the echoed commands, keep other messages only.
            grep -v '^sftp>'
        fi
    }
    # Report the status of the sftp session. The status of the output filter
    # is not meaningful: grep fails when there is nothing to display, which
    # is the normal case of a successful non-verbose deletion.
    exit "${PIPESTATUS[1]}"
fi
#-----------------------------------------------------------------------------
# Special case of --synchronize
#-----------------------------------------------------------------------------

# Find the description of a file in a remote listing.
# Arguments: $1 - file path, relative to the synchronized root.
#            $2 - file containing the remote listing, one file per line, in
#                 the default output format of this script:
#                 "type mode owner group size date file/path".
# Outputs:   the first line whose file/path field is exactly $1, if any.
# The name is compared as a literal string (not as a regular expression) and
# against the complete name field, which may contain spaces.
sync_remote_entry()
{
    SYNC_WANTED_NAME="$1" awk '
        {
            # Drop the six leading fields, keep the name with its spaces.
            name = $0
            for (i = 0; i < 6; i++) sub(/^[^ ]* /, "", name)
            # Concatenate with "" to force a string comparison.
            if ((name "") == (ENVIRON["SYNC_WANTED_NAME"] "")) { print; exit }
        }' "$2"
}

if [[ -n "$OPT_SYNC" ]]; then
    [[ -d "$OPT_SYNC" ]] || error "$OPT_SYNC is not a valid local directory"

    # List remote files in a private temporary file, removed on exit.
    TMPFILE=$(mktemp "${TMPDIR:-/tmp}/sftp.XXXXXX") || error "cannot create temporary file"
    trap 'rm -f -- "$TMPFILE"' EXIT
    # OPTIONS is deliberately unquoted: it is a space-separated option list.
    "${BASH_SOURCE[0]}" $OPTIONS "$OPT_HOST" "$OPT_ROOT" --recurse >"$TMPFILE"

    # Exclusion options for find command: -name p1 -o -name p2 ...
    # An array keeps each wildcard pattern as one word, unexpanded by the shell.
    find_excl=()
    for e in "${OPT_EXCLUDE[@]}"; do
        if [[ ${#find_excl[@]} -gt 0 ]]; then
            find_excl+=(-o)
        fi
        find_excl+=(-name "$e")
    done

    # Loop on all local files and directories to synchronize.
    find "$OPT_SYNC" \( "${find_excl[@]}" \) -prune -o \( \( -type d -o -type f \) -print \) |
    while IFS= read -r path; do
        # Path relative to the synchronized directory; skip the root itself.
        file=${path#"$OPT_SYNC"}
        while [[ "$file" == /* ]]; do
            file=${file#/}
        done
        [[ -n "$file" ]] || continue

        # Find matching line on remote site.
        rem=$(sync_remote_entry "$file" "$TMPFILE")

        # Full local path.
        loc_file="${OPT_SYNC}/${file}"

        if [[ -d "$loc_file" ]]; then
            # This is a directory, create if does not exist remotely.
            if [[ -z "$rem" ]]; then
                printf '%s\n' "#### missing remote directory $file"
                if ! $OPT_DRYRUN; then
                    printf '%s\n' "mkdir '${OPT_ROOT}${OPT_ROOT:+/}${file}'" | sftp -q -b - "$OPT_HOST"
                fi
            fi
        else
            # This is a file, copy if size is different or date is older.
            copy=false
            if [[ -z "$rem" ]]; then
                printf '%s\n' "#### missing remote file $file"
                copy=true
            else
                # Fields 5 and 6 of a listing line are size and date (seconds).
                rem_size=$(cut <<<"$rem" -d ' ' -f 5)
                rem_date=$(cut <<<"$rem" -d ' ' -f 6)
                loc_size=$(file_size "$loc_file")
                loc_date=$(file_date "$loc_file")
                if [[ -z "$rem_size" || "$rem_size" -ne "$loc_size" || -z "$rem_date" || "$rem_date" -lt "$loc_date" ]]; then
                    printf '%s\n' "#### obsolete remote file $file"
                    copy=true
                fi
            fi
            if $copy && ! $OPT_DRYRUN; then
                rem_dir=$(dirname "${OPT_ROOT}${OPT_ROOT:+/}${file}")
                loc_dir=$(dirname "$loc_file")
                base=$(basename "$file")
                # printf, not "echo -e": backslashes in names stay untouched.
                printf '%s\n' "lcd '$loc_dir'" "cd '$rem_dir'" "put '$base'" | sftp -q -b - "$OPT_HOST"
            fi
        fi
    done
    exit 0
fi
#-----------------------------------------------------------------------------
# Perform SFTP listing.
#-----------------------------------------------------------------------------

# Just in case it influences SFTP output.
export LANG=en_US.UTF-8

# Convert a symbolic permission string into a 4-digit octal value.
# Arguments: $1 - permission string such as "rwxr-xr-x".
# Outputs:   octal value on stdout, e.g. "0755".
# Each character gives one bit: 0 for "-", 1 for any other character.
mode_to_octal()
{
    local bits=$1
    local value=0
    while [[ -n "$bits" ]]; do
        value=$(( value << 1 ))
        if [[ "${bits:0:1}" != "-" ]]; then
            value=$(( value | 1 ))
        fi
        bits=${bits:1}
    done
    printf '%04o' "$value"
}

# Check if a file name matches one of the exclusion wildcards.
# Globals:   OPT_EXCLUDE (read) - array of wildcard patterns.
# Arguments: $1 - file name.
# Returns:   0 if the name is excluded, 1 otherwise.
is_excluded()
{
    local pattern
    for pattern in "${OPT_EXCLUDE[@]}"; do
        # Right-hand side deliberately unquoted: it is a wildcard pattern.
        if [[ $1 == $pattern ]]; then
            return 0
        fi
    done
    return 1
}

# Width of the date field in SFTP output.
dwidth=0

# The host was validated while decoding the command line. The test only
# ensures that sftp is never started without a host.
if [[ -n "$OPT_HOST" ]]; then
    # Loop on all sftp ls output lines.
    # Keep files and directories only (lines starting with '-' or 'd').
    # Remove lines for ".." but keep lines for "." (used later).
    # "read -r" keeps backslashes in file names untouched.
    printf '%s\n' "ls -la '$OPT_ROOT'" |
    sftp -q -b - "$OPT_HOST" |
    grep '^[d-]' |
    grep -v ' \.\.$' |
    while read -r line; do

        # The main problem with SFTP output is that the width and format of the date field
        # is not predictable and may contain spaces. Split the line in two, prefix and suffix.
        # Get mode, #links, owner, group, size in prefix and date and file name in suffix.
        # Fields in the prefix are predictable and do not contain spaces.
        prefix=$("$GNUSED" <<<"$line" -e 's/^\([^ ][^ ]* *[^ ][^ ]* *[^ ][^ ]* *[^ ][^ ]* *[^ ][^ ]*\) .*$/\1/')
        suffix=${line:${#prefix}}

        if [[ $dwidth -eq 0 && $suffix == *\ . ]]; then
            # This is the line for '.', typically the first line in a directory listing.
            # We don't list it but we use it to compute the width of the date field:
            # everything in the suffix except the final one-character name.
            dwidth=$(( ${#suffix} - 1 ))
            continue
        fi

        # This is a standard file or directory. Get basic fields.
        # Multiple spaces are first reduced to one to let 'cut' find the fields.
        prefix=$("$GNUSED" <<<"$prefix" -e 's/  */ /g')
        type=${prefix:0:1}
        type=${type/-/f}
        mode=${prefix:1:9}
        owner=$(cut <<<"$prefix" -d ' ' -f 3)
        group=$(cut <<<"$prefix" -d ' ' -f 4)
        size=$(cut <<<"$prefix" -d ' ' -f 5)

        # Mode
        if ! $OPT_FULL_MODE; then
            mode=$(mode_to_octal "$mode")
        fi

        # Date
        if [[ $dwidth == 0 ]]; then
            # Don't know date field size, assume that the file name has no space.
            date=0
            name=$("$GNUSED" <<<"$suffix" -e 's/^.* \([^ ][^ ]*\)$/\1/')
        else
            # Expect a fixed-size date field.
            date=$("$GNUSED" <<<"${suffix:0:$dwidth}" -e 's/^ *//' -e 's/  */ /g' -e 's/ *$//')
            if $OPT_FULL_DATE; then
                date="'$date'"
            else
                date=$(date_to_seconds "$date")
                date=${date:-0}
            fi
            # File name follows the date.
            name=${suffix:$dwidth}
        fi

        # Skip file if it matches an exclude spec.
        is_excluded "$name" && continue

        # Output line format: an sftp command in delete mode, the name only
        # or the full description otherwise.
        if $OPT_DELETE; then
            if [[ "$type" == d ]]; then
                cmd="rmdir"
            else
                cmd="rm"
            fi
            out="-$cmd '${OPT_ROOT}${OPT_ROOT:+/}${name}'"
        elif $OPT_NAME_ONLY; then
            out="${OPT_PREFIX}$name"
        else
            out="$type $mode $owner $group $size $date ${OPT_PREFIX}$name"
        fi

        # Output and recurse on directory.
        # printf, not echo: a line such as "-n" must be displayed verbatim.
        if [[ "$type" != d ]]; then
            # Regular file.
            printf '%s\n' "$out"
        else
            if ! $OPT_REV_DIR; then
                printf '%s\n' "$out"
            fi
            # OPTIONS is deliberately unquoted: it is a space-separated option list.
            "${BASH_SOURCE[0]}" $OPTIONS "$OPT_HOST" "${OPT_ROOT}${OPT_ROOT:+/}${name}" --recurse --prefix "${OPT_PREFIX}${name}/"
            if $OPT_REV_DIR; then
                printf '%s\n' "$out"
            fi
        fi
    done
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment