|
#!/usr/bin/env bash |
|
# |
|
# Scan singularity image files in a directory for a path |
|
# |
|
# Note: |
|
# - path-regex is prepended with '^squashfs-root' |
|
# - run from a directory with fast IO (e.g. tmpfs); use -s to name a slower filesystem with more space, which is used as a fallback when a dump into the local dir fails (e.g. it runs out of space)
|
# - optionally providing image arguments is mostly for testing your regex |
|
set -euo pipefail |
|
|
|
#######################################
# Print the command-line synopsis and terminate the script.
# Outputs: the usage line on stderr (stdout is reserved for the names of
#          matching images, so diagnostics must not be mixed into it)
# Returns: never returns; exits with status 2
#######################################
function usage() {
  printf '%s\n' "usage: $0 [-d newer-than-date] [-i image_dir] [-l log_dir] [-s <slow-disk>] <path-regex> [image ...]" >&2
  exit 2
}
|
|
|
#######################################
# Cleanup handler: remove the per-process scratch squashfs dumps.
# Globals: slow_disk (read) - optional fallback directory; may be empty/unset
# Outputs: a single newline on stdout
# Returns: 0 (rm -f does not fail on missing files)
#######################################
function bail() {
  # NOTE(review): this newline goes to stdout while the progress line it
  # appears to terminate is written to stderr - confirm which stream is
  # intended before changing it.
  printf '\n'

  rm -f -- "./squashfs.$$"

  # Only clean the fallback location when one was actually configured;
  # with an empty slow_disk the path would collapse to /squashfs.<pid>.
  if [[ -n "${slow_disk:-}" ]]; then
    rm -f -- "${slow_disk}/squashfs.$$"
  fi
}
|
|
|
# Option defaults; each one can be overridden by the flag noted beside it.
newermt=''                                            # -d
image_dir='/cvmfs/singularity.galaxyproject.org/all'  # -i
log_dir="$(pwd)"                                      # -l
slow_disk=''                                          # -s

# The leading ':' puts getopts in silent mode, so unknown flags and flags
# missing their argument both end up in the catch-all arm and print usage.
while getopts ':d:i:l:s:' opt; do
  case "$opt" in
    d) newermt="$OPTARG" ;;
    i) image_dir="$OPTARG" ;;
    l) log_dir="$OPTARG" ;;
    s) slow_disk="$OPTARG" ;;
    *) usage ;;
  esac
done

# Drop the parsed options; what remains is <path-regex> [image ...].
shift "$((OPTIND - 1))"

# A non-empty path regex is mandatory.
if [[ -z "${1:-}" ]]; then
  usage
fi

image_path_regex="$1"
shift
|
|
|
# Directory the script was started from; select_images returns here after
# building the list from inside image_dir.
cwd=$(pwd)

#######################################
# Fill the global `images` array with the names of the images to scan.
# Selection order:
#   1. explicit image arguments, if the first one is non-empty
#   2. regular files in image_dir modified after $newermt, if it is set
#   3. every entry in image_dir
# Globals:   image_dir (read), newermt (read), cwd (read), images (written)
# Arguments: optional image names, taken verbatim
# Returns:   non-zero if a cd fails or find fails (fatal under `set -e`
#            when called outside a conditional)
#######################################
function select_images() {
  local found

  # Work inside image_dir so the glob and find yield bare file names.
  cd "$image_dir"

  if [[ -n "${1:-}" ]]; then
    images=("$@")
  elif [[ -n "${newermt:-}" ]]; then
    # Capture first so a find failure still surfaces as a non-zero status,
    # then split on newlines only: an unquoted $(find ...) inside an array
    # literal would also split on spaces and expand glob characters.
    found=$(find . -maxdepth 1 -type f -newermt "$newermt" -printf '%f\n')
    images=()
    if [[ -n "$found" ]]; then
      mapfile -t images <<< "$found"
    fi
  else
    images=(*)
    #images=(augur:13.0.1--pyhdfd78af_0*)
    #images=(augur:*)
  fi

  cd "$cwd"
}

select_images "$@"
|
|
|
# Run the bail cleanup handler on shell exit and whenever a command fails.
# NOTE(review): with `set -e` active a failing command raises ERR and then
# EXIT, so bail may run twice on the error path - confirm that is intended.
trap 'bail' EXIT ERR

# Number of entries in the candidate list; used for the loop bound and for
# the progress display.
image_count="${#images[@]}"
|
|
|
{ |
|
#######################################
# Print the object ID of the Squashfs row in `singularity sif list` output.
# Arguments: $1 - text captured from `singularity sif list`
# Outputs:   the first whitespace-separated field of the matching row
# Returns:   0 if exactly one row mentions "Squashfs", 1 otherwise
#######################################
function squashfs_object_id() {
  local ids

  # One awk instead of grep|awk: under `set -eo pipefail` a grep that
  # matches nothing fails the whole assignment and would abort the scan.
  ids=$(awk '/Squashfs/ { print $1 }' <<< "$1")

  # Command substitution strips the trailing newline, so "exactly one row"
  # means: non-empty and no embedded newline.
  [[ -n "$ids" && "$ids" != *$'\n'* ]] || return 1

  printf '%s\n' "$ids"
}

printf "Checking %d images\n" "$image_count" >&2

start=$SECONDS

# For every candidate image: dump its Squashfs partition to a scratch file,
# list the contents and print the image name on stdout when any path matches
# ^squashfs-root<image_path_regex>. Diagnostics and progress go to stderr;
# failures are recorded in files under log_dir.
for ((i = 0; i < image_count; i++)); do
  image="${images[i]}"
  image_path="${image_dir}/${image}"

  # Skip anything that is not a regular file (including an unmatched glob).
  [[ -f "$image_path" ]] || continue

  sif_list=$(singularity sif list "$image_path" 2>> "${log_dir}/sif-list-errors.txt") || {
    printf "Command was: singularity sif list %s\n" "$image_path" >> "${log_dir}/sif-list-errors.txt"
    printf "\nImage appears to be corrupt: %s\n" "$image" >&2
    printf "%s\n" "$image" >> "${log_dir}/bad-images.txt"
    continue
  }

  sif_od=$(squashfs_object_id "$sif_list") || {
    printf "\nCan't determine object ID for Squashfs layer: %s\n" "$image" >&2
    printf "%s\n" "$image" >> "${log_dir}/skipped-images.txt"
    continue
  }

  # First attempt: dump into the current (fast) directory.
  squashfs="./squashfs.$$"
  if ! singularity sif dump "$sif_od" "$image_path" > "$squashfs" 2>> "${log_dir}/sif-dump-errors.txt"; then
    printf "Command was: singularity sif dump %s %s\n" "$sif_od" "$image_path" >> "${log_dir}/sif-dump-errors.txt"
    printf "\nSingularity sif dump failed: %s\n" "$image" >&2

    if [[ ! -d "$slow_disk" ]]; then
      printf "\nNo slow dir set, cannot try again: %s\n" "$image" >&2
      printf "%s\n" "$image" >> "${log_dir}/skipped-images.txt"
      # Without a usable dump there is nothing to list; move on instead of
      # running unsquashfs on a failed/partial file.
      continue
    fi

    # Free the space held by the partial dump before retrying elsewhere.
    rm -f -- "$squashfs"

    squashfs="${slow_disk}/squashfs.$$"
    singularity sif dump "$sif_od" "$image_path" > "$squashfs" || {
      printf "\nSingularity sif dump failed again: %s\n" "$image" >&2
      printf "%s\n" "$image" >> "${log_dir}/skipped-images.txt"
      continue
    }
  fi

  # grep must consume the whole listing, so do not use `grep -q`: most likely
  # it exits at the first match, unsquashfs then dies on a closed pipe, and
  # with `pipefail` the pipeline reports failure even though a match exists.
  if unsquashfs -l "$squashfs" | grep "^squashfs-root${image_path_regex}" > /dev/null; then
    printf '%s\n' "$image"
  fi

  # Progress is reported after the image is finished, so i + 1 are complete.
  completed=$((i + 1))
  percent=$(bc -l <<< "(${completed}/${image_count})*100")
  elapsed=$((SECONDS - start))
  remaining=$((image_count - completed))
  time_remaining=$(bc -l <<< "(${elapsed}/${completed})*${remaining}")
  printf "\r%0${#image_count}d %07.4f%% %0.fs remaining " "$completed" "$percent" "$time_remaining" >&2
done