Skip to content

Instantly share code, notes, and snippets.

@kayhadrin
Last active April 5, 2025 19:10
Show Gist options
  • Save kayhadrin/1a9bda690e9d6a44e72df66cc67f543f to your computer and use it in GitHub Desktop.
Save kayhadrin/1a9bda690e9d6a44e72df66cc67f543f to your computer and use it in GitHub Desktop.
Analyse a given Syncthing versions folder and display the disk space used by files grouped by dates.
#!/bin/bash
# Analyse a Syncthing versions folder and report the disk space used by
# versioned files, grouped by the date embedded in their names.
#
# Usage: <this-script> <syncthing-versions-folder>
# Exit status: 1 when the target folder is missing or not a directory.
echo Analyse a given Syncthing versions folder and display the disk space used by files grouped by dates.
if [[ -z "$1" ]]; then
  # Without an argument realpath only prints a confusing error; say what is expected.
  echo "Usage: ${0##*/} <syncthing-versions-folder>"
  exit 1
fi
# Resolve to an absolute path so the paths listed by find are unambiguous.
targetDir=$( realpath "$1" )
# Keep the expansions inside the quotes: unquoted, a path containing runs of
# spaces or glob characters would be word-split / expanded before printing.
echo "Target directory: '$targetDir'"
echo "Start time: $(date)"
if ! [[ -d "$targetDir" ]]; then
  echo "Unable to access target folder: '$targetDir'"
  exit 1
fi
#######################################
# Render a byte count with the largest binary unit that keeps the value >= 1.
# The value is repeatedly integer-divided by 1024, so the result is truncated
# (e.g. 1536 is printed as "1 KB").
# Arguments: $1 - size in bytes
# Outputs:   "<value> <unit>" on stdout
#######################################
human_readable_size() {
  local remaining="$1"
  local suffixes=("B" "KB" "MB" "GB" "TB" "PB" "EB" "ZB" "YB")
  local step
  # Each division by 1024 moves one unit up the suffix table.
  for (( step = 0; remaining >= 1024; step++ )); do
    remaining=$(( remaining / 1024 ))
  done
  echo "$remaining ${suffixes[step]}"
}
# Choose the base temp folder: '/tmp' wins whenever it exists, otherwise fall
# back to whatever the environment exposes (TMPDIR, then TEMP, then TMP).
# EntWare points these at '/opt/tmp', hence the explicit preference for '/tmp'.
if [[ ! -d "/tmp" ]]; then
  TMPDIR=${TMPDIR:-${TEMP:-${TMP:-/tmp}}}
else
  TMPDIR=/tmp
fi
# Results are cached in a fixed sub-folder so later runs can find them again.
tmpDir="${TMPDIR}/syncthing_versions_du"
# mkdir's own error output is discarded; the directory test below decides
# whether the cache folder is usable.
mkdir "$tmpDir" 2> /dev/null
[[ -d "$tmpDir" ]] || {
  echo "Unable to create cache folder: $tmpDir"
  exit 2
}
# Pattern: file name with "~dddddd-dddddd" regardless of the extension
# We could also check that the name ends with an extension `\(\.[^.+]+\)?` but I'm not sure how to handle files with multiple \. (or how Syncthing will name them)
versionFiles=$( find "$targetDir" -type f -regex '.*~[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9].*' )
# Keep only the YYYYMMDD part of each "~YYYYMMDD-HHMMSS" tag, one entry per date.
# The sed pattern requires the full tag (date, dash, six-digit time) so that a
# stray "~" followed by 8 digits elsewhere in the path is not taken for a date.
versionDates=$( printf '%s\n' "$versionFiles" | sed 's/.*~\([0-9]\{8\}\)-[0-9]\{6\}.*/\1/' | sort -u )
# DEBUG: only show 3 first versions
# versionDates=$( echo "$versionDates" | head -3 )
# Count non-empty lines only: piping an empty list through `wc -l` reports 1,
# because a trailing newline is always emitted, which mislabels an empty folder.
versionDateCount=$( printf '%s\n' "$versionDates" | grep -c . )
echo "Disk usage by version date ($versionDateCount items):"
#######################################
# Sum the apparent size, in bytes, of a list of files.
# Arguments: $1 - newline-separated file paths
# Outputs:   total byte count on stdout ("0" for an empty list)
#######################################
sum_disk_usage_bytes() {
  local total=0
  local bytes label
  if [[ -z "$1" ]]; then
    echo 0
    return 0
  fi
  # xargs splits a long list over several `du` runs and every run prints its
  # own "total" line, so all of them are added up; keeping only the last line
  # would under-report large lists.
  # LC_ALL=C keeps du's "total" label untranslated so it can be matched.
  while IFS=$'\t' read -r bytes label; do
    if [[ "$label" == "total" && "$bytes" =~ ^[0-9]+$ ]]; then
      total=$(( total + bytes ))
    fi
  done < <( printf '%s\n' "$1" | LC_ALL=C xargs -d '\n' du -bc )
  echo "$total"
}

# For every version date: measure the files carrying that date (or reuse a
# cached measurement) and collect one "DATE: BYTES: HUMAN SIZE" line in `results`.
# NOTE: cache files written before the per-batch totals were summed may hold
# under-counted values; delete the cache folder to force a fresh measurement.
results=()
cachedResultNb=0
while IFS= read -r versionDate; do
  # An empty date list still arrives as one blank line; skip it instead of
  # running du on an empty path and caching a bogus entry.
  [[ -n "$versionDate" ]] || continue
  # versionDiskUsage=$( find . -type f -name '*~'"$versionDate"'-*' | xargs -d '\n' du -bc | tail -1 | sed 's/ *total//' )
  versionFilesByDate=$( printf '%s\n' "$versionFiles" | grep -F -- "~${versionDate}-" )
  # The cache key is the hash of the file list: any added or removed version
  # file for this date changes the hash and invalidates the cached figure.
  versionFilesByDateHash=$( printf '%s\n' "$versionFilesByDate" | md5sum )
  versionFilesByDateHash=${versionFilesByDateHash%% *}
  versionDiskUsageCache="$tmpDir/diskUsageByDate-${versionDate}__${versionFilesByDateHash}.txt"
  if [[ -f "$versionDiskUsageCache" ]]; then
    #DEBUG
    # echo "Cached result used: $versionDiskUsageCache"
    usage=$(< "$versionDiskUsageCache")
    echo "$usage"
    results+=( "$usage" )
    cachedResultNb=$(( cachedResultNb + 1 ))
  else
    versionDiskUsage=$( sum_disk_usage_bytes "$versionFilesByDate" )
    usage="$versionDate: $versionDiskUsage: $(human_readable_size "$versionDiskUsage")"
    echo "$usage" | tee "$versionDiskUsageCache"
    results+=( "$usage" )
  fi
done < <( printf '%s\n' "$versionDates" )
#######################################
# Print usage lines sorted by byte size, largest first.
# Arguments: usage lines formatted as "DATE: BYTES: HUMAN SIZE"
# Outputs:   one "DATE: BYTES (HUMAN SIZE)" line per argument on stdout;
#            nothing when called without arguments
#######################################
print_usage_descending() {
  # With no lines, printf would still emit one blank line, which the awk
  # stages below would turn into a bogus ":  ()" entry.
  (( $# > 0 )) || return 0
  # Warning: ensure that the " , " separator has a space so that `sort -rn` can parse the byte size as a number, instead of doing an alphabetical sort
  printf '%s\n' "$@" \
    | awk -F': ' '{print $2 " , " $1 " , " $3}' \
    | sort -rn \
    | awk -F' , ' '{print $2 ": " $1 " ("$3")"}'
}

echo
echo "Found $cachedResultNb cached results."
echo
echo "Disk usage per version date, in descending order ($versionDateCount items):"
# One array element per line is passed explicitly, so the global IFS does not
# have to be changed to join the array with newlines.
print_usage_descending "${results[@]}"
echo
echo Tips to view files for a given date:
# $targetDir stays inside the quotes so glob characters in the path are printed as-is.
echo "  find '$targetDir' -type f -name '*~YYYYMMDD-*'"
echo "  find '$targetDir' -type f -name '*~YYYYMMDD-*' | sort | xargs -d '\n' du -bch"
echo
echo "End time: $(date)"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment