Last active
April 5, 2025 19:10
-
-
Save kayhadrin/1a9bda690e9d6a44e72df66cc67f543f to your computer and use it in GitHub Desktop.
Analyse a given Syncthing versions folder and display the disk space used by files grouped by dates.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Analyse a Syncthing versions folder and report the disk space used by
# versioned files, grouped by version date.
# Usage: <this script> <versions-folder>
echo "Analyse a given Syncthing versions folder and display the disk space used by files grouped by dates."

# Fail early with a usage hint instead of handing an empty path to realpath.
if [[ -z "${1:-}" ]]; then
  echo "Usage: ${0##*/} <syncthing-versions-folder>" >&2
  exit 1
fi

# Work with an absolute path so the file lists and the hints printed later do
# not depend on the current directory. Fall back to the raw argument when
# realpath cannot resolve it, so the error below still names what was passed.
targetDir=$( realpath "$1" )
if [[ -z "$targetDir" ]]; then
  targetDir=$1
fi
echo "Target directory: '$targetDir'"
echo "Start time: $(date)"

if ! [[ -d "$targetDir" ]]; then
  echo "Unable to access target folder: '$targetDir'" >&2
  exit 1
fi
#######################################
# Convert a byte count into a coarse human-readable string.
# Arguments:
#   $1 - non-negative integer byte count; missing or empty is treated as 0.
# Outputs:
#   "<value> <unit>" on stdout, e.g. 1536 -> "1 KB". The value is truncated
#   by integer division at every step, not rounded.
# Returns:
#   0 on success, 1 (with a message on stderr) when $1 is not a plain
#   sequence of digits.
#######################################
human_readable_size() {
  local size="${1:-0}"
  local units=("B" "KB" "MB" "GB" "TB" "PB" "EB" "ZB" "YB")
  local i=0
  local last_unit=$(( ${#units[@]} - 1 ))

  # Only accept plain digits: anything else would otherwise be evaluated as
  # an arithmetic expression by the comparisons below.
  if ! [[ "$size" =~ ^[0-9]+$ ]]; then
    echo "human_readable_size: not a byte count: '$size'" >&2
    return 1
  fi
  # Force base 10 so that leading zeros are not parsed as octal.
  size=$(( 10#$size ))

  # Step up one unit per division; never index past the last known unit.
  while (( size >= 1024 && i < last_unit )); do
    size=$(( size / 1024 ))
    i=$(( i + 1 ))
  done
  echo "$size ${units[i]}"
}
# Favor using '/tmp' first, or what the environment exposes instead.
# This is because EntWare uses '/opt/tmp' but I'd rather use '/tmp' first...
if [[ -d "/tmp" ]]; then
  TMPDIR=/tmp
else
  TMPDIR=${TMPDIR:-${TEMP:-${TMP:-/tmp}}}
fi

# Folder holding the cached per-date results. Its name is fixed, so results
# written by a previous run can be found again.
tmpDir="$TMPDIR/syncthing_versions_du"
# -p: also create missing parents and stay quiet when the folder already exists.
mkdir -p "$tmpDir" 2> /dev/null
if ! [[ -d "$tmpDir" ]]; then
  echo "Unable to create cache folder: $tmpDir" >&2
  exit 2
fi
# An existing but read-only folder would make every later cache write fail,
# so report it once here instead.
if ! [[ -w "$tmpDir" ]]; then
  echo "Unable to write to cache folder: $tmpDir" >&2
  exit 2
fi
# Pattern: file name containing "~dddddddd-dddddd" (8 digits, a dash, 6 digits) regardless of the extension
# We could also check that the name ends with an extension `\(\.[^.+]+\)?` but I'm not sure how to handle files with multiple \. (or how Syncthing will name them)
# The list is sorted byte-wise so that it does not depend on find's traversal order.
versionFiles=$(
  find "$targetDir" -type f -regex '.*~[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9].*' \
    | LC_ALL=C sort
)

# Extract the 8-digit date of every matching path, using the same
# "~date-time" shape as the find pattern. `-n ... p` drops lines that do not
# match (including the single empty line produced when nothing was found), so
# no bogus "date" is reported.
versionDates=$(
  printf '%s\n' "$versionFiles" \
    | sed -n 's/.*~\([0-9]\{8\}\)-[0-9]\{6\}.*/\1/p' \
    | LC_ALL=C sort -u
)
# DEBUG: only show 3 first versions
# versionDates=$( echo "$versionDates" | head -3 )

# An empty string piped to `wc -l` still counts as one line, so handle the
# "nothing found" case explicitly.
if [[ -n "$versionDates" ]]; then
  versionDateCount=$( printf '%s\n' "$versionDates" | wc -l )
else
  versionDateCount=0
fi
echo "Disk usage by version date ($versionDateCount items):"
# Per-date result lines ("<date>: <bytes>: <human size>"), in processing order.
results=()
# Number of dates whose result was read back from the cache folder.
cachedResultNb=0
while IFS= read -r versionDate; do
  # Skip anything that is not an 8-digit date (e.g. the single empty line
  # read when no version date was found at all).
  [[ "$versionDate" =~ ^[0-9]{8}$ ]] || continue

  # versionDiskUsage=$( find . -type f -name '*~'"$versionDate"'-*' | xargs -d '\n' du -bc | tail -1 | sed 's/ *total//' )
  versionFilesByDate=$( printf '%s\n' "$versionFiles" | grep -F -- "~${versionDate}-" )
  [[ -n "$versionFilesByDate" ]] || continue

  # The cache key combines the date with a hash of the exact file list, so a
  # cached result is only reused while the list of files for that date is
  # unchanged.
  versionFilesByDateHash=$( printf '%s\n' "$versionFilesByDate" | md5sum | cut -d' ' -f1 )
  versionDiskUsageCache="$tmpDir/diskUsageByDate-${versionDate}__${versionFilesByDateHash}.txt"

  if [[ -f "$versionDiskUsageCache" ]]; then
    #DEBUG
    # echo "Cached result used: $versionDiskUsageCache"
    usage=$(< "$versionDiskUsageCache")
    echo "$usage"
    results+=( "$usage" )
    cachedResultNb=$(( cachedResultNb + 1 ))
  else
    # xargs may split a long file list across several `du` invocations, each
    # printing its own "total" line; add all of them up instead of keeping
    # only the last one. "%.0f" avoids exponent notation for large sums.
    versionDiskUsage=$(
      printf '%s\n' "$versionFilesByDate" \
        | xargs -d '\n' du -bc \
        | awk -F'\t' '$2 == "total" { sum += $1 } END { printf "%.0f\n", sum }'
    )
    usage="$versionDate: $versionDiskUsage: $(human_readable_size "$versionDiskUsage")"
    # Show the result and store it in the cache in one go.
    echo "$usage" | tee "$versionDiskUsageCache"
    results+=( "$usage" )
  fi
done <<< "$versionDates"
echo
echo "Found $cachedResultNb cached results."
echo
echo "Disk usage per version date, in descending order ($versionDateCount items):"
# Each result line is "<date>: <bytes>: <human size>". Move the byte count to
# the front so `sort -rn` can order the lines numerically, then restore a
# readable layout.
# Warning: ensure that the " , " separator has a space so that `sort -rn` can parse the byte size as a number, instead of doing an alphabetical sort
# Printing one element per line with printf avoids changing IFS globally;
# nothing is printed when there are no results.
if (( ${#results[@]} > 0 )); then
  printf '%s\n' "${results[@]}" \
    | awk -F': ' '{print $2 " , " $1 " , " $3}' \
    | sort -rn \
    | awk -F' , ' '{print $2 ": " $1 " ("$3")"}'
fi
echo
echo "Tips to view files for a given date:"
# The commands are passed as %s arguments so that the backslash in '\n' is
# printed literally.
printf '%s\n' "  find '$targetDir' -type f -name '*~YYYYMMDD-*'"
printf '%s\n' "  find '$targetDir' -type f -name '*~YYYYMMDD-*' | sort | xargs -d '\n' du -bch"
echo
echo "End time: $(date)"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment