Skip to content

Instantly share code, notes, and snippets.

@qbolec
Created June 15, 2025 18:07
Show Gist options
  • Save qbolec/2f74ad9c11fb934764b889c85b66dea5 to your computer and use it in GitHub Desktop.
Save qbolec/2f74ad9c11fb934764b889c85b66dea5 to your computer and use it in GitHub Desktop.
Syncing Google Photos Takeout to NAS
#!/bin/bash
# This tool helps you backup photos from Google Photos to your NAS
# It assumes that you have ssh access to the NAS, on which there's a directory
# in which you keep all the files from Google Photos, and wish to incrementally
# add new files found in the zip file downloaded from Google Takeout.
# The tool makes an effort to unzip and upload only those files whose md5sum
# differs from that of every file already in the destination backup directory.
# It creates a subdirectory at destination with a name matching modification date of the zip,
# so it is easier to restart this on failure, and figure out what got added.
# The script was tested on Windows with Git Bash, and the NAS running Linux with bash.
# The script assumes the port of ssh is $PORT.
# It uploads files in batches of $BATCH_SIZE, to keep the local disk space usage minimal,
# but it might be annoying if you need to retype the passphrase for every batch,
# so consider using ssh-agent with your passphrase-protected key, or increasing $BATCH_SIZE.
#
# Usage: ./upload_new_photos.sh takeout.zip user@nas-ip:/path/to/archive
# Tunables: number of files uploaded per scp batch, and the ssh port of the NAS.
BATCH_SIZE=20
PORT=2222
# Strict mode: stop on a failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# Exactly two arguments are required. Otherwise print the usage line followed by
# a worked example (start ssh-agent, add a key, run the script) and exit with 1.
if (( $# != 2 )); then
  printf '%s\n' \
    "Usage: $0 <takeout.zip> <user@nas-ip:/archive/path>" \
    "Example:" \
    'eval `ssh-agent -s`' \
    "ssh-add ~/Documents/private-with-password.open-ssl" \
    "$0 ~/Downloads/Takeout.zip qbolec@Samantha:/volume1/photo/Google"
  exit 1
fi
# Split an scp-style "user@host:/path" target at its FIRST colon.
# Arguments: $1 - the remote target string
# Outputs:   sets the globals REMOTE_USER_HOST (text before the first colon)
#            and REMOTE_PATH (everything after it, later colons included, so a
#            path such as /volume1/a:b is no longer truncated at the colon).
# If $1 contains no colon, both globals receive the whole string.
split_remote_target() {
  REMOTE_USER_HOST=${1%%:*}
  REMOTE_PATH=${1#*:}
}
TAKEOUT_ZIP="$1"
REMOTE_TARGET="$2"
split_remote_target "$REMOTE_TARGET"
# Bail out early unless the first argument names an existing regular file.
[[ -f "$TAKEOUT_ZIP" ]] || {
  echo "Error: ZIP file not found: $TAKEOUT_ZIP"
  exit 1
}
# Derive the names used for this run from the ZIP file.
# Modification time of the ZIP in epoch seconds: try the "stat -c %Y" form
# first and fall back to the "stat -f %m" form if that fails.
ZIP_DATE=$(stat -c %Y "$TAKEOUT_ZIP" 2>/dev/null || stat -f %m "$TAKEOUT_ZIP" 2>/dev/null)
# Render it as YYYY-MM-DD_HHMMSS: try "date -d @epoch", then "date -r epoch".
ZIP_DATE_STR=$(date -d "@$ZIP_DATE" +%Y-%m-%d_%H%M%S 2>/dev/null || date -r "$ZIP_DATE" +%Y-%m-%d_%H%M%S)
# Private scratch directory. TMPDIR may be unset, which would abort the script
# under "set -u", so fall back to /tmp. mktemp creates the directory itself,
# with an unpredictable suffix instead of a guessable process id.
WORK_DIR=$(mktemp -d "${TMPDIR:-/tmp}/takeout_work_XXXXXX")
NEW_PHOTOS_DIR="$WORK_DIR/new_photos"
# The remote subdirectory is named after the ZIP's modification date.
REMOTE_SUBDIR="takeout_$ZIP_DATE_STR"
# Tell the user what is about to happen, then make sure the scratch
# directory exists.
printf '%s\n' \
  "Processing: $TAKEOUT_ZIP" \
  "Remote target: $REMOTE_TARGET" \
  "Will create remote directory: $REMOTE_SUBDIR" \
  "Temporary directory: $WORK_DIR"
mkdir -p "$WORK_DIR"
# Remove the scratch directory and everything in it. Registered below as the
# EXIT trap, so it runs whenever the script exits.
# Globals: WORK_DIR (read)
cleanup() {
  echo "Cleaning up temporary files..."
  # ${WORK_DIR:?} aborts instead of running "rm -rf" on an empty or unset path;
  # "--" keeps a path that starts with "-" from being parsed as an option.
  rm -rf -- "${WORK_DIR:?}"
}
trap cleanup EXIT
echo ""
echo "Step 1: Getting existing file hashes from NAS..."
# Single-quote REMOTE_PATH for the remote shell, rewriting every embedded ' as
# '\'' so that a path containing an apostrophe cannot break out of the quoting.
sq="'"
esc="'\\''"
remote_path_quoted="'${REMOTE_PATH//$sq/$esc}'"
# On the NAS: md5sum every regular file below REMOTE_PATH and keep only the hash
# column. find's stderr is discarded remotely and the remote pipeline ends in
# cut, so a failing find is not reported through ssh's exit status.
ssh -p "$PORT" "$REMOTE_USER_HOST" \
  "find $remote_path_quoted -type f -exec md5sum {} \\; 2>/dev/null | cut -d' ' -f1" \
  > "$WORK_DIR/existing_hashes.txt" || {
  echo "Could not get existing hashes (maybe empty archive?)"
  exit 2
}
# An empty hash list is ambiguous (archive without files, or wrong path), so
# verify that REMOTE_PATH really is a directory on the NAS before continuing.
if [ ! -s "$WORK_DIR/existing_hashes.txt" ]; then
  if ! ssh -p "$PORT" "$REMOTE_USER_HOST" "test -d $remote_path_quoted"; then
    echo "$REMOTE_PATH is not a directory on $REMOTE_USER_HOST"
    exit 3
  fi
fi
existing_count=$(wc -l < "$WORK_DIR/existing_hashes.txt")
echo "Found $existing_count existing files on NAS"
echo "Sorting the hashes from NAS"
# Paths are quoted so a scratch directory containing spaces is not word-split.
sort "$WORK_DIR/existing_hashes.txt" > "$WORK_DIR/existing_hashes.sorted"
echo ""
echo "Step 2: Hashing files in ZIP..."
# List archive members one per line and drop directory entries (names ending
# in "/"). grep exits with 1 when it selects no line; accept that as "no files"
# instead of letting pipefail + errexit end the script without any message.
unzip -Z1 "$TAKEOUT_ZIP" | { grep -v '/$' || [ $? -eq 1 ]; } > "$WORK_DIR/zip_files.txt"
total_files=$(wc -l < "$WORK_DIR/zip_files.txt")
echo "Processing $total_files files..."
current_file=0
# Emit one "<md5> <member name>" line per file, sorted for the later join.
# "IFS= read -r" keeps leading/trailing blanks and backslashes of the member
# name intact. Progress is written to stderr because stdout feeds sort.
# NOTE(review): unzip treats member arguments as wildcard patterns, so a name
# containing [, * or ? may not be matched literally - confirm if such names occur.
while IFS= read -r filename; do
  current_file=$((current_file + 1))
  printf "\rProgress: %d/%d files hashed %-50s" $current_file $total_files "" 1>&2
  hash=$(unzip -p "$TAKEOUT_ZIP" "$filename" | md5sum | cut -d' ' -f1)
  printf '%s %s\n' "$hash" "$filename"
done < "$WORK_DIR/zip_files.txt" | sort > "$WORK_DIR/zip_hashes.sorted"
echo "" # New line after progress
zip_count=$(wc -l < "$WORK_DIR/zip_hashes.sorted")
echo "Hashed $zip_count files from ZIP"
printf '\n'
printf '%s\n' "Step 3: Finding new files..."
# "-v 2" prints only those lines of the second file (the ZIP listing) whose
# first space-separated field, the hash, has no partner in the NAS hash list.
join -t ' ' -v 2 \
  "$WORK_DIR/existing_hashes.sorted" \
  "$WORK_DIR/zip_hashes.sorted" \
  > "$WORK_DIR/new_files.txt"
new_count=$(wc -l < "$WORK_DIR/new_files.txt")
printf '%s\n' "Found $new_count new files to extract and upload"
# Nothing to do when every hash in the ZIP is already present on the NAS.
if test $new_count -eq 0; then
  printf '%s\n' "No new files found. Nothing to upload."
  exit 0
fi
echo ""
echo "Step 4: Uploading new files to NAS..."
echo "Creating remote directory: $REMOTE_PATH/$REMOTE_SUBDIR"
# Create the remote directory. The path is single-quoted for the remote shell
# with every embedded ' rewritten as '\'' so an apostrophe in the path cannot
# terminate the quoting early.
sq="'"
esc="'\\''"
remote_dir="$REMOTE_PATH/$REMOTE_SUBDIR"
ssh -p "$PORT" "$REMOTE_USER_HOST" "mkdir -p '${remote_dir//$sq/$esc}'"
echo ""
echo "Step 5: Extracting new files locally and uploading"
# "scp -O" uses the legacy protocol, where the remote path is interpreted by the
# remote shell. Single-quote it (embedded ' becomes '\'') so that spaces or
# apostrophes in the destination path survive.
sq="'"
esc="'\\''"
remote_dest="$REMOTE_PATH/$REMOTE_SUBDIR/"
remote_dest_quoted="'${remote_dest//$sq/$esc}'"
extracted_count=0
# Each input line is "<md5> <member name>". Read it whole ("IFS= read -r" keeps
# blanks and backslashes) and cut the name off after the first space.
while IFS= read -r line; do
  filename=${line#* }
  extracted_count=$((extracted_count + 1))
  printf "\rExtracting: %d/%d - %-60s" $extracted_count $new_count "${filename##*/}"
  # Recreate the member's directory structure below NEW_PHOTOS_DIR.
  case "$filename" in
    */*) rel_dir=${filename%/*} ;;
    *) rel_dir=. ;;
  esac
  mkdir -p "$NEW_PHOTOS_DIR/$rel_dir"
  # Extract the single member to its place in the local tree.
  unzip -p "$TAKEOUT_ZIP" "$filename" > "$NEW_PHOTOS_DIR/$filename"
  # Upload after every BATCH_SIZE files and once more after the last file,
  # then drop the local copies to keep disk usage small.
  if ((extracted_count % BATCH_SIZE == 0)) || ((extracted_count == new_count)); then
    echo ""
    echo "Uploading files..."
    scp -O -P "$PORT" -r "$NEW_PHOTOS_DIR/"* "$REMOTE_USER_HOST:$remote_dest_quoted" || exit 3
    # ${...:?} refuses to run "rm -rf" on an empty or unset path.
    rm -rf -- "${NEW_PHOTOS_DIR:?}"
  fi
done < "$WORK_DIR/new_files.txt"
echo "" # New line after progress
# Final report: where the new files went, followed by a few statistics.
printf '%s\n' \
  "" \
  "Upload complete!" \
  "New files location: $REMOTE_PATH/$REMOTE_SUBDIR" \
  "Successfully processed $new_count new files" \
  "" \
  "Summary:" \
  "- Existing files on NAS: $existing_count" \
  "- Files in ZIP: $zip_count" \
  "- New files uploaded: $new_count" \
  "- Remote directory: $REMOTE_SUBDIR"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment