Created
June 15, 2025 18:07
-
-
Save qbolec/2f74ad9c11fb934764b889c85b66dea5 to your computer and use it in GitHub Desktop.
Syncing Google Photos Takeout to NAS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# This tool helps you back up photos from Google Photos to your NAS.
# It assumes that you have ssh access to the NAS, on which there is a directory
# in which you keep all the files from Google Photos, and that you wish to
# incrementally add new files found in the zip file downloaded from Google Takeout.
# The tool makes an effort to unzip and upload only those files whose md5sum
# differs from every file already in the destination backup directory.
# It creates a subdirectory at the destination named after the modification
# date of the zip, so it is easier to restart on failure and to see what got added.
# The script was tested on Windows with Git Bash, and the NAS running Linux with bash.
#
# Configuration (environment variables, both optional):
#   TAKEOUT_SSH_PORT   - ssh port of the NAS, stored in $PORT (default: 2222)
#   TAKEOUT_BATCH_SIZE - files per upload batch, stored in $BATCH_SIZE (default: 20)
# Files are uploaded in batches of $BATCH_SIZE to keep local space usage minimal.
# Small batches can be annoying if you need to retype a passphrase for each one,
# so consider using ssh-agent (for keys with a passphrase) or a larger batch size.
#
# Usage: ./upload_new_photos.sh takeout.zip user@nas-ip:/path/to/archive
PORT="${TAKEOUT_SSH_PORT:-2222}"
BATCH_SIZE="${TAKEOUT_BATCH_SIZE:-20}"
set -euo pipefail
# Reject bad overrides early: the upload loop computes "count % BATCH_SIZE",
# so a zero or non-numeric batch size would only fail after all the hashing work.
if ! [[ "$PORT" =~ ^[0-9]+$ ]]; then
  echo "Error: ssh port must be a number, got: $PORT"
  exit 1
fi
if ! [[ "$BATCH_SIZE" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: batch size must be a positive integer, got: $BATCH_SIZE"
  exit 1
fi
# Exactly two arguments are required: the Takeout ZIP and an scp-style
# remote target of the form user@host:/path.
if [ $# -ne 2 ]; then
  echo "Usage: $0 <takeout.zip> <user@nas-ip:/archive/path>"
  echo "Example:"
  echo "eval \`ssh-agent -s\`"
  echo "ssh-add ~/Documents/private-with-password.open-ssl"
  echo "$0 ~/Downloads/Takeout.zip qbolec@Samantha:/volume1/photo/Google"
  exit 1
fi
TAKEOUT_ZIP="$1"
REMOTE_TARGET="$2"
# Split at the FIRST colon only: everything before it is the ssh destination,
# everything after it is the remote path (which may itself contain colons).
REMOTE_USER_HOST="${REMOTE_TARGET%%:*}"
REMOTE_PATH="${REMOTE_TARGET#*:}"
# Without a colon both expansions would return the whole argument, so the
# script would later treat the same string as both host and path.
if [[ "$REMOTE_TARGET" != *:* || -z "$REMOTE_USER_HOST" || -z "$REMOTE_PATH" ]]; then
  echo "Error: remote target must look like user@host:/path, got: $REMOTE_TARGET"
  exit 1
fi
# Check if inputs exist
if [ ! -f "$TAKEOUT_ZIP" ]; then
  echo "Error: ZIP file not found: $TAKEOUT_ZIP"
  exit 1
fi
# Get ZIP modification date (seconds since the epoch) for consistent naming.
# Works with the stat from Git Bash on Windows; the first form is the GNU
# syntax, the second is the BSD-style fallback.
ZIP_DATE=$(stat -c %Y "$TAKEOUT_ZIP" 2>/dev/null || stat -f %m "$TAKEOUT_ZIP" 2>/dev/null) || {
  echo "Error: could not read modification time of $TAKEOUT_ZIP"
  exit 1
}
# Same two-dialect approach for formatting the timestamp.
ZIP_DATE_STR=$(date -d "@$ZIP_DATE" +%Y-%m-%d_%H%M%S 2>/dev/null || date -r "$ZIP_DATE" +%Y-%m-%d_%H%M%S)
# Create working directory.
# TMPDIR is frequently unset (which would abort the script under `set -u`),
# so fall back to /tmp. mktemp creates the directory itself and gives it an
# unpredictable suffix instead of the guessable process id.
tmp_base="${TMPDIR:-/tmp}"
WORK_DIR=$(mktemp -d "${tmp_base%/}/takeout_work_XXXXXX") || {
  echo "Error: could not create a temporary directory under $tmp_base"
  exit 1
}
NEW_PHOTOS_DIR="$WORK_DIR/new_photos"
REMOTE_SUBDIR="takeout_$ZIP_DATE_STR"
echo "Processing: $TAKEOUT_ZIP"
echo "Remote target: $REMOTE_TARGET"
echo "Will create remote directory: $REMOTE_SUBDIR"
echo "Temporary directory: $WORK_DIR"
# Cleanup function: removes the local working directory ($WORK_DIR) and
# everything in it. Registered below to run whenever the script exits.
cleanup() {
  echo "Cleaning up temporary files..."
  # Skip the removal when WORK_DIR is unset or empty, and use "--" so a path
  # starting with "-" cannot be parsed as an option by rm.
  if [[ -n "${WORK_DIR:-}" ]]; then
    rm -rf -- "$WORK_DIR"
  fi
}
trap cleanup EXIT
echo ""
echo "Step 1: Getting existing file hashes from NAS..."
# Runs find + md5sum on the NAS and keeps only the hash column, one per line.
# find's stderr is discarded and the remote pipeline ends in cut, so this
# command fails only when ssh itself fails.
if ! ssh -p "$PORT" "$REMOTE_USER_HOST" \
  "find '$REMOTE_PATH' -type f -exec md5sum {} \\; 2>/dev/null | cut -d' ' -f1" \
  > "$WORK_DIR/existing_hashes.txt"; then
  echo "Could not get existing hashes (maybe empty archive?)"
  exit 2
fi
# An empty list is produced both by an empty archive directory and by a path
# that does not exist, so tell the two apart before continuing.
if [ ! -s "$WORK_DIR/existing_hashes.txt" ]; then
  if ! ssh -p "$PORT" "$REMOTE_USER_HOST" "test -d '$REMOTE_PATH'"; then
    echo "$REMOTE_PATH is not a directory on $REMOTE_USER_HOST"
    exit 3
  fi
fi
existing_count=$(wc -l < "$WORK_DIR/existing_hashes.txt")
echo "Found $existing_count existing files on NAS"
echo "Sorting the hashes from NAS"
# Sorted copy is the left-hand input of the later join. Paths are quoted so a
# temporary directory containing spaces does not split into several arguments.
sort "$WORK_DIR/existing_hashes.txt" > "$WORK_DIR/existing_hashes.sorted"
echo ""
echo "Step 2: Hashing files in ZIP..."
# List archive members one per line and drop directory entries (names ending
# in "/"). grep exits 1 when no line is left; that is not an error here, so
# only a real grep failure (status above 1) is allowed to fail the pipeline.
unzip -Z1 "$TAKEOUT_ZIP" | { grep -v '/$' || [ $? -eq 1 ]; } > "$WORK_DIR/zip_files.txt"
total_files=$(wc -l < "$WORK_DIR/zip_files.txt")
echo "Processing $total_files files..."
current_file=0
# IFS= and -r keep every member name exactly as listed: no trimming of
# leading/trailing whitespace and no backslash processing.
# NOTE(review): unzip interprets member arguments as wildcard patterns, so
# names containing characters such as "[", "*" or "?" may need escaping - confirm.
while IFS= read -r filename; do
  current_file=$((current_file + 1))
  # Progress goes to stderr because stdout of this loop is the hash list.
  printf "\rProgress: %d/%d files hashed %-50s" "$current_file" "$total_files" "" 1>&2
  hash=$(unzip -p "$TAKEOUT_ZIP" "$filename" | md5sum | cut -d' ' -f1)
  printf '%s %s\n' "$hash" "$filename"
done < "$WORK_DIR/zip_files.txt" | sort > "$WORK_DIR/zip_hashes.sorted"
echo "" # New line after progress
zip_count=$(wc -l < "$WORK_DIR/zip_hashes.sorted")
echo "Hashed $zip_count files from ZIP"
echo ""
echo "Step 3: Finding new files..."
# -v2 prints only the lines of the ZIP listing ("<md5> <name>") whose first
# field has no match among the NAS hashes; -t' ' makes a single space the field
# separator. Both inputs were produced by sort, as join requires.
join -v2 -t' ' "$WORK_DIR/existing_hashes.sorted" "$WORK_DIR/zip_hashes.sorted" > "$WORK_DIR/new_files.txt"
new_count=$(wc -l < "$WORK_DIR/new_files.txt")
echo "Found $new_count new files to extract and upload"
# Arithmetic comparison instead of an unquoted expansion inside [ ... ].
if (( new_count == 0 )); then
  echo "No new files found. Nothing to upload."
  exit 0
fi
echo ""
echo "Step 4: Uploading new files to NAS..."
echo "Creating remote directory: $REMOTE_PATH/$REMOTE_SUBDIR"
# Create remote directory
ssh -p "$PORT" "$REMOTE_USER_HOST" "mkdir -p '$REMOTE_PATH/$REMOTE_SUBDIR'"
echo ""
echo "Step 5: Extracting new files locally and uploading"
extracted_count=0
# The list is read on file descriptor 3 so that no command inside the loop
# (scp, unzip) can consume the remaining lines through stdin.
while IFS= read -r line <&3; do
  # Each line is "<md5> <member name>". Cut at the first space only, so spaces
  # and backslashes inside the member name survive untouched.
  filename="${line#* }"
  extracted_count=$((extracted_count + 1))
  printf "\rExtracting: %d/%d - %-60s" "$extracted_count" "$new_count" "$(basename "$filename")"
  # Create directory structure
  target_dir="$NEW_PHOTOS_DIR/$(dirname "$filename")"
  mkdir -p "$target_dir"
  # Extract file
  unzip -p "$TAKEOUT_ZIP" "$filename" > "$NEW_PHOTOS_DIR/$filename"
  # Upload once a full batch has been extracted, and once more for the final
  # (possibly partial) batch.
  if (( extracted_count % BATCH_SIZE == 0 || extracted_count == new_count )); then
    echo ""
    # Upload files preserving structure
    echo "Uploading files..."
    scp -O -P "$PORT" -r "$NEW_PHOTOS_DIR/"* "$REMOTE_USER_HOST":"$REMOTE_PATH/$REMOTE_SUBDIR/" || exit 3
    # Free local space before the next batch; ":?" aborts instead of running
    # rm -rf on an empty path.
    rm -rf -- "${NEW_PHOTOS_DIR:?}"
  fi
done 3< "$WORK_DIR/new_files.txt"
# Final report: the first empty line ends the in-place progress line, then the
# result and a short summary are printed, one argument per output line.
printf '%s\n' \
  "" \
  "" \
  "Upload complete!" \
  "New files location: $REMOTE_PATH/$REMOTE_SUBDIR" \
  "Successfully processed $new_count new files" \
  "" \
  "Summary:" \
  "- Existing files on NAS: $existing_count" \
  "- Files in ZIP: $zip_count" \
  "- New files uploaded: $new_count" \
  "- Remote directory: $REMOTE_SUBDIR"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment