Skip to content

Instantly share code, notes, and snippets.

@lsl
Created March 5, 2025 11:46
Show Gist options
  • Save lsl/f1f4424f0b2f485446f536f9a20debc0 to your computer and use it in GitHub Desktop.
Save lsl/f1f4424f0b2f485446f536f9a20debc0 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# backup.google - Uses rclone to download and extract Google Takeout archives from Google Drive
#
# I use this script to backup my Google Takeout (mainly photos & drive) to my local machine where
# things are encrypted and backed up regularly with borg.
#
# The script will:
# 1. List available Takeout archives from Google Drive.
# 2. Download your selected archive.
# 3. Extract the archive to ~/Google/Takeout.
# 4. Offer cleanup options for downloaded and remote files.
#
# Requirements:
# - rclone configured with a "google-drive" remote.
# - rdfind installed to deduplicate files with hardlinks.
# - Takeout archives stored in "google-drive:Takeout".
# - Takeout archives configured as .tgz (not .zip).
# - Takeout archives split into chunks greater than your largest file. (untested, assumption, I use 10GB splits to be safe)
#
set -euo pipefail
# Config
RCLONE_REMOTE="google-drive:Takeout"
LOCAL_ROOT="${HOME}/Google"
TAKEOUT_DIR="${LOCAL_ROOT}/Takeout"
# Function to list archives available on Google Drive
list_archives() {
rclone ls "$RCLONE_REMOTE" | grep "takeout-" | awk -F'takeout-|-[0-9]+.tgz' '{print $2}' | sort -u || true
}
# Function to download the selected archive
download_archive() {
local archive_date="$1"
local archive_dir="${LOCAL_ROOT}/takeout-${archive_date}"
echo "Downloading archive: $archive_date..."
mkdir -p "$archive_dir"
rclone copy -P --include "takeout-${archive_date}-*.tgz" "$RCLONE_REMOTE" "$archive_dir"
echo -e "\nDownloaded files:"
ls -alh "$archive_dir"
echo "Download complete!"
echo "Archive saved to: $archive_dir"
}
# Function to extract archives
extract_archive() {
local archive_dir="$1"
echo -e "\nChecking existing Takeout directory..."
if [[ -d "$TAKEOUT_DIR" ]]; then
read -p "Delete existing Takeout directory before extraction? (y/N): " delete_takeout
if [[ "$delete_takeout" =~ ^[Yy]$ ]]; then
rm -rf "$TAKEOUT_DIR"
echo "Old Takeout directory removed."
fi
fi
echo "Extracting files..."
for archive in "$archive_dir"/*.tgz; do
echo "Extracting $archive..."
tar -xzf "$archive" -C "$LOCAL_ROOT" \
--exclude="Takeout/Keep/*.json" \
--exclude="Takeout/Google Photos/**/*.supplemental-metadata.json" \
--exclude="Takeout/Google Photos/**/*.json" \
--exclude="Takeout/Google Photos/**/metadata.json"
done
echo "Extraction complete!"
}
# Function to clean up local archive files
cleanup_local_archives() {
local archive_dir="$1"
read -p "Delete downloaded archive files (*tgz)? (Y/n): " delete_local
if [[ -z "$delete_local" || "$delete_local" =~ ^[Yy]$ ]]; then
rm -r "$archive_dir"
echo "Local archive directory and all files deleted."
fi
}
# Function to clean up remote archive files
cleanup_remote_archives() {
local archive_date="$1"
read -p "Delete files on Google Drive? (Y/n): " delete_remote
if [[ -z "$delete_remote" || "$delete_remote" =~ ^[Yy]$ ]]; then
for file in $(rclone ls "$RCLONE_REMOTE" | awk '{print $2}' | grep "takeout-${archive_date}-"); do
rclone delete -v "${RCLONE_REMOTE}/${file}"
done
echo "Remote files deleted."
fi
}
# Function to clean up nested Takeout directory
cleanup_nested_takeout() {
# Check for nested Takeout directory
if [[ -d "$TAKEOUT_DIR/Drive/Takeout" ]]; then
echo -e "\nChecking for nested Takeout directory..."
echo "Contents of $TAKEOUT_DIR/Drive/Takeout:"
ls -la "$TAKEOUT_DIR/Drive/Takeout"
read -p "Do you want to delete this nested Takeout directory? (Y/n): " delete_nested
if [[ -z "$delete_nested" || "$delete_nested" =~ ^[Yy]$ ]]; then
rm -r "$TAKEOUT_DIR/Drive/Takeout"
echo "Nested Takeout directory deleted."
fi
fi
}
# Function to deduplicate files
deduplicate_files() {
read -p "Deduplicate files in Takeout directory with hardlinks? (Y/n): " deduplicate
if [[ -z "$deduplicate" || "$deduplicate" =~ ^[Yy]$ ]]; then
echo -e "\nDeduplicating files in Takeout directory..."
cd "$LOCAL_ROOT"
rdfind -makehardlinks true Takeout
echo "Deduplication complete!"
fi
}
# Main script
echo "Fetching Takeout archives from Google Drive..."
archives=$(list_archives)
if [[ -z "$archives" ]]; then
echo "No archives found on Google Drive at $RCLONE_REMOTE"
exit 1
fi
echo "Available archives:"
echo "$archives"
latest_archive=$(echo "$archives" | tail -n1)
echo -e "
Enter the archive timestamp (e.g., ${latest_archive}) to download:
[default: ${latest_archive}]"
read -r archive_date
if [[ -z "$archive_date" ]]; then
archive_date="$latest_archive"
echo "Using latest archive: $archive_date"
fi
archive_dir="${LOCAL_ROOT}/takeout-${archive_date}"
download_archive "$archive_date"
extract_archive "$archive_dir"
# Cleanup process
cleanup_local_archives "$archive_dir"
cleanup_remote_archives "$archive_date"
cleanup_nested_takeout
deduplicate_files
echo "Cleanup complete!"
echo -e "\n💡 Remember to run: backup.borg"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment