#!/bin/bash
#
# backup.google - Uses rclone to download and extract Google Takeout archives from Google Drive
#
# I use this script to back up my Google Takeout (mainly Photos & Drive) to my local machine, where
# things are encrypted and backed up regularly with borg.
#
# The script will:
# 1. List available Takeout archives from Google Drive.
# 2. Download your selected archive.
# 3. Extract the archive to ~/Google/Takeout.
# 4. Offer cleanup options for downloaded and remote files.
#
# Requirements (see the example setup commands below):
# - rclone configured with a "google-drive" remote.
# - rdfind installed to deduplicate files with hardlinks.
# - Takeout archives stored in "google-drive:Takeout".
# - Takeout archives exported as .tgz (not .zip).
# - Takeout archives split into chunks larger than your largest file (untested assumption; I use
#   10GB splits to be safe).
#
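# Example setup check (a sketch, assuming the remote is named "google-drive" as configured below;
# adjust the name if yours differs):
#   rclone config                    # create/inspect the Google Drive remote
#   rclone lsd google-drive:         # confirm the remote is reachable
#   rclone ls google-drive:Takeout   # confirm the Takeout archives are visible
#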
set -euo pipefail

# Config
RCLONE_REMOTE="google-drive:Takeout"
LOCAL_ROOT="${HOME}/Google"
TAKEOUT_DIR="${LOCAL_ROOT}/Takeout"

# Function to list archives available on Google Drive
list_archives() {
  rclone ls "$RCLONE_REMOTE" | grep "takeout-" | awk -F'takeout-|-[0-9]+.tgz' '{print $2}' | sort -u || true
}
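
# Example (illustrative only; Takeout typically names its pieces "takeout-<timestamp>-<NNN>.tgz",
# so for a listing like the one below list_archives would print "20250305T114600Z"):
#   $ rclone ls google-drive:Takeout
#   10737418240 takeout-20250305T114600Z-001.tgz
#   10737418240 takeout-20250305T114600Z-002.tgz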

# Function to download the selected archive
download_archive() {
  local archive_date="$1"
  local archive_dir="${LOCAL_ROOT}/takeout-${archive_date}"
  echo "Downloading archive: $archive_date..."
  mkdir -p "$archive_dir"
  rclone copy -P --include "takeout-${archive_date}-*.tgz" "$RCLONE_REMOTE" "$archive_dir"
  echo -e "\nDownloaded files:"
  ls -alh "$archive_dir"
  echo "Download complete!"
  echo "Archive saved to: $archive_dir"
}

# Function to extract archives
extract_archive() {
  local archive_dir="$1"
  echo -e "\nChecking existing Takeout directory..."
  if [[ -d "$TAKEOUT_DIR" ]]; then
    read -p "Delete existing Takeout directory before extraction? (y/N): " delete_takeout
    if [[ "$delete_takeout" =~ ^[Yy]$ ]]; then
      rm -rf "$TAKEOUT_DIR"
      echo "Old Takeout directory removed."
    fi
  fi
  echo "Extracting files..."
  for archive in "$archive_dir"/*.tgz; do
    echo "Extracting $archive..."
    tar -xzf "$archive" -C "$LOCAL_ROOT" \
      --exclude="Takeout/Keep/*.json" \
      --exclude="Takeout/Google Photos/**/*.supplemental-metadata.json" \
      --exclude="Takeout/Google Photos/**/*.json" \
      --exclude="Takeout/Google Photos/**/metadata.json"
  done
  echo "Extraction complete!"
}
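
# After extraction the tree typically looks something like this (illustrative only; the exact
# folders depend on which products you included in the export):
#   ~/Google/Takeout/Google Photos/...
#   ~/Google/Takeout/Drive/...
#   ~/Google/Takeout/Keep/...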

# Function to clean up local archive files
cleanup_local_archives() {
  local archive_dir="$1"
  read -p "Delete downloaded archive files (*.tgz)? (Y/n): " delete_local
  if [[ -z "$delete_local" || "$delete_local" =~ ^[Yy]$ ]]; then
    rm -r "$archive_dir"
    echo "Local archive directory and all files deleted."
  fi
}

# Function to clean up remote archive files
cleanup_remote_archives() {
  local archive_date="$1"
  read -p "Delete files on Google Drive? (Y/n): " delete_remote
  if [[ -z "$delete_remote" || "$delete_remote" =~ ^[Yy]$ ]]; then
    for file in $(rclone ls "$RCLONE_REMOTE" | awk '{print $2}' | grep "takeout-${archive_date}-"); do
      rclone delete -v "${RCLONE_REMOTE}/${file}"
    done
    echo "Remote files deleted."
  fi
}
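
# Note: the per-file loop above relies on the Takeout filenames containing no whitespace (true for
# the standard "takeout-...-NNN.tgz" names). An alternative sketch would be a single filtered
# delete, since rclone delete honours the usual --include/--exclude filter flags, e.g.:
#   rclone delete -v --include "takeout-${archive_date}-*.tgz" "$RCLONE_REMOTE"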

# Function to clean up nested Takeout directory
cleanup_nested_takeout() {
  # A previously saved Takeout folder kept in Drive gets exported again, ending up nested as Drive/Takeout
  if [[ -d "$TAKEOUT_DIR/Drive/Takeout" ]]; then
    echo -e "\nFound nested Takeout directory."
    echo "Contents of $TAKEOUT_DIR/Drive/Takeout:"
    ls -la "$TAKEOUT_DIR/Drive/Takeout"
    read -p "Do you want to delete this nested Takeout directory? (Y/n): " delete_nested
    if [[ -z "$delete_nested" || "$delete_nested" =~ ^[Yy]$ ]]; then
      rm -r "$TAKEOUT_DIR/Drive/Takeout"
      echo "Nested Takeout directory deleted."
    fi
  fi
}

# Function to deduplicate files
deduplicate_files() {
  read -p "Deduplicate files in Takeout directory with hardlinks? (Y/n): " deduplicate
  if [[ -z "$deduplicate" || "$deduplicate" =~ ^[Yy]$ ]]; then
    echo -e "\nDeduplicating files in Takeout directory..."
    cd "$LOCAL_ROOT"
    rdfind -makehardlinks true Takeout
    echo "Deduplication complete!"
  fi
}
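
# Note: Google Photos exports often contain the same photo in several places (year folders plus
# album folders); rdfind replaces those extra copies with hardlinks. To preview what it would do
# without touching anything, run it with its dry-run flag first, e.g.:
#   rdfind -dryrun true -makehardlinks true Takeout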

# Main script
echo "Fetching Takeout archives from Google Drive..."
archives=$(list_archives)
if [[ -z "$archives" ]]; then
  echo "No archives found on Google Drive at $RCLONE_REMOTE"
  exit 1
fi

echo "Available archives:"
echo "$archives"

latest_archive=$(echo "$archives" | tail -n1)
echo -e "
Enter the archive timestamp (e.g., ${latest_archive}) to download:
[default: ${latest_archive}]"
read -r archive_date
if [[ -z "$archive_date" ]]; then
  archive_date="$latest_archive"
  echo "Using latest archive: $archive_date"
fi

archive_dir="${LOCAL_ROOT}/takeout-${archive_date}"

download_archive "$archive_date"
extract_archive "$archive_dir"

# Cleanup process
cleanup_local_archives "$archive_dir"
cleanup_remote_archives "$archive_date"
cleanup_nested_takeout
deduplicate_files
echo "Cleanup complete!"

echo -e "\n💡 Remember to run: backup.borg"