Skip to content

Instantly share code, notes, and snippets.

@Benjythebee
Last active April 23, 2025 21:03
Show Gist options
  • Save Benjythebee/42172cf6db85772d26a9c0422eb582df to your computer and use it in GitHub Desktop.
Save Benjythebee/42172cf6db85772d26a9c0422eb582df to your computer and use it in GitHub Desktop.
Merge Google drive download files. (not zip)
#!/bin/bash
# Author: Benjythebee
# Script to merge files from Google Drive split downloads into a new "merged" directory. This works because Google will sometimes split large downloads into multiple folders with the same name or naming convention.
# Note: This script is designed to only work on extracted zips.
#
# Say for example you have
# download-001/folder 1/file 01.png
# download-002/folder 2/file 01.png
# download-001/folder 1/file 02.png
# this script will generate a new folder 'merged' and combine everything
# ./merged/folder 1/file 01.png
# ./merged/folder 1/file 02.png
# ./merged/folder 2/file 01.png
#
# HOW TO:
# 1. Extract all the zips into the same folder for example './my_root'
# 2. Run `./script.sh [--dry-run] [--backup] <absolute path to ./my_root>`
# 3. Check folder './my_root/merged' for merged content
#
# Usage: ./script.sh [--dry-run] [--backup] <root_directory>
# Command-line handling.
# Both switches start out disabled and the root directory starts out empty.
root_dir=""
backup=false
dry_run=false

# Consume the positional parameters one at a time. Flags may appear in any
# position; exactly one non-flag argument (the root directory) is accepted.
while (( $# > 0 )); do
  if [[ "$1" == "--dry-run" ]]; then
    dry_run=true
  elif [[ "$1" == "--backup" ]]; then
    backup=true
  elif [[ -z "$root_dir" ]]; then
    # First argument that is not a known flag: take it as the root directory.
    root_dir="$1"
  else
    # A root directory was already captured, so anything further is an error.
    echo "Error: Unexpected argument: $1"
    echo "Usage: $0 [--dry-run] [--backup] <root_directory>"
    exit 1
  fi
  shift
done
# Validate the requested root directory, announce the operating mode and
# prepare the "merged" output directory.

# A root directory argument is mandatory.
if [[ -z "$root_dir" ]]; then
  echo "Error: Root directory not specified."
  echo "Usage: $0 [--dry-run] [--backup] <root_directory>"
  exit 1
fi

# It must exist and be a directory.
if [[ ! -d "$root_dir" ]]; then
  echo "Error: Root directory '$root_dir' does not exist or is not a directory."
  exit 1
fi

# Resolve the root to an absolute path. The script later changes into the
# root directory; a relative "$root_dir/merged" would then be resolved
# against the new working directory and end up at "<root>/<root>/merged".
# CDPATH is cleared so a relative argument is only resolved against the
# current directory.
if ! absolute_root=$(CDPATH='' cd -- "$root_dir" && pwd); then
  echo "Error: Cannot access root directory '$root_dir'."
  exit 1
fi
root_dir=$absolute_root

# Display operation mode
if [[ "$dry_run" == true ]]; then
  echo "Dry run mode enabled (no files will be moved)"
fi
if [[ "$backup" == true ]]; then
  echo "Backup mode enabled (files will be copied instead of moved)"
fi
echo "Using root directory: $root_dir"

# Define the merged directory path (strip a trailing slash so that a root of
# "/" does not yield "//merged").
merged_dir="${root_dir%/}/merged"
echo "Target merged directory: $merged_dir"

# Create the merged directory unless this is a dry run; stop if that fails
# instead of reporting a directory that does not exist.
if [[ "$dry_run" == false ]]; then
  if ! mkdir -p -- "$merged_dir"; then
    echo "Error: Could not create merged directory: $merged_dir"
    exit 1
  fi
  echo "Created merged directory: $merged_dir"
fi
# Discover the download directories, detect the layout, merge their contents
# into "$merged_dir" and clean up.
#
# Directory lists are held in arrays and filled line by line, so names that
# contain spaces (e.g. "folder 1") stay intact. Files are enumerated
# NUL-delimited. Directory names containing newlines are not supported.

#######################################
# Print a free destination path derived from an occupied one.
# "_<n>" is inserted before the extension ("a.png" -> "a_1.png"). Names
# without an extension, and dot-files such as ".bashrc", get the suffix
# appended at the end ("README" -> "README_1").
# Arguments: $1 - wanted destination path (must contain a "/")
# Outputs:   the first non-existing candidate path on stdout
#######################################
unique_destination() {
  local wanted=$1
  local parent=${wanted%/*}
  local name=${wanted##*/}
  local stem=$name
  local suffix=""
  local counter=1

  # Only split when there is at least one character before the last dot.
  if [[ "$name" == ?*.* ]]; then
    stem=${name%.*}
    suffix=".${name##*.}"
  fi

  # -L catches dangling symlinks, which -e reports as missing.
  while [[ -e "$parent/${stem}_${counter}${suffix}" \
        || -L "$parent/${stem}_${counter}${suffix}" ]]; do
    counter=$((counter + 1))
  done
  printf '%s\n' "$parent/${stem}_${counter}${suffix}"
}

#######################################
# Copy (backup mode) or move one file, renaming it if the target is taken.
# Globals:   backup (read)
# Arguments: $1 - source file
#            $2 - wanted destination path
#            $3 - directory the "Renaming to" message is shown relative to
#            $4 - prefix for the log lines
# Returns:   0 on success, non-zero if the parent directory could not be
#            created or cp/mv failed
#######################################
transfer_file() {
  local src=$1
  local dest=$2
  local display_root=$3
  local indent=$4
  local shown

  mkdir -p -- "${dest%/*}" || return 1

  if [[ -e "$dest" || -L "$dest" ]]; then
    echo "${indent}Warning: File already exists at destination: $dest"
    dest=$(unique_destination "$dest")
    shown=${dest#"$display_root"/}
    echo "${indent}Renaming to: $shown"
  fi

  if [[ "$backup" == true ]]; then
    cp -v -- "$src" "$dest"
  else
    mv -v -- "$src" "$dest"
  fi
}

# Number of files that could not be copied/moved; decides the exit status.
failed_files=0

# Get all direct child directories as potential download directories
cd "$root_dir" || exit 1
potential_download_dirs=()
while IFS= read -r dir; do
  potential_download_dirs+=("$dir")
done < <(find . -mindepth 1 -maxdepth 1 -type d ! -name merged | sort)

# Check if we found any download directories
if (( ${#potential_download_dirs[@]} == 0 )); then
  echo "Error: No subdirectories found in $root_dir."
  exit 1
fi
echo "Found potential download directories:"
printf ' %s\n' "${potential_download_dirs[@]}"
echo ""

# Check if all download directories have only one child directory with the
# same name
single_child_pattern=true
common_child_name=""
for dir in "${potential_download_dirs[@]}"; do
  child_dirs=()
  while IFS= read -r child; do
    child_dirs+=("$child")
  done < <(find "$dir" -mindepth 1 -maxdepth 1 -type d)

  if (( ${#child_dirs[@]} != 1 )); then
    single_child_pattern=false
    break
  fi

  child_dir_name=${child_dirs[0]##*/}
  if [[ -z "$common_child_name" ]]; then
    common_child_name="$child_dir_name"
  elif [[ "$common_child_name" != "$child_dir_name" ]]; then
    # Child names don't match, so we won't use this pattern
    single_child_pattern=false
    break
  fi
done

# Determine which directories to use as our "download directories" and where
# merged subfolders are rooted.
download_dirs=()
merge_root="$merged_dir"
if [[ "$single_child_pattern" == true && -n "$common_child_name" ]]; then
  echo "Detected common single subfolder pattern with name: $common_child_name"
  echo "Using these subfolders as the base for merging"
  merge_root="$merged_dir/$common_child_name"
  # Create the common child directory in the merged directory
  if [[ "$dry_run" == false ]]; then
    mkdir -p -- "$merge_root"
  fi
  for dir in "${potential_download_dirs[@]}"; do
    download_dirs+=("$dir/$common_child_name")
  done
else
  # Use the original potential download directories
  download_dirs=("${potential_download_dirs[@]}")
fi
echo "Using the following directories for merging:"
printf ' %s\n' "${download_dirs[@]}"
echo ""

# Find all unique subfolder names across all download directories
unique_subfolders=()
while IFS= read -r name; do
  unique_subfolders+=("$name")
done < <(
  for dir in "${download_dirs[@]}"; do
    find "$dir" -mindepth 1 -maxdepth 1 -type d -exec basename {} \;
  done | sort -u
)

# NOTE(review): files sitting directly inside a download directory are only
# handled below when no subfolders exist at all; when subfolders exist such
# files are left where they are.
if (( ${#unique_subfolders[@]} == 0 )); then
  echo "No subfolders found across download directories."

  # Check if there are any files directly in the download directories
  total_files=0
  for dir in "${download_dirs[@]}"; do
    file_count=$(find "$dir" -maxdepth 1 -type f | wc -l)
    total_files=$((total_files + file_count))
  done

  if (( total_files > 0 )); then
    echo "Found $total_files files directly in download directories."
    if [[ "$dry_run" == true ]]; then
      echo "[DRY RUN] Would move $total_files files to $merged_dir"
    else
      moved_files=0
      for dir in "${download_dirs[@]}"; do
        # The loop is fed by process substitution, not a pipe, so the
        # counters survive the loop (a pipeline stage runs in a subshell).
        # NOTE(review): destination is "$merged_dir" even when the common
        # single-child pattern was detected, as in the original script.
        while IFS= read -r -d '' file; do
          if transfer_file "$file" "$merged_dir/${file##*/}" "$merged_dir" ""; then
            moved_files=$((moved_files + 1))
          else
            failed_files=$((failed_files + 1))
          fi
        done < <(find "$dir" -maxdepth 1 -type f -print0)
      done
      echo "Moved $moved_files files to $merged_dir"
    fi
  fi

  if [[ "$dry_run" == false ]] && (( total_files == 0 )); then
    echo "No files to process. Removing empty merged directory."
    # The common child directory was created above; remove it first so the
    # merged directory itself is actually empty.
    if [[ "$merge_root" != "$merged_dir" ]]; then
      rmdir -- "$merge_root"
    fi
    rmdir -- "$merged_dir"
  fi

  if (( failed_files > 0 )); then
    echo "Warning: $failed_files file(s) could not be transferred; see errors above." >&2
    exit 1
  fi
  exit 0
fi

echo "Found unique subfolders:"
printf ' %s\n' "${unique_subfolders[@]}"
echo ""

# Count for statistics
processed_folders=0

# Process each unique subfolder
for subfolder_name in "${unique_subfolders[@]}"; do
  echo "Processing subfolder: $subfolder_name"
  target_dir="$merge_root/$subfolder_name"
  echo " Target location: $target_dir"

  # Create the target directory if not in dry run mode
  if [[ "$dry_run" == false ]]; then
    mkdir -p -- "$target_dir"
  fi
  processed_folders=$((processed_folders + 1))

  # Find all instances of this subfolder across download dirs and transfer
  # their files, preserving the path relative to the subfolder.
  for dir in "${download_dirs[@]}"; do
    source_dir="$dir/$subfolder_name"
    [[ -d "$source_dir" ]] || continue

    file_count=$(find "$source_dir" -type f | wc -l)
    if (( file_count == 0 )); then
      echo " No files found in $source_dir"
      continue
    fi
    echo " Processing $file_count files from $source_dir"

    if [[ "$dry_run" == true ]]; then
      echo " [DRY RUN] Would move $file_count files from $source_dir to $target_dir"
      continue
    fi

    # One recursive pass covers both direct files and nested directories.
    while IFS= read -r -d '' file; do
      rel_path=${file#"$source_dir"/}
      if ! transfer_file "$file" "$target_dir/$rel_path" "$target_dir" " "; then
        failed_files=$((failed_files + 1))
      fi
    done < <(find "$source_dir" -type f -print0)
  done
done

# Clean up source directories if not in dry run or backup mode
if [[ "$dry_run" == false && "$backup" == false ]]; then
  echo "Cleaning up empty directories in source locations..."
  for dir in "${download_dirs[@]}"; do
    # Best effort: errors from find are deliberately discarded.
    find "$dir" -type d -empty -delete 2>/dev/null
  done
fi

echo ""
echo "Summary:"
echo " Processed $processed_folders unique folders"
if (( failed_files > 0 )); then
  echo "Warning: $failed_files file(s) could not be transferred; see errors above." >&2
  exit 1
fi
echo "Operation completed successfully."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment