Skip to content

Instantly share code, notes, and snippets.

@luisnomad
Created February 16, 2025 11:31
Show Gist options
  • Save luisnomad/05cde719ba5ca2603f5c7b8c8d38144b to your computer and use it in GitHub Desktop.
Save luisnomad/05cde719ba5ca2603f5c7b8c8d38144b to your computer and use it in GitHub Desktop.
Bash script to find duplicated images with different file names
#!/usr/bin/env bash
#
# Find images with identical content under a directory (searched recursively)
# and optionally delete the duplicates.
#
# Usage: <script> <image_directory> [--delete]
#   <image_directory>  directory to scan
#   --delete           after listing each duplicate group, ask for
#                      confirmation and remove the files marked [DELETE]
#
# Requires bash 4+ (associative arrays) and the external tools `shasum` and
# `mdls`.
image_dir="$1"
delete_mode=false

# `env` may resolve to an old system bash; fail early with a clear message
# instead of a cryptic `declare -A` error later in the script.
if (( BASH_VERSINFO[0] < 4 )); then
  echo "Error: bash 4 or newer is required (found $BASH_VERSION)." >&2
  exit 1
fi

# Check if directory is provided
if [[ -z "$image_dir" ]]; then
  echo "Usage: $0 <image_directory> [--delete]" >&2
  echo "If duplicates found, run with --delete to remove them." >&2
  exit 1
fi

# Check if --delete parameter is provided
if [[ "$2" == "--delete" ]]; then
  delete_mode=true
fi

# Check if directory exists
if [[ ! -d "$image_dir" ]]; then
  echo "Error: Directory '$image_dir' does not exist." >&2
  exit 1
fi
# Scratch file that receives one "hash;path;date_added" record per image.
temp_file=$(mktemp) || exit 1
# Make sure the scratch file is removed whenever the script exits, including
# early exits between here and the end of the script.
trap 'rm -f -- "$temp_file"' EXIT

# Convert relative paths to absolute paths.
# CDPATH is cleared because `cd` prints the resolved directory when it finds
# the target through CDPATH, which would pollute the captured `pwd` output.
abs_dir=$(CDPATH= cd -- "$image_dir" && pwd) || {
  echo "Error: Cannot access directory '$image_dir'." >&2
  exit 1
}
image_dir=$abs_dir

# Find all image files, compute their hashes, and store in the temp file.
# -iname matches extensions case-insensitively so that e.g. IMG_0001.JPG is
# included as well.
find "$image_dir" -type f \
  \( -iname "*.jpg" -o -iname "*.jpeg" -o -iname "*.png" -o -iname "*.gif" \) \
  -print0 |
  while IFS= read -r -d '' image_file; do
    # Records are newline-delimited, so a path containing a newline cannot be
    # represented safely; skip it rather than risk acting on a mangled path.
    if [[ "$image_file" == *$'\n'* ]]; then
      echo "Warning: skipping path containing a newline: $image_file" >&2
      continue
    fi
    # Hash via stdin so the output never depends on how shasum prints the
    # file name; only the first field (the digest) is kept.
    image_hash=$(shasum -a 256 < "$image_file" | awk '{print $1}')
    # An empty hash (e.g. unreadable file) cannot be used as an array key.
    if [[ -z "$image_hash" ]]; then
      echo "Warning: could not hash '$image_file'; skipping." >&2
      continue
    fi
    date_added=$(mdls -raw -n kMDItemDateAdded "$image_file")
    printf '%s;%s;%s\n' "$image_hash" "$image_file" "$date_added"
  done > "$temp_file"
# Build `groups`: a map from content hash to a newline-separated list of
# "path;date" entries. Records are fed in sorted by hash and then by the text
# starting at the third ';'-separated field, so the entry stored first for a
# hash is the one that sorted earliest.
declare -A groups
while IFS=';' read -r digest path added; do
  record="$path;$added"
  if [[ -n "${groups[$digest]}" ]]; then
    # Hash already seen: append as an additional (duplicate) entry.
    groups["$digest"]+=$'\n'"$record"
  else
    # First entry for this hash.
    groups["$digest"]="$record"
  fi
done < <(sort -t';' -k1,1 -k3 "$temp_file")
#######################################
# Report one hash group and, in delete mode, remove its duplicates after an
# interactive confirmation.
# Globals:   delete_mode (read) - "true" enables the delete prompt
# Arguments: $1 - content hash shared by every file in the group
#            $2 - newline-separated "path;date_added" entries; the first
#                 entry is the file to keep, the rest are duplicates
# Outputs:   report and prompt on stdout; failed deletions on stderr
# Returns:   0 if the group has a single entry or delete mode is off
#######################################
process_duplicate_group() {
  local hash=$1
  local -a entries=()
  local entry path confirm

  mapfile -t entries <<< "$2"
  # Only process groups with duplicates.
  (( ${#entries[@]} > 1 )) || return 0

  echo "Duplicate group (hash: $hash):"
  # The path is everything before the LAST ';' and the date everything after
  # it, so a path that itself contains ';' is still split correctly.
  echo " [KEEP] file://${entries[0]%;*} (added: ${entries[0]##*;})"
  for entry in "${entries[@]:1}"; do
    echo " [DELETE] file://${entry%;*} (added: ${entry##*;})"
  done

  [[ "$delete_mode" == true ]] || return 0

  echo
  echo "The following files will be deleted:"
  for entry in "${entries[@]:1}"; do
    echo " file://${entry%;*}"
  done
  read -r -p "Confirm delete? (y/n): " confirm
  if [[ "$confirm" == "y" || "$confirm" == "Y" ]]; then
    for entry in "${entries[@]:1}"; do
      path=${entry%;*}
      # Only claim success when rm actually succeeded.
      if rm -- "$path"; then
        echo "Deleted: $path"
      else
        echo "Failed to delete: $path" >&2
      fi
    done
  else
    echo "Skipping deletion for this group."
  fi
}

# Process each group of duplicates
for hash in "${!groups[@]}"; do
  process_duplicate_group "$hash" "${groups[$hash]}"
done
rm -f -- "$temp_file"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment