Created
February 16, 2025 11:31
-
-
Save luisnomad/05cde719ba5ca2603f5c7b8c8d38144b to your computer and use it in GitHub Desktop.
Bash script to find duplicated images with different file names
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Find duplicated images (identical content, different file names).
#
# Usage: <script> <image_directory> [--delete]
#
# Every *.jpg, *.jpeg, *.png and *.gif file (extension matched
# case-insensitively) below <image_directory> is hashed with SHA-256. Files
# sharing a hash form a duplicate group. Within a group the entries are sorted
# as text by the kMDItemDateAdded value reported by mdls; the first entry is
# marked [KEEP], the others [DELETE].
#
# Without --delete the script only reports. With --delete (must be the second
# argument) it asks for confirmation once per group before removing the
# [DELETE] entries.
#
# Requires bash 4+ (associative arrays), shasum, awk, sort and mdls.

image_dir="$1"
delete_mode=false

# Associative arrays and mapfile are used below; fail clearly on old shells
# (e.g. the bash 3.2 shipped as /bin/bash on macOS).
if (( BASH_VERSINFO[0] < 4 )); then
  echo "Error: bash 4 or newer is required (found $BASH_VERSION)." >&2
  exit 1
fi

# Check if directory is provided
if [[ -z "$image_dir" ]]; then
  echo "Usage: $0 <image_directory> [--delete]" >&2
  echo "If duplicates found, run with --delete to remove them." >&2
  exit 1
fi

# Check if --delete parameter is provided
if [[ "$2" == "--delete" ]]; then
  delete_mode=true
fi

# Check if directory exists
if [[ ! -d "$image_dir" ]]; then
  echo "Error: Directory '$image_dir' does not exist." >&2
  exit 1
fi

# Convert relative paths to absolute paths. An absolute path also guarantees
# that no file name handed to external tools can start with '-'.
if ! image_dir=$(cd "$image_dir" && pwd); then
  echo "Error: Cannot access directory '$1'." >&2
  exit 1
fi

temp_file=$(mktemp) || exit 1
# Remove the temp file on every exit path, including errors and interrupts.
trap 'rm -f -- "$temp_file"' EXIT

# Find all image files, compute their hashes, and store one record per file in
# the temp file as "hash;date;path". The path goes last so that a ';' inside a
# file name cannot shift the hash or date fields when the record is read back.
while IFS= read -r -d '' image_file; do
  # Records are newline-terminated, so such a name cannot be stored safely.
  if [[ "$image_file" == *$'\n'* ]]; then
    printf 'Warning: skipping file with a newline in its name: %q\n' \
      "$image_file" >&2
    continue
  fi

  # Hash from stdin so the output never depends on the file name.
  image_hash=$(shasum -a 256 < "$image_file" | awk '{print $1}')
  if [[ ! "$image_hash" =~ ^[0-9a-f]{64}$ ]]; then
    # Never group files under an empty/garbled hash: they are not duplicates.
    printf 'Warning: could not hash %s; skipping.\n' "$image_file" >&2
    continue
  fi

  date_added=$(mdls -raw -n kMDItemDateAdded "$image_file")
  printf '%s;%s;%s\n' "$image_hash" "$date_added" "$image_file"
done < <(
  find "$image_dir" -type f \
    \( -iname "*.jpg" -o -iname "*.jpeg" -o -iname "*.png" -o -iname "*.gif" \) \
    -print0
) > "$temp_file"

# Read the temp file sorted by hash, then date, and collect the files of each
# hash as newline-separated "date;path" entries.
declare -A groups=()
while IFS=';' read -r hash date file; do
  if [[ -z "${groups[$hash]}" ]]; then
    groups["$hash"]="$date;$file" # First file in group (to keep)
  else
    groups["$hash"]+=$'\n'"$date;$file" # Additional files in group (to delete)
  fi
done < <(LC_ALL=C sort -t';' -k1,1 -k2,2 "$temp_file")

# Process each group of duplicates
for hash in "${!groups[@]}"; do
  mapfile -t files <<< "${groups[$hash]}"

  # Only process groups with duplicates
  (( ${#files[@]} > 1 )) || continue

  # In each entry, "${entry%%;*}" is the date (text before the first ';') and
  # "${entry#*;}" is the path (everything after it).
  echo "Duplicate group (hash: $hash):"
  echo " [KEEP] file://${files[0]#*;} (added: ${files[0]%%;*})"
  for ((i = 1; i < ${#files[@]}; i++)); do
    echo " [DELETE] file://${files[$i]#*;} (added: ${files[$i]%%;*})"
  done

  if [[ "$delete_mode" == true ]]; then
    echo
    echo "The following files will be deleted:"
    for ((i = 1; i < ${#files[@]}; i++)); do
      echo " file://${files[$i]#*;}"
    done

    read -r -p "Confirm delete? (y/n): " confirm
    if [[ "$confirm" == "y" || "$confirm" == "Y" ]]; then
      for ((i = 1; i < ${#files[@]}; i++)); do
        target="${files[$i]#*;}"
        # Report success only when the file was actually removed.
        if rm -- "$target"; then
          echo "Deleted: $target"
        else
          echo "Error: could not delete '$target'." >&2
        fi
      done
    else
      echo "Skipping deletion for this group."
    fi
  fi
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment