Last active
June 10, 2023 11:03
-
-
Save benjaminoakes/6d78fbb79a54f51c15c9da1d08d4a7ab to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Purpose: Find all the duplicate files (same MD5 checksum) recursively under
# the current directory and delete every copy except the one whose MD5SUMS
# line sorts first.
# Recommended: review what will be deleted before running -- temporarily
# replace `rm --` in the loop below with `printf '%s\n'` for a dry run.
#
# MD5SUMS is left in the current directory for inspection. It is excluded from
# the scan because the redirection creates it before find starts; otherwise it
# would be hashed while still being written (and, while empty, would look like
# a duplicate of any other empty file).
find . -type f ! -path ./MD5SUMS -exec md5sum {} + > MD5SUMS

# Each md5sum line is "<32 hex digits><2 separator characters><path>", so the
# path starts at column 35. Taking it with substr (instead of the second
# whitespace-separated field) keeps spaces in filenames intact, and comparing
# the substr results compares the checksums as strings rather than as numbers.
# md5sum starts a line with "\" when it had to escape the filename (a backslash
# or newline in the name); those lines are skipped so that a mangled path is
# never handed to rm.
sort MD5SUMS |
  awk '
    /^\\/ { next }
    {
      md5 = substr($0, 1, 32)
      if (md5 == prev_md5) { print substr($0, 35) }
      prev_md5 = md5
    }
  ' |
  while IFS= read -r path; do
    # An empty duplicate list simply never enters the loop, so rm is never
    # run without operands.
    rm -- "$path"
  done
# Remove partially downloaded files that have filenames repeated in two
# different directories.
#
# For every regular file name present in both subdir-1 and subdir-2, print the
# path of the larger copy (the subdir-2 copy when the sizes are equal), one per
# line. Nothing is deleted here; the output is only a report.
# NOTE(review): the heading speaks of removing partial downloads, which would
# be the smaller copies, while the path printed is the larger one -- confirm
# which list is wanted before feeding this output to rm.
# Assumption: no whitespace in file or directory names (files whose names
# contain whitespace are skipped with a warning).
# Side effects: (re)writes subdir-1/SIZES, subdir-2/SIZES and ./SIZES.

#######################################
# Write one "<name> <dir>/<name> <bytes>" line per regular file in a directory
# to <dir>/SIZES.
# Arguments: $1 - directory to list (not recursive; dotfiles are not matched)
# Outputs:   warnings on stderr for skipped names
# Returns:   non-zero if $1 is not a directory or <dir>/SIZES cannot be written
#######################################
write_sizes() {
  local dir=$1 path name size
  if [[ ! -d "$dir" ]]; then
    printf 'error: %s is not a directory\n' "$dir" >&2
    return 1
  fi
  for path in "$dir"/*; do
    name=${path##*/}
    # The redirection on this loop creates SIZES before the glob is expanded,
    # so it has to be kept out of its own listing.
    [[ -f "$path" && "$name" != SIZES ]] || continue
    if [[ "$name" == *[[:space:]]* ]]; then
      printf 'warning: skipping %s (whitespace in name)\n' "$path" >&2
      continue
    fi
    size=$(wc -c < "$path") || continue
    # $((size)) normalises the padded output some wc implementations produce.
    printf '%s %s %s\n' "$name" "$path" "$((size))"
  done > "$dir/SIZES"
}

#######################################
# Report the larger copy of every file name found in both directories.
# Arguments: $1, $2 - the two directories to compare
# Outputs:   one path per duplicated name on stdout
# Returns:   non-zero if either directory cannot be listed
#######################################
print_larger_copies() {
  local first=$1 second=$2
  # The directories are never cd'ed into, so the working directory stays put
  # and the second listing is taken relative to the same place as the first.
  write_sizes "$first" || return
  write_sizes "$second" || return
  # Sort on the name field, then the path, in the C locale so that the two
  # entries for one name are always adjacent whatever the user's locale is.
  LC_ALL=C sort -k1,1 -k2,2 -- "$first/SIZES" "$second/SIZES" > SIZES || return
  # Appending "" forces string comparison of names (otherwise names such as
  # "1" and "01" would compare equal as numbers); adding 0 forces numeric
  # comparison of sizes.
  awk '
    ($1 "") == prev_name { print ((prev_size + 0 > $3 + 0) ? prev_path : $2) }
    { prev_name = $1 ""; prev_path = $2; prev_size = $3 }
  ' SIZES
}

print_larger_copies subdir-1 subdir-2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment