Last active
June 19, 2021 21:08
-
-
Save nhalstead/60610a9e90263ccad38432a2fa02f77f to your computer and use it in GitHub Desktop.
Used to clean up duplicate files (matched by filename within the same folder).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php
<?php
/**
 * Clean up "copy" duplicates such as "photo (1).jpg" sitting next to "photo.jpg".
 *
 * 1. Find all indexed files whose filename carries the copy marker " (N)."
 * 2. Group them by folder + original filename (the name without the marker);
 *    the original file itself joins the group when it exists
 * 3. Sort each group by file size, largest first
 * 4. Delete every file in the group except the largest
 * 5. Rename the largest to the original name
 *
 * Groups that end up with fewer than two existing files are skipped untouched.
 *
 * This program uses a TXT to read in the index of files.
 * Run the following to dump the file list.
 *
 *   find "$PWD" -type f > index.txt
 *
 * Glob was not used since the normal find command can speed through it and allow for testing.
 *
 * Requires PHP 7.0+ (spaceship operator).
 */

// Marker inserted before the extension of a copied file, e.g. "photo (1).jpg".
$copyMarker = '/ \(\d+\)\./';
$indexFile = 'index.txt';

echo "Finding Files" . PHP_EOL;

$all = is_readable($indexFile)
    ? file($indexFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
    : false;
if ($all === false) {
    fwrite(STDERR, "Unable to read " . $indexFile . PHP_EOL);
    exit(1);
}

// Group every marked file under the full path it would have without the marker.
$pairs = [];
foreach ($all as $fname) {
    // Only the filename is inspected: a folder that happens to be called
    // "backup (1).old" must not turn its whole content into "duplicates".
    $name = basename($fname);
    if (preg_match($copyMarker, $name) !== 1) {
        continue;
    }

    // Keep the folder prefix exactly as written in the index (including its trailing slash).
    $folder = substr($fname, 0, strlen($fname) - strlen($name));
    $target = $folder . preg_replace($copyMarker, ".", $name);

    // Key on the full target path, not the bare filename, so that same-named
    // files living in different folders are never merged into one group.
    if (!isset($pairs[$target])) {
        // Seed with the original file; it is dropped below if it does not exist.
        $pairs[$target] = [$target];
    }
    $pairs[$target][] = $fname;
}

// Process each group: keep the largest file, delete the rest, restore the original name.
foreach ($pairs as $target => $candidates) {
    $target = (string) $target;

    // Earlier groups may have removed or renamed files; look at the disk afresh.
    clearstatcache();
    $set = array_values(array_filter(array_unique($candidates), 'is_file'));

    if (count($set) < 2) {
        echo "Skip" . PHP_EOL;
        continue;
    }

    // Read each size once; refuse to delete anything if a size cannot be determined.
    $sizes = [];
    foreach ($set as $fname) {
        $sizes[$fname] = filesize($fname);
    }
    if (in_array(false, $sizes, true)) {
        fwrite(STDERR, "Skip, unable to read file sizes for " . $target . PHP_EOL);
        continue;
    }

    // Largest first; on equal size prefer the file that already has the original name.
    usort($set, function ($a, $b) use ($sizes, $target) {
        if ($sizes[$a] !== $sizes[$b]) {
            return $sizes[$b] <=> $sizes[$a];
        }
        return ($b === $target) <=> ($a === $target);
    });

    $keeper = $set[0];

    foreach (array_slice($set, 1) as $fname) {
        echo "Remove " . $fname . PHP_EOL;
        if (!unlink($fname)) {
            fwrite(STDERR, "Failed to remove " . $fname . PHP_EOL);
        }
    }

    // Rename the kept file to the original name (nothing to do if it already has it).
    if ($keeper !== $target) {
        clearstatcache(true, $target);
        if (file_exists($target)) {
            // The original could not be removed above; do not overwrite it.
            fwrite(STDERR, "Not renaming " . $keeper . ", target still exists: " . $target . PHP_EOL);
        } else {
            echo "Rename " . $keeper . " -> " . $target . PHP_EOL;
            if (!rename($keeper, $target)) {
                fwrite(STDERR, "Failed to rename " . $keeper . PHP_EOL);
            }
        }
    }

    echo PHP_EOL;
    echo PHP_EOL;
}

echo "Done" . PHP_EOL;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment