Last active
July 13, 2018 18:49
-
-
Save aravindkumarsvg/5e1e074c56a8244f4fa4f1991b39a940 to your computer and use it in GitHub Desktop.
Replaces duplicate files with hard links to the file that comes first in lexical order
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
################################################################################
#                                                                              #
# - Replaces duplicate regular files in the given directory with hard links    #
#   to the one copy that is kept.                                              #
# - The kept copy is the file whose name comes first in lexical order.         #
# - Uses the fdupes utility to find the duplicate files.                       #
#                                                                              #
# = Usage: bash script.sh duplicate_files_folder                               #
#                                                                              #
################################################################################
#######################################
# Validates the directory given on the command line and records it.
# Globals:   input_dir (written) - set to $1 when validation succeeds
# Arguments: $1 - path of the directory to scan for duplicates
# Outputs:   "Input is not valid" on stderr when $1 is empty or is not
#            an existing directory
# Returns:   0 on success; exits the script with status 1 otherwise
#######################################
validate_input_directory () {
  # [[ ]] with quoted expansions keeps a path containing spaces or glob
  # characters as a single word; ${1:-} tolerates a missing argument.
  if [[ -n "${1:-}" && -d "$1" ]]; then
    input_dir=$1
  else
    echo "Input is not valid" >&2
    exit 1
  fi
}
#######################################
# Verifies that an fdupes command can be resolved by the shell.
# Arguments: none
# Outputs:   "fdupes is not available" on stderr when the lookup fails
# Returns:   0 when fdupes is found; exits the script with status 1 otherwise
#######################################
checks_fdupes () {
  # command -v is a shell builtin, so the check does not depend on an
  # external `which` binary being installed.
  if ! command -v fdupes > /dev/null 2>&1; then
    echo "fdupes is not available" >&2
    exit 1
  fi
}
#######################################
# Private helper: turns one set of duplicate files into hard links.
# The regular file whose name sorts first (per [[ a > b ]]) is kept; every
# other regular file in the set is replaced by a hard link to it.
# Arguments: $@ - paths belonging to one duplicate set
# Outputs:   a diagnostic on stderr for each link that could not be created
# Returns:   0 when every link succeeded (or nothing had to be done), 1 otherwise
#######################################
_link_duplicate_batch () {
  local keeper="" path status=0

  # Pick the lexically smallest regular file as the copy to keep.
  for path in "$@"; do
    [[ -f "$path" ]] || continue
    if [[ -z "$keeper" || "$keeper" > "$path" ]]; then
      keeper=$path
    fi
  done
  [[ -n "$keeper" ]] || return 0

  for path in "$@"; do
    [[ "$path" != "$keeper" && -f "$path" ]] || continue
    # Already the same inode: nothing to do, and ln would report an error.
    [[ "$path" -ef "$keeper" ]] && continue
    # ln -f replaces the duplicate in a single step, so there is no separate
    # rm to wait for and no window where the name is gone but not yet linked
    # by this script.
    if ! ln -f -- "$keeper" "$path"; then
      printf 'Could not hard link %s to %s\n' "$path" "$keeper" >&2
      status=1
    fi
  done
  return "$status"
}

#######################################
# Runs fdupes on the validated directory and hard-links each set of
# duplicates to the member that comes first in lexical order.
# Globals:   input_dir (read) - directory to scan
# Arguments: none
# Outputs:   diagnostics on stderr when the directory is unset or a link fails
# Returns:   0 when all sets were processed successfully, 1 otherwise
#######################################
duplicates_remover () {
  local line status=0
  local -a batch=()

  if [[ -z "${input_dir:-}" ]]; then
    echo "Input is not valid" >&2
    return 1
  fi

  # fdupes' default output is one path per line with a blank line closing
  # each set. Reading it line by line (instead of word-splitting --sameline
  # output on spaces) keeps file names that contain spaces intact and leaves
  # the global IFS untouched.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ -n "$line" ]]; then
      batch+=("$line")
      continue
    fi
    if (( ${#batch[@]} > 0 )); then
      _link_duplicate_batch "${batch[@]}" || status=1
      batch=()
    fi
  done < <(fdupes -- "${input_dir}")

  # Flush a final set that was not terminated by a blank line.
  if (( ${#batch[@]} > 0 )); then
    _link_duplicate_batch "${batch[@]}" || status=1
  fi

  return "$status"
}
#######################################
# Entry point: checks prerequisites, validates the argument, then converts
# the duplicates into hard links.
# Arguments: $@ - command-line arguments; the first one is handed to
#            validate_input_directory as the directory to process
# Returns:   status of duplicates_remover; the two checks before it exit
#            the script themselves on failure
#######################################
main () {
  # Checks for the availability of the fdupes binary
  checks_fdupes
  # "$@" (quoted) forwards each argument as one word, so a directory name
  # containing spaces is not split into several arguments.
  validate_input_directory "$@"
  # Modifies the duplicates as hard links to the non removed file
  duplicates_remover
}
# Program starting point: forward the command-line arguments unchanged
# ("$@" keeps arguments containing spaces as single words).
main "$@"
# Exit with main's status so a failure is not reported as success.
exit "$?"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment