Created
May 15, 2017 03:52
-
-
Save dicksonkv/ef0d685ad67dd5ec96426060736b6530 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
'''
This script copies every file with a specific extension into a single directory.
Files whose content duplicates an already-copied file are skipped.
Usage : ./duplicateRemoverCopy.py -s /home/ -e .mp3 -d /music
'''
import argparse | |
import os | |
import sys | |
import shutil | |
import hashlib | |
# Refuse to run unless the effective user id is 0 (root).
if os.geteuid() != 0:
    sys.exit('Script must be run as root')
# Command-line interface: all three options are mandatory.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-s",
    dest="sourceDirectory",
    help="Directory to scan for files.",
    metavar='/path/to/scan',
    required=True,
)
parser.add_argument(
    "-e",
    dest="fileExtension",
    help="File extension that you want to scan.",
    metavar='.extension',
    required=True,
)
parser.add_argument(
    "-d",
    dest="backupDirectory",
    help="Directory to copy the files.",
    metavar='/path/to/save',
    required=True,
)
args = parser.parse_args()

# Expose the parsed options under the module-level names the rest of the
# script reads.
sourceDirectory = args.sourceDirectory
backupDirectory = args.backupDirectory
fileExtension = args.fileExtension
# MD5 hex digests of every distinct file content examined so far, in
# first-seen order.
hashlist = []


def filefound(fname):
    """Report whether a file with identical content was already examined.

    The MD5 digest of the file's bytes is looked up in the module-level
    ``hashlist``. A digest that is not yet present is appended to it, so
    the next file with the same content is reported as a duplicate. MD5
    is used here only as a content fingerprint, not for security.

    Args:
        fname: Path of the file to examine.

    Returns:
        False if this content has not been seen before (not copied yet),
        True if an earlier file had the same content (already copied).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hashObj = hashlib.new('md5')
    # Hash in fixed-size chunks so large files are never loaded into
    # memory whole; the ``with`` block guarantees the handle is closed.
    with open(fname, 'rb') as fObj:
        for chunk in iter(lambda: fObj.read(65536), b''):
            hashObj.update(chunk)
    hashSum = hashObj.hexdigest()
    if hashSum in hashlist:
        return True  # already copied
    hashlist.append(hashSum)
    return False  # not copied yet
# Create the backup directory, including any missing parent directories.
# A path that already exists as a regular file now fails here instead of
# being silently overwritten by every copy.
os.makedirs(backupDirectory, exist_ok=True)

# Resolved once so the walk can recognise the backup directory even when it
# is reached through a relative path or a symlink.
backupRealPath = os.path.realpath(backupDirectory)

# Base names written to the backup directory during this run. Files from
# different source folders can share a name while differing in content; a
# flat copy would let the later one overwrite the earlier one.
copiedNames = set()

for rootDir, subDirs, subFiles in os.walk(sourceDirectory):
    if os.path.realpath(rootDir) == backupRealPath:
        # Do not scan the destination itself: copying a file onto itself
        # raises shutil.SameFileError. Clearing subDirs in place stops
        # os.walk from descending any further below it.
        subDirs[:] = []
        continue
    for subFile in subFiles:
        absPath = os.path.join(rootDir, subFile)
        if not absPath.endswith(fileExtension):
            continue
        if filefound(absPath):
            # Same content was already copied from another location.
            continue
        # Pick a destination name not yet used in this run, appending
        # _1, _2, ... before the extension on a collision.
        targetName = subFile
        stem, suffix = os.path.splitext(subFile)
        counter = 1
        while targetName in copiedNames:
            targetName = '{}_{}{}'.format(stem, counter, suffix)
            counter += 1
        copiedNames.add(targetName)
        print('Copying :', absPath)
        shutil.copy(absPath, os.path.join(backupDirectory, targetName))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment