Last active
May 28, 2018 23:54
-
-
Save narbehaj/bda6c044273468abee01b317aa80ab28 to your computer and use it in GitHub Desktop.
Finds duplicate files under the directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib
import os


def file_md5(path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the entire contents of *path*.

    The file is read in blocks of *chunk_size* bytes and fed to a single
    hash object, so the digest covers every byte of the file without
    loading the whole file into memory.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as file_read:
        for chunk in iter(lambda: file_read.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def find_duplicates(root, suffix='mkv', seen_hashes=None):
    """Yield the path of every file under *root* whose content was seen before.

    Only files whose name ends with *suffix* are examined. The first file
    with a given content digest is treated as the original; each later file
    with the same digest is yielded as a duplicate.

    Args:
        root: directory to walk recursively.
        suffix: file-name ending that selects the files to compare.
        seen_hashes: optional list of digests already known. Digests of
            newly discovered distinct files are appended to it in place.

    Yields:
        Full path of each duplicate file, as soon as it is detected.
    """
    if seen_hashes is None:
        seen_hashes = []
    # A set gives O(1) membership tests; the list is kept only as the
    # caller-visible record of distinct digests.
    known = set(seen_hashes)
    for dirpath, dirnames, filenames in os.walk(root):
        # os.walk makes no ordering promise; sorting keeps the choice of
        # "original" versus "duplicate" stable from run to run.
        dirnames.sort()
        for name in sorted(filenames):
            if not name.endswith(suffix):
                continue
            path = os.path.join(dirpath, name)
            file_hash = file_md5(path)
            if file_hash in known:
                yield path
            else:
                known.add(file_hash)
                seen_hashes.append(file_hash)


# Digests of every distinct file found by the scan below.
m_list = []

for duplicate_path in find_duplicates('/home/test/', 'mkv', m_list):
    print(os.path.basename(duplicate_path))
    # os.remove(duplicate_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment