Skip to content

Instantly share code, notes, and snippets.

@crazy4pi314
Created June 8, 2020 20:28
Show Gist options
  • Save crazy4pi314/02eb96f1076488c86cd55808fb06613a to your computer and use it in GitHub Desktop.
Save crazy4pi314/02eb96f1076488c86cd55808fb06613a to your computer and use it in GitHub Desktop.
import sys
from os import listdir
from os.path import isfile, join, getctime, basename
import datetime
import glob
from collections import defaultdict
from hashlib import sha256
import asyncio as aio
import aiofiles as aiof
import pprint
import click
async def hash_file(path: str, chunk_size: int = 1 << 20) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is opened in binary mode through ``aiofiles`` and fed to the
    hash in blocks of *chunk_size* bytes, so memory use stays bounded by
    the block size instead of growing with the size of the file.

    Args:
        path: Location of the file to hash.
        chunk_size: Number of bytes requested per read (default 1 MiB).

    Returns:
        The SHA-256 digest as a hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) would return b"" immediately and yield the digest of an
        # empty input; read(-1) would load the whole file at once.
        raise ValueError("chunk_size must be a positive integer")

    digest = sha256()
    async with aiof.open(path, 'rb') as f:
        while True:
            chunk = await f.read(chunk_size)
            if not chunk:  # empty bytes object signals end of file
                break
            digest.update(chunk)
    return digest.hexdigest()
@click.command()
@click.option('--path', default=".", prompt="path to search", help='Root directory to search for duplicates')
def main(path):
    """Find PNG files with identical contents below PATH and list them."""
    # click shows the docstring above as the command description in --help.
    # The command itself is synchronous, so start an event loop here and
    # run the asynchronous search to completion.
    aio.run(main_async(path))
async def main_async(path):
    """Report PNG files under *path* whose contents are byte-identical.

    Recursively collects every file whose extension is ``png`` (in any
    letter case), groups the files by the digest returned from
    ``hash_file``, pretty-prints the groups holding more than one file,
    and prints a one-line summary.

    Args:
        path: Root directory of the search.

    Returns:
        None. Results are written to standard output.
    """
    # Escape the user-supplied directory so characters such as '[' or '*'
    # in its name are matched literally rather than acting as wildcards.
    pattern = join(glob.escape(path), '**', '*.[Pp][Nn][Gg]')
    # A directory can also carry a ".png" name; only regular files can be hashed.
    files = [found for found in glob.glob(pattern, recursive=True) if isfile(found)]

    files_by_hash = defaultdict(list)
    for file in files:
        files_by_hash[await hash_file(file)].append(file)

    pics_with_dupes = {
        digest: paths
        for digest, paths in files_by_hash.items()
        if len(paths) > 1
    }
    pprint.pprint(pics_with_dupes)
    # Count the files that were scanned, not the distinct digests: the two
    # numbers differ exactly when duplicates exist.
    print(f"Found {len(files)} files, {len(pics_with_dupes)} duplicate(s) found.")
# Run the command-line interface only when this file is executed as a
# script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment