crazy4pi314 · June 8, 2020 20:28
diff --git a/find_hashes.py b/find_hashes.py
 import sys
 from os import listdir
 from os.path import isfile, join, getctime, basename
 import datetime
 import glob
 from collections import defaultdict
 from hashlib import sha256

 import asyncio as aio
 import aiofiles as aiof
 import pprint
 import click

 async def hash_file(path: str, chunk_size: int = 1 << 20) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is opened in binary mode through ``aiofiles`` and fed to the
    hash in pieces of at most ``chunk_size`` bytes, so memory use stays
    bounded regardless of the file's size.

    Args:
        path: Location of the file to hash.
        chunk_size: Maximum number of bytes requested per read.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    digest = sha256()
    async with aiof.open(path, 'rb') as f:
        while True:
            chunk = await f.read(chunk_size)
            if not chunk:
                # An empty read signals end of file.
                break
            digest.update(chunk)
    return digest.hexdigest()

 @click.command()
 @click.option('--path', default=".", prompt="path to search", help='Root directory to search for duplicates')
 def main(path):
    """Search PATH recursively for PNG files with identical contents."""
    # click is synchronous, so the coroutine is driven to completion here.
    aio.run(main_async(path))

 async def main_async(path):
    """Report PNG files under *path* whose contents are byte-identical.

    Recursively globs for files ending in ``.png`` (any letter case), groups
    their paths by the digest returned from ``hash_file``, pretty-prints
    every group holding more than one path, and prints a summary line.

    Args:
        path: Root directory of the search.
    """
    files_by_hash = defaultdict(list)
    # join() keeps the pattern relative when path is empty; plain string
    # concatenation would turn '' into '/**/...' and scan the filesystem root.
    files = glob.glob(join(path, '**', '*.[Pp][Nn][Gg]'), recursive=True)

    # Files are hashed one after another; each await completes before the
    # next file is opened.
    for file in files:
        files_by_hash[await hash_file(file)].append(file)

    pics_with_dupes = {
        digest: paths
        for digest, paths in files_by_hash.items()
        if len(paths) > 1
    }
    pprint.pprint(pics_with_dupes)
    # Report how many files were scanned, not how many distinct digests exist.
    print(f"Found {len(files)} files, {len(pics_with_dupes)} duplicate(s) found.")

 # Invoke the click command only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == "__main__":
    main()
	import sys
	from os import listdir
	from os.path import isfile, join, getctime, basename
	import datetime
	import glob
	from collections import defaultdict
	from hashlib import sha256

	import asyncio as aio
	import aiofiles as aiof
	import pprint
	import click

	async def hash_file(path: str, chunk_size: int = 1 << 20) -> str:
	    """Return the hex-encoded SHA-256 digest of the file at *path*.

	    The file is opened in binary mode through ``aiofiles`` and fed to the
	    hash in pieces of at most ``chunk_size`` bytes, so memory use stays
	    bounded regardless of the file's size.

	    Args:
	        path: Location of the file to hash.
	        chunk_size: Maximum number of bytes requested per read.

	    Returns:
	        The digest as a lowercase hexadecimal string.
	    """
	    digest = sha256()
	    async with aiof.open(path, 'rb') as f:
	        while True:
	            chunk = await f.read(chunk_size)
	            if not chunk:
	                # An empty read signals end of file.
	                break
	            digest.update(chunk)
	    return digest.hexdigest()

	@click.command()
	@click.option('--path', default=".", prompt="path to search", help='Root directory to search for duplicates')
	def main(path):
	    """Search PATH recursively for PNG files with identical contents."""
	    # click is synchronous, so the coroutine is driven to completion here.
	    aio.run(main_async(path))

	async def main_async(path):
	    """Report PNG files under *path* whose contents are byte-identical.

	    Recursively globs for files ending in ``.png`` (any letter case), groups
	    their paths by the digest returned from ``hash_file``, pretty-prints
	    every group holding more than one path, and prints a summary line.

	    Args:
	        path: Root directory of the search.
	    """
	    files_by_hash = defaultdict(list)
	    # recursive=True only has an effect with a '**' component, and the file
	    # name needs a '*' wildcard before the extension. join() also keeps the
	    # pattern relative when path is empty instead of anchoring it at '/'.
	    files = glob.glob(join(path, '**', '*.[Pp][Nn][Gg]'), recursive=True)

	    # Files are hashed one after another; each await completes before the
	    # next file is opened.
	    for file in files:
	        files_by_hash[await hash_file(file)].append(file)

	    pics_with_dupes = {
	        digest: paths
	        for digest, paths in files_by_hash.items()
	        if len(paths) > 1
	    }
	    pprint.pprint(pics_with_dupes)
	    # Report how many files were scanned, not how many distinct digests exist.
	    print(f"Found {len(files)} files, {len(pics_with_dupes)} duplicate(s) found.")

	# Invoke the click command only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()