Create a filename / sha256 checksum list and save as a CSV file.
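The script prompts for a directory, hashes every regular (non-hidden) file directly inside it, and writes the result next to those files. The output is a plain CSV with a "filename,sha256" header row followed by one row per file, roughly like this (filenames are hypothetical placeholders):

filename,sha256
report.txt,<64-char hex digest>
photo.jpg,<64-char hex digest>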
#!/usr/bin/env python3
import csv
from hashlib import sha256
from pathlib import Path


def hash_by_256(fpath):
    """
    Computes a SHA-256 hash of a file using a chunk size chosen by file size.

    This function calculates the SHA-256 hash of the contents of the file at
    the provided file path. Small files are read and hashed in a single pass;
    larger files are read and hashed in chunks whose size grows with the file
    size.

    Args:
        fpath (str): The path to the file whose hash needs to be calculated.
            It should point to an accessible, valid file on the filesystem.

    Returns:
        str: The computed SHA-256 hash as a hexadecimal string.
    """
    # Get file size in bytes
    file_size = Path(fpath).stat().st_size

    # Decide the processing strategy based on file size
    match file_size:
        case size if size <= 1024 * 512:  # Files <= 512 KB -> process in one go
            with open(fpath, 'rb') as f:
                return sha256(f.read()).hexdigest()  # Read entire small file
        case size if size <= 1024 * 1024 * 10:  # Files <= 10 MB -> use 4 KB chunks
            chunk_size = 4096
        case _:  # Files > 10 MB -> use 8 KB chunks
            chunk_size = 8192

    # Process the file in chunks
    file_hash = sha256()
    with open(fpath, 'rb') as f:
        while chunk := f.read(chunk_size):
            file_hash.update(chunk)
    return file_hash.hexdigest()
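
# Illustrative check (hypothetical 0-byte file; the digest shown is the
# well-known SHA-256 of empty input):
#   >>> hash_by_256('empty.bin')
#   'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'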


def list_fpaths_in_dir(src_dir):
    """Lists all file paths in the specified directory.

    This function scans a given directory and retrieves a list of file paths,
    excluding hidden files and directories (those starting with a dot). Only
    regular files are included in the returned list.

    Args:
        src_dir (str): The directory path to scan for file paths.

    Returns:
        List[Path]: A list of file paths present in the specified directory.
    """
    return [i for i in Path(src_dir).glob('[!.]*') if i.is_file()]
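
# Note: the '[!.]*' pattern is non-recursive and skips dot-prefixed entries,
# so files such as '.DS_Store' never reach the hash table. Glob order is not
# guaranteed; the caller sorts the result before hashing.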


def make_fname_vs_sha_tbl(list_of_fpath):
    """Builds a table of [filename, sha256] rows preceded by a header row."""
    header = [['filename', 'sha256']]
    return header + [[i.name, hash_by_256(i)] for i in list_of_fpath]


def prevent_overwrite(fpath):
    """
    Prevents overwriting an existing file by appending a numerical suffix to the file name.

    This function checks whether the given file path refers to an existing file. If it
    does, it derives a new name from the original stem and extension by appending an
    incrementing numerical suffix (_001, _002, ...) until the resulting path does not
    exist, so the existing file is never overwritten.

    Args:
        fpath (Path): The original file path to check for potential overwriting.

    Returns:
        Path: A modified file path with a numerical suffix if the original file exists,
            or the original path if no file exists with that name.
    """
    current_path = fpath
    i = 1
    while current_path.is_file():
        # Rebuild from the original stem each time so suffixes do not pile up.
        current_path = fpath.with_name(f'{fpath.stem}_{i:03}{fpath.suffix}')
        i += 1
    return current_path
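
# Example (hypothetical names): if 'sha256_docs.csv' already exists, the next
# candidate is 'sha256_docs_001.csv', then 'sha256_docs_002.csv', and so on.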


def save_as_csv(csv_path, list_of_lists):
    """Writes the rows in list_of_lists to csv_path and reports the saved path."""
    with open(csv_path, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerows(list_of_lists)
    print(f'\033[93mSaved {csv_path}\033[0m')


if __name__ == '__main__':
    my_src_dir = Path(input('DIR? >> ').strip())
    fpath_list = sorted(list_fpaths_in_dir(str(my_src_dir)))
    arr = make_fname_vs_sha_tbl(fpath_list)
    # Build the output path (including its .csv suffix) before checking for
    # collisions, so a CSV left by a previous run is not overwritten.
    out_csv = prevent_overwrite(my_src_dir / f'sha256_{my_src_dir.name}.csv')
    save_as_csv(out_csv, arr)
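A typical run (directory name hypothetical): the script prompts with "DIR? >> ", you enter a path such as /path/to/docs, and once the table is written it prints "Saved /path/to/docs/sha256_docs.csv" in yellow. The CSV is placed inside the scanned directory itself, so a later run will also list the checksum file it produced.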