Created
December 5, 2021 14:06
-
-
Save birkin/f0c66be4dcf219820b255b3acca9f9d1 to your computer and use it in GitHub Desktop.
playing with os.makedirs() & pairtree
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Skimming <https://realpython.com/working-with-files-in-python/> | |
| for some forgotten reason, I came across the section "Creating Multiple Directories", | |
| which showed how the command: | |
| ... os.makedirs( '2018/10/05' ) | |
| ...will create a hierarchy of directories like: | |
| . | |
| | | |
| └── 2018/ | |
|     └── 10/ | |
|         └── 05/ | |
| So cool! Which then got me thinking of how that could be useful for creating pairtree paths. | |
| Here's some weekend play for that. | |
| (Also used pathlib for the first time, hence the explicit reminder var names and asserts.) | |
| """ | |
| import hashlib, logging, os, pathlib, pprint, shutil, sys | |
| from pathlib import Path | |
## require a modern python3 ----------------------
## (explicit raise rather than `assert`, which `python -O` strips; the f-strings below need 3.6+)
if sys.version_info < (3, 6):
    raise RuntimeError('python 3.6+ required')


## set up logging -------------------------------
logging.basicConfig(
    level='DEBUG',
    format='[%(asctime)s] %(levelname)s [%(module)s-%(funcName)s()::%(lineno)d] %(message)s',
    datefmt='%d/%b/%Y %H:%M:%S')
log = logging.getLogger(__name__)
log.info('\n\nstarting log\n============')


## set up directory references ------------------
## (relative paths; they are resolved against the current working directory when main() runs)
SOURCE_DIR = Path('../source_files')
DESTINATION_DIR = Path('../output_directory')


def _list_source_files(source_dir):
    """Return the non-hidden files directly inside `source_dir`, sorted by name.

    Sub-directories and dot-files (e.g. macOS `.DS_Store`) are skipped.
    Entries are deliberately NOT resolved, so a symlinked file keeps the
    name it has inside `source_dir`.
    """
    return sorted(
        entry for entry in source_dir.iterdir()
        if entry.is_file() and not entry.name.startswith('.')
    )


def _md5_hexdigest(file_path, chunk_size=65536):
    """Return the md5 hex-digest (str) of the contents of `file_path`.

    The file is opened in binary mode (it may be an image) and read in
    `chunk_size`-byte pieces so a large file is never held in memory whole.
    """
    digest = hashlib.md5()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _pairtree_dir(destination_dir, hxdgst):
    """Return `destination_dir/<first two hex chars>/<next two hex chars>` as a Path."""
    (first_pair, second_pair) = (hxdgst[0:2], hxdgst[2:4])
    log.debug(f'first_pair, ``{first_pair}``; second_pair, ``{second_pair}``')
    return destination_dir / first_pair / second_pair


def main(source_dir=SOURCE_DIR, destination_dir=DESTINATION_DIR):
    """Copy each file in `source_dir` into a two-level pairtree under `destination_dir`.

    For every non-hidden file, the md5 of its contents picks the target
    directory: `destination_dir/<hex[0:2]>/<hex[2:4]>/<original filename>`.

    Args:
        source_dir: directory (str or Path) whose immediate files are copied.
        destination_dir: root directory (str or Path) of the pairtree; created as needed.

    Returns:
        List of destination Paths, in the (name-sorted) order the files were copied.
        An empty source directory yields an empty list.

    Raises:
        FileNotFoundError: if `source_dir` does not exist.
    """
    source_dir = Path(source_dir).resolve()
    destination_dir = Path(destination_dir).resolve()

    ## get source files -------------------------
    source_files = _list_source_files(source_dir)
    log.debug(f'source_files, ``{source_files}`` ')

    ## iterate through source files -------------
    copied_paths = []
    for source_path in source_files:
        ## get hash (of contents) ---------------
        hxdgst = _md5_hexdigest(source_path)
        log.debug(f'hxdgst, ``{hxdgst}``')

        ## make pair directories ----------------
        new_dir_path = _pairtree_dir(destination_dir, hxdgst)
        log.debug(f'new_dir_path, ``{new_dir_path}``')
        # 0o770 is user/group read/write/execute. The mode is combined with the
        # process umask, and since python 3.7 is applied only to the leaf
        # directory -- which is why it can look like it is not respected.
        os.makedirs(new_dir_path, mode=0o770, exist_ok=True)

        ## copy file ----------------------------
        # Copy from the directory entry itself; rebuilding the source path from a
        # resolved name breaks when the entry is a symlink to a differently-named file.
        log.debug(f'full_source_path, ``{source_path}``')
        full_destination_path = new_dir_path / source_path.name
        log.debug(f'full_destination_path, ``{full_destination_path}``')
        shutil.copy2(source_path, full_destination_path)
        copied_paths.append(full_destination_path)

    log.debug('done!')
    return copied_paths


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment