Skip to content

Instantly share code, notes, and snippets.

@birkin
Created December 5, 2021 14:06
Show Gist options
  • Select an option

  • Save birkin/f0c66be4dcf219820b255b3acca9f9d1 to your computer and use it in GitHub Desktop.

Select an option

Save birkin/f0c66be4dcf219820b255b3acca9f9d1 to your computer and use it in GitHub Desktop.
playing with os.makedirs() & pairtree
"""
Skimming <https://realpython.com/working-with-files-in-python/>
for some forgotten reason, I came across the section "Creating Multiple Directories",
which showed how the command:
... os.makedirs( '2018/10/05' )
...will create a hierarchy of directories like:
.
|
└── 2018/
    └── 10/
        └── 05/
So cool! Which then got me thinking of how that could be useful for creating pairtree paths.
Here's some weekend play for that.
(Also used pathlib for the first time, hence the explicit reminder var names and asserts.)
"""
import hashlib, logging, os, pathlib, pprint, shutil, sys
from pathlib import Path
assert sys.version_info >= (3, 6)  # python3.6+ -- the f-strings used below do not exist before 3.6
## set up logging -------------------------------
# Configure the root logger: DEBUG and above, each record carrying a timestamp,
# the level name, and the module / function / line number that emitted it.
logging.basicConfig(
    datefmt='%d/%b/%Y %H:%M:%S',
    format='[%(asctime)s] %(levelname)s [%(module)s-%(funcName)s()::%(lineno)d] %(message)s',
    level='DEBUG',
)
# Logger named after this module; used by the rest of the script.
log = logging.getLogger( __name__ )
log.info( '\n\nstarting log\n============' )
## set up directory references ------------------
# Both locations are written relative to the current working directory;
# resolve() turns each of them into an absolute path object.
source_files_dir_obj, destination_files_dir_obj = (
    Path( relative_dir ).resolve()
    for relative_dir in ( '../source_files', '../output_directory' ) )
# pathlib reminder: Path() returns the concrete class for the running OS.
assert all(
    type(dir_obj) in [ pathlib.PosixPath, pathlib.WindowsPath ]
    for dir_obj in ( source_files_dir_obj, destination_files_dir_obj ) )
## get source files -----------------------------
# Everything directly inside the source directory (not recursive).
source_path_objs = list( source_files_dir_obj.iterdir() )
# pathlib reminder: iterdir() yields concrete path objects. Checking every entry with
# all() -- rather than indexing [0] -- keeps an empty source directory from raising IndexError.
assert all( type(entry) in [ pathlib.PosixPath, pathlib.WindowsPath ] for entry in source_path_objs )
log.debug( f'source_path_objs, ``{source_path_objs}`` ' )
# Keep files only (directories are skipped), and drop hidden dot-files.
source_file_objs = []
for path_obj in source_path_objs:
    if not path_obj.is_file():
        continue
    name = path_obj.name
    assert type(name) == str
    if not name.startswith( '.' ):  # macOS `.DS_Store`
        # Stored resolved: absolute, with any symlink replaced by its target.
        source_file_objs.append( path_obj.resolve() )
log.debug( f'source_file_objs, ``{source_file_objs}`` ' )
## iterate through source files ----------------
# For each source file: hash its contents, use the first two character-pairs of the
# hex digest as a two-level directory path under the destination, and copy the file there.
for path_obj in source_file_objs:
    ## get hash (of contents) ---------------
    # Read as bytes, not text -- necessary if some of the files are images.
    # The whole file is held in memory while it is hashed.
    contents = path_obj.read_bytes()
    assert type(contents) == bytes, type(contents)
    hxdgst = hashlib.md5( contents ).hexdigest()
    assert type(hxdgst) == str, type(hxdgst)
    log.debug( f'hxdgst, ``{hxdgst}``')
    ## get first and second pairs -----------
    # e.g. a digest starting 'd41d...' gives 'd4' and '1d'.
    ( first_pair, second_pair ) = ( hxdgst[0:2], hxdgst[2:4] )
    log.debug( f'first_pair, ``{first_pair}``; second_pair, ``{second_pair}``' )
    ## make pair directories ----------------
    # Joined with pathlib so the separators are right on every OS.
    new_dir_path = destination_files_dir_obj / first_pair / second_pair
    log.debug( f'new_dir_path, ``{new_dir_path}``')
    # mode 0o770 = read/write/execute for user and group. The requested mode is masked by
    # the process umask, and since Python 3.7 it applies only to the leaf directory, so the
    # permissions that end up on disk can be narrower than 0o770.
    os.makedirs( new_dir_path, mode=0o770, exist_ok=True )
    ## copy file ----------------------------
    filename = path_obj.name
    # Entries in source_file_objs were stored via resolve(), so path_obj is already the
    # absolute, symlink-free location of the file. Copy from it directly: re-joining its
    # name onto the source directory points at a non-existent file whenever the original
    # entry was a symlink to a target elsewhere or under a different name.
    full_source_path = path_obj
    log.debug( f'full_source_path, ``{full_source_path}``')
    full_destination_path = new_dir_path / filename
    log.debug( f'full_destination_path, ``{full_destination_path}``')
    # copy2 copies the contents and also tries to preserve file metadata.
    shutil.copy2( full_source_path, full_destination_path )
log.debug( 'done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment