Last active
January 3, 2019 10:12
-
-
Save louiskounios/ac2ae58dacb6f414c5d4dd4ed39e7420 to your computer and use it in GitHub Desktop.
Python script that flattens a directory without overwriting files that share a name. Takes absolute path to directory as input.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3 | |
# Script that flattens a directory without overwriting files that share a name. | |
# Takes absolute path to directory as input. | |
# | |
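# All files in any subdirectory of the user-provided directory, regardless of
# depth, are moved to the user-provided directory. When filenames clash, every
# clashing file gets a zero-padded counter appended to its name, before the
# suffixes. The counter is as wide as the number of clashing files requires
# (e.g., '_1' for fewer than ten clashes, '_001' for hundreds).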
# | |
# Linux only. Tested only on Python 3.7. Minimum version is 3.6. | |
# | |
# This has NOT been tested extensively. Use at your own risk. | |
import pathlib | |
import sys | |
def flatten_directory(path):
    """Flatten the directory at *path* and report success as an exit code.

    The path is validated first; when validation fails nothing is moved.

    Returns:
        0 when the path was accepted and the files were processed,
        1 when the path was rejected by the validation step.
    """
    if _is_valid_path(path):
        _rename_files(path)
        return 0
    return 1
def _is_valid_path(path):
    """Check that *path* is an absolute ``pathlib.Path`` naming a directory.

    The checks run in order (type, absoluteness, directory) and the first
    one that fails has its message printed.

    Returns:
        True when every check passes, False otherwise.
    """
    if not isinstance(path, pathlib.Path):
        problem = '"path" argument must be instance of "pathlib.Path"'
    elif not path.is_absolute():
        problem = 'Provided path must be an absolute path'
    elif not path.is_dir():
        problem = 'Provided path must be a directory'
    else:
        return True

    print(problem)
    return False
def _top_dir_files(top_dir):
    """Return the set of regular files located directly inside *top_dir*.

    Subdirectories themselves are not included, and nothing below the first
    level is visited.
    """
    found = set()
    for entry in top_dir.glob('*'):
        if entry.is_file():
            found.add(entry)
    return found
def _nested_dir_files(top_dir):
    """Return the set of files found at any depth below *top_dir*.

    Files whose parent is *top_dir* itself are excluded, so the result only
    contains files that live in some subdirectory.
    """
    found = set()
    for entry in top_dir.glob('**/*'):
        if not entry.is_file():
            continue
        if entry.parent == top_dir:
            # Direct children are not "nested"; skip them.
            continue
        found.add(entry)
    return found
def _filename_paths_map(path):
    """Group every file under *path* by its bare filename.

    Returns:
        A dict mapping a filename to the list of paths carrying that name.
        When a file with that name sits directly in *path*, it is the first
        element of the list; files from subdirectories follow it.
    """
    direct_files = _top_dir_files(path)
    deeper_files = _nested_dir_files(path)

    grouped = {}
    # Names are unique within a single directory, so each top-level file
    # starts its own list.
    for entry in direct_files:
        grouped[entry.name] = [entry]
    for entry in deeper_files:
        grouped.setdefault(entry.name, []).append(entry)
    return grouped
def _suffixless_name(path):
    """Return the filename of *path* with all of its suffixes removed.

    'myfile.tar.gz' => 'myfile'
    '.bashrc'       => '.bashrc'   (a leading dot is not a suffix)
    '.config.yaml'  => '.config'

    The part removed is exactly the text reported by ``path.suffixes``, so
    the returned name followed by the joined suffixes always reproduces
    ``path.name``. Splitting on the first dot instead would return an empty
    string for dotfiles and drop their name entirely.
    """
    name = path.name
    suffix_length = len(''.join(path.suffixes))
    # Slice by explicit length: ``name[:-0]`` would yield an empty string
    # for names that have no suffix at all.
    return name[:len(name) - suffix_length]
def _suffix(path):
    """Return every suffix of *path* concatenated into one string.

    'myfile.tar.gz' => '.tar.gz'
    A name without any suffix yields the empty string.
    """
    extensions = path.suffixes
    return ''.join(extensions)
def _counter_suffix(current, max):
    """Return '_' followed by *current*, zero-padded to the width of *max*.

    The padding width is the number of characters in ``str(max)``, e.g.
    ``_counter_suffix(7, 120)`` gives ``'_007'``. A *current* that is wider
    than *max* is left unpadded.
    """
    # NOTE: ``max`` shadows the builtin inside this function; the name is
    # kept because it is part of the existing signature.
    width = len(str(max))
    padded = str(current).zfill(width)
    return '_{}'.format(padded)
def _path_is_taken(candidate):
    """Return True when something already occupies *candidate*.

    ``exists()`` follows symlinks, so a dangling symlink is checked
    separately; renaming onto it would replace it as well.
    """
    return candidate.exists() or candidate.is_symlink()


def _rename_files(top_dir):
    """Move every file below *top_dir* into *top_dir* without overwriting.

    Files are grouped by filename and handled per group:

    * A single file that already sits in *top_dir* is left alone.
    * A single file from a subdirectory is moved into *top_dir* under its
      own name, provided that name is still free there.
    * Otherwise (several files share the name, or the plain name is already
      occupied) each file is moved into *top_dir* with a running,
      zero-padded counter inserted before its suffixes.

    Every target name is checked before renaming, because ``Path.rename``
    silently replaces an existing file on Unix. When a generated name is
    already in use (for example a pre-existing 'a_1.txt', or a name produced
    earlier in this run) the counter simply advances to the next free value.

    Each move, and each file left in place, is reported on stdout.
    """
    filename_paths_map = _filename_paths_map(top_dir)

    for paths in filename_paths_map.values():
        if len(paths) == 1:
            only = paths[0]

            # Filename in top dir does not clash with any other filename.
            if only.parent == top_dir:
                print(f'{only} => No action necessary')
                continue

            # Filename in a nested dir does not clash with any other
            # filename: move it up unchanged if the spot is free.
            new_name = top_dir / only.name
            if not _path_is_taken(new_name):
                print(f'{only} => {new_name}')
                only.rename(new_name)
                continue
            # The plain name is occupied (e.g. by a directory or by a file
            # renamed earlier in this run); fall through to counter naming.

        # The top directory file (if any) and all same-named files from
        # subdirectories are renamed alike, so one running counter over the
        # whole group keeps the new names unique.
        counter_value = 0
        for path in paths:
            base_name = _suffixless_name(path)
            suffix = _suffix(path)
            while True:
                counter_value += 1
                counter = _counter_suffix(counter_value, len(paths))
                new_name = top_dir / (base_name + counter + suffix)
                if not _path_is_taken(new_name):
                    break
            print(f'{path} => {new_name}')
            path.rename(new_name)
if __name__ == '__main__':
    # Exactly one command-line argument is expected besides the script
    # name: the absolute path of the directory to flatten.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print('Need exactly one argument: the absolute path to the directory')
        sys.exit(1)

    # The exit status is whatever flatten_directory reports.
    target_dir = pathlib.Path(cli_args[0])
    sys.exit(flatten_directory(target_dir))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment