Last active
July 27, 2019 00:34
-
-
Save hartikainen/0eeaca69e6a8b2e039d8a1b3dec59802 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import os | |
from pprint import pprint | |
import re | |
import shutil | |
# Patterns (applied with `re.match` to a bare file name) identifying the
# result files written by the loggers into a trial directory.
RESULT_FILE_REGEXES = (
    r"^result\.json$",
    r"^progress\.csv$",
    # e.g. `events.out.tfevents.1562394433.ray-hopp-2-head-4ba37bcf`: the
    # trailing host name is longer than one character and may contain
    # dashes, so it cannot be matched with a single `\w`.
    r"^events\.out\.tfevents\.\d+\..+$",
)
# Patterns (applied with `re.match` to a bare file name) identifying the
# parameter files found in a trial directory. Dots are escaped so that they
# only match a literal `.`.
PARAMS_FILE_REGEXES = (
    r"^params\.json$",
    r"^params\.pkl$",
)
# Patterns (applied with `re.match` to a bare directory name) identifying
# checkpoint directories.
# NOTE: the trailing comma is required. Without it the parentheses are just
# grouping and this becomes a plain string, which iterates character by
# character when used as a collection of patterns.
CHECKPOINT_DIRECTORY_REGEXES = (
    r"^checkpoint_\d+$",
)
def is_result_file(filename):
    """Return True if `filename` matches any of `RESULT_FILE_REGEXES`.

    Each pattern is tried with `re.match`, i.e. anchored at the start of
    `filename`.
    """
    for pattern in RESULT_FILE_REGEXES:
        if re.match(pattern, filename) is not None:
            return True
    return False
def is_params_file(filename):
    """Return True if `filename` matches any of `PARAMS_FILE_REGEXES`.

    Each pattern is tried with `re.match`, i.e. anchored at the start of
    `filename`.
    """
    for pattern in PARAMS_FILE_REGEXES:
        if re.match(pattern, filename) is not None:
            return True
    return False
def is_checkpoint_directory(dirname):
    """Return True if `dirname` is named like a checkpoint directory.

    Args:
        dirname: Either a bare directory name or a path to the directory.
            Only the final path component is tested, because the patterns
            in `CHECKPOINT_DIRECTORY_REGEXES` are applied with `re.match`
            and would otherwise be tested against the leading path
            components.

    Returns:
        bool: Whether the final component matches any of the patterns in
        `CHECKPOINT_DIRECTORY_REGEXES`.
    """
    # TODO(hartikainen): might want to check the contents of this directory.
    # e.g. check `.tune_metadata`, etc.
    # `normpath` drops a trailing separator so that `basename` is non-empty
    # for inputs such as "some/path/checkpoint_3/".
    name = os.path.basename(os.path.normpath(dirname))
    return any(
        re.match(checkpoint_directory_regex, name)
        for checkpoint_directory_regex in CHECKPOINT_DIRECTORY_REGEXES)
def is_trial_directory(root_dir):
    """Return whether `root_dir` looks like a trial directory.

    A directory counts as a trial directory when it directly contains at
    least one result file (see `is_result_file`) and at least one params
    file (see `is_params_file`). Subdirectories are not inspected.

    Args:
        root_dir: Path of the directory to inspect.

    Returns:
        bool: True if both kinds of files are present. False otherwise,
        including when `root_dir` cannot be listed.
    """
    # `os.walk` yields nothing for a path it cannot list, so a bare
    # `next(...)` would raise StopIteration here (and RuntimeError when this
    # function is called from inside a generator expression).
    walked = next(os.walk(root_dir), None)
    if walked is None:
        return False
    _, _, files = walked

    # json logger: params.json, result.json, params.pkl
    # csv logger: progress.csv
    # tf logger: events.out.tfevents.1562394433.ray-hopp-2-head-4ba37bcf
    # log_syncxurz09ic.log
    has_result_file = any(is_result_file(filename) for filename in files)
    has_params_file = any(is_params_file(filename) for filename in files)

    # TODO(hartikainen): checkpoint directories are currently not taken into
    # account here.
    # TODO(hartikainen): might want to check if "^log_sync\\d{8}.log$" exists
    return has_result_file and has_params_file
def is_experiment_directory(root, directories, files):
    """Return whether `root` is an experiment directory.

    A directory is treated as an experiment directory if either
      1) it contains at least one `experiment_state*.json` entry, or
      2) it has subdirectories and every one of them is a trial directory
         (see `is_trial_directory`).

    Args:
        root: Path of the directory to inspect.
        directories: Names of the immediate subdirectories of `root`.
        files: Names of the files directly in `root`. Currently unused; the
            state files are looked up on disk instead.

    Returns:
        bool: True if `root` is an experiment directory, False otherwise.
    """
    # 1) experiment_state*.json exists -> is experiment
    # A directory can contain several experiment state files, so any
    # positive number of matches qualifies. `glob.escape` keeps special
    # characters in `root` (e.g. `[`) from being read as glob syntax.
    experiment_state_paths = glob.glob(
        os.path.join(glob.escape(root), "experiment_state*.json"))
    if experiment_state_paths:
        return True

    # 2) All the subfolders are trials -> is experiment
    return bool(directories) and all(
        is_trial_directory(os.path.join(root, directory))
        for directory in directories)
def find_all_experiment_directories(root_dir):
    """Recursively collect the experiment directories under `root_dir`.

    The search stops descending as soon as a directory is recognised as an
    experiment directory (see `is_experiment_directory`), so directories
    nested inside an experiment directory are never reported.

    Args:
        root_dir: Directory to start from. A leading `~` is expanded.

    Returns:
        tuple: Paths of the experiment directories that were found. Empty
        if `root_dir` does not exist or cannot be listed.
    """
    root_dir = os.path.expanduser(root_dir)

    # `os.walk` yields nothing for a missing or unreadable directory; use a
    # default so that case produces an empty result instead of raising
    # StopIteration.
    walked = next(os.walk(root_dir), None)
    if walked is None:
        return ()
    root, directories, files = walked

    if is_experiment_directory(root, directories, files):
        return (root_dir, )

    # Accumulate in a list: repeatedly concatenating tuples copies the
    # partial result for every subdirectory.
    all_experiment_directories = []
    for directory in directories:
        all_experiment_directories.extend(
            find_all_experiment_directories(os.path.join(root, directory)))

    return tuple(all_experiment_directories)
def prune_and_copy_directory(source_base_directory,
                             target_base_directory,
                             relative_directory):
    """Copy a pruned version of a directory tree to another location.

    Walks `source_base_directory/relative_directory` and copies only the
    files whose names match one of the patterns below, recreating the same
    relative layout under `target_base_directory`. Directories without any
    matching file are not created in the target.

    Args:
        source_base_directory: Base directory of the source tree.
        target_base_directory: Base directory to copy into. Created on
            demand.
        relative_directory: Directory to copy, relative to
            `source_base_directory`.
    """
    files_to_keep_regexes = (
        r"^params\.json$",
        r"^result\.json$",
        r"^progress\.csv$",
        # `*` rather than `?` so that arbitrary-length suffixes match, e.g.
        # `experiment_state-2019-07-06_12-00-00.json`.
        r"^experiment_state[\w-]*\.json$",
        r"^experiment_state$",
    )

    source_directory = os.path.join(source_base_directory, relative_directory)

    for root, _, files in os.walk(source_directory):
        files_to_copy = [
            file_name
            for file_name in files
            if any(re.match(regex, file_name)
                   for regex in files_to_keep_regexes)
        ]

        if not files_to_copy:
            continue

        # `relpath` only strips the leading base directory, whereas
        # `str.replace` would also remove any later occurrence of the same
        # text inside the path.
        target_directory = os.path.normpath(os.path.join(
            target_base_directory,
            os.path.relpath(root, source_base_directory)))
        os.makedirs(target_directory, exist_ok=True)

        for file_name in files_to_copy:
            shutil.copyfile(
                os.path.join(root, file_name),
                os.path.join(target_directory, file_name))
def main(base_directory="~/ray_results",
         target_directory="/tmp/data_for_richard"):
    """Copy pruned versions of all experiments in a results directory.

    Finds every experiment directory below `base_directory`, prints the
    list, and copies the files kept by `prune_and_copy_directory` into the
    same relative location under `target_directory`.

    Args:
        base_directory: Directory to search for experiments. A leading `~`
            is expanded.
        target_directory: Directory to copy the pruned experiments into.
    """
    base_directory = os.path.expanduser(base_directory)

    experiment_directories = find_all_experiment_directories(base_directory)
    pprint(experiment_directories)

    for experiment_directory in experiment_directories:
        # `relpath` removes only the leading base directory; `str.replace`
        # would also drop later occurrences of the same text in the path.
        relative_experiment_directory = os.path.relpath(
            experiment_directory, base_directory)
        if relative_experiment_directory == os.curdir:
            # The base directory itself is the experiment directory.
            relative_experiment_directory = ""

        prune_and_copy_directory(
            base_directory,
            target_directory,
            relative_experiment_directory)
# Run the copy only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment