Dataset extraction utilities
- Script for changing the directory of the `full_path` column of `meta_df`: Link
"""Script to change the directory of the full_path column of the dataset meta_df."""
import argparse
import os
import shutil

import pandas as pd
def change_full_path_directory(df, full_path_col, new_directory):
    """Re-root every path stored in one column of ``df`` under ``new_directory``.

    Only the last two components of each stored path are kept: the file name
    and the name of the directory that directly contains it. They are joined
    again beneath ``new_directory``, so ``/old/root/train/img.jpg`` becomes
    ``<new_directory>/train/img.jpg``.

    Args:
        df: pandas DataFrame holding the paths. The column is overwritten on
            this object (no copy is made) and the same object is returned.
        full_path_col: Label of the column whose values are path strings.
        new_directory: Directory that becomes the new root of every path.

    Returns:
        ``df`` itself, with ``full_path_col`` rewritten.
    """

    def change_directory(full_path):
        """Return ``full_path`` relocated beneath ``new_directory``."""
        basename = os.path.basename(full_path)
        # Keep only the immediate parent directory name (presumably a
        # train/ or test/ split folder -- confirm against the dataset layout);
        # everything above it is discarded.
        parent_name = os.path.basename(os.path.dirname(full_path))
        return os.path.join(new_directory, parent_name, basename)

    df.loc[:, full_path_col] = df.loc[:, full_path_col].apply(change_directory)
    return df


def main():
    """Run the command-line workflow: load, rewrite paths, back up, save.

    Command-line arguments:
        --meta_filename: pickle file containing the meta DataFrame (required).
        --meta_column: column holding the paths (default ``full_path``).
        --new_directory: new root directory for the paths (required).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--meta_filename', action='store', dest='meta_filename', required=True,
                        help='pickle file containing the meta dataframe')
    parser.add_argument('--meta_column', action='store', dest='meta_column', required=False,
                        default='full_path',
                        help='column that stores the file paths')
    parser.add_argument('--new_directory', action='store', dest='new_directory', required=True,
                        help='directory the stored paths should point into')
    command_args = parser.parse_args()

    meta_filename = command_args.meta_filename
    meta_column = command_args.meta_column
    new_directory = command_args.new_directory

    print("Loading meta pickle as dataframe: {}".format(meta_filename))
    # NOTE: unpickling can execute arbitrary code; only run this script on
    # pickle files that come from a trusted source.
    df = pd.read_pickle(meta_filename)

    print("Changing the full path column {} to new directory have {}".format(meta_column, new_directory))
    df = change_full_path_directory(df, meta_column, new_directory)

    # The result is always written to "meta.pkl" next to the input file,
    # whatever the input file itself is called.
    new_meta_filename = os.path.join(os.path.dirname(meta_filename), 'meta.pkl')

    # Back up the input before writing, since the output may be the same file.
    # NOTE(review): if the input is not named meta.pkl, an existing meta.pkl in
    # that directory is overwritten without a backup -- confirm this is intended.
    print("Copying old filename to a backup filename: {}".format(meta_filename + ".bak"))
    shutil.copyfile(meta_filename, meta_filename + ".bak")

    print("Saving changed dataframe to new meta pickle filename: {}".format(new_meta_filename))
    df.to_pickle(new_meta_filename)


if __name__ == "__main__":
    main()