Last active
July 1, 2024 21:57
-
-
Save RyanZurrin/55b936156e09ce32c016e7f0f716fcd0 to your computer and use it in GitHub Desktop.
Consolidates multiple subject CSV VTP measure files into a single CSV file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
import glob | |
import argparse | |
from tqdm import tqdm | |
DEBUG = True  # when True, print per-path parsing details to stdout

# Module-level collectors: written here, flushed to disk by
# consolidate_csv_files.
error_log = []  # paths that could not be parsed
log = []        # paths that were parsed successfully


def extract_subject_id_and_tract_name(name):
    """Parse a VTP-measure path into ``(subject_id, tract_name)``.

    The path is expected to contain a ``sub-<ID>`` component (optionally
    followed by ``_ses...``) and to end with ``<tract_name>.csv``.

    Parameters
    ----------
    name : str
        Path string taken from a CSV 'Name' column.

    Returns
    -------
    tuple[str, str]
        ``(subject_id, tract_name)`` on success; ``("Unknown", "Unknown")``
        when no ``sub-`` component or ID can be found (the offending path is
        appended to the module-level ``error_log``).
    """
    try:
        # Normalize Windows separators so the split works on either platform
        # (the original '/'-only split missed the 'sub-' part in '\\' paths).
        parts = name.replace('\\', '/').split('/')
        filename = parts[-1]
        subject_part = next(part for part in parts if 'sub-' in part)
        # 'sub-123_ses-01' -> '123': strip the 'sub-' prefix, drop '_ses...'
        subject_id = subject_part.split('-')[1].split('_ses')[0]
        tract_name = os.path.splitext(filename)[0]
        if DEBUG:
            print(f'parts: {parts}')
            print(f"Name: {name}")
            print(f"Subject ID: {subject_id}")
            print(f"Tract Name: {tract_name}")
        log.append(name)
        return subject_id, tract_name
    except (IndexError, StopIteration) as e:
        # StopIteration: no 'sub-' component; IndexError: malformed ID.
        # Record the path and fall back to sentinel values.
        error_log.append(name)
        if DEBUG:
            print(f"Error processing {name}: {e}")
        return "Unknown", "Unknown"
def consolidate_csv_files(input_directory, output_file, error_log_file, log_file):
    """Consolidate all per-subject CSV files in *input_directory* into one CSV.

    Each input file must contain a 'Name' column holding the original VTP
    path; it is replaced by 'Subject_ID' and 'Tract_Name' columns derived
    from that path.

    Parameters
    ----------
    input_directory : str
        Directory scanned (non-recursively) for ``*.csv`` files.
    output_file : str
        Path of the consolidated CSV (written without the index).
    error_log_file : str
        Receives one line per path that failed to parse.
    log_file : str
        Receives one line per path that parsed successfully.

    Raises
    ------
    ValueError
        If *input_directory* contains no CSV files.
    """
    all_files = glob.glob(os.path.join(input_directory, "*.csv"))
    if not all_files:
        # Fail fast with a clear message instead of letting pd.concat raise
        # its opaque "No objects to concatenate" ValueError.
        raise ValueError(f"No CSV files found in {input_directory!r}")
    consolidated_data = []
    for file in tqdm(all_files, desc="Processing CSV files"):
        df = pd.read_csv(file)
        # Derive the two ID columns from the 'Name' path, then drop it.
        df[['Subject_ID', 'Tract_Name']] = (
            df['Name'].apply(extract_subject_id_and_tract_name).apply(pd.Series)
        )
        df.drop(columns=['Name'], inplace=True)
        consolidated_data.append(df)
    final_df = pd.concat(consolidated_data, ignore_index=True)
    # Reorder columns so 'Subject_ID' and 'Tract_Name' come first.
    columns = ['Subject_ID', 'Tract_Name'] + [
        col for col in final_df.columns
        if col not in ('Subject_ID', 'Tract_Name')
    ]
    final_df = final_df[columns]
    final_df.to_csv(output_file, index=False)
    # Persist the parse logs accumulated by extract_subject_id_and_tract_name.
    with open(error_log_file, 'w') as f:
        f.writelines(f"{error}\n" for error in error_log)
    with open(log_file, 'w') as f:
        f.writelines(f"{item}\n" for item in log)
if __name__ == "__main__":
    # CLI entry point: consolidate all subject CSV VTP measure files found
    # in a directory into one output CSV, plus success/error path logs.
    parser = argparse.ArgumentParser(description="Consolidate CSV files into one CSV file.")
    parser.add_argument("-i", "--input_directory", required=True, help="Directory containing input CSV files")
    parser.add_argument("-o", "--output_file", required=True, help="Path to the output consolidated CSV file")
    # Fixed stray ')' that previously appeared in this help string.
    parser.add_argument("-l", "--log_file", required=True, help="Path to the log file")
    parser.add_argument("-e", "--error_log_file", required=True, help="Path to the error log file")
    args = parser.parse_args()
    consolidate_csv_files(args.input_directory, args.output_file, args.error_log_file, args.log_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment