Skip to content

Instantly share code, notes, and snippets.

@RyanZurrin
Last active July 1, 2024 21:57
Show Gist options
  • Save RyanZurrin/55b936156e09ce32c016e7f0f716fcd0 to your computer and use it in GitHub Desktop.
Save RyanZurrin/55b936156e09ce32c016e7f0f716fcd0 to your computer and use it in GitHub Desktop.
Consolidates multiple per-subject CSV VTP measure files into a single CSV file.
import os
import pandas as pd
import glob
import argparse
from tqdm import tqdm
# When True, print per-file diagnostics while parsing names.
DEBUG = True

# Module-level accumulators written by extract_subject_id_and_tract_name and
# flushed to disk by consolidate_csv_files.
error_log = []  # names that failed to parse
log = []        # names parsed successfully


def extract_subject_id_and_tract_name(name):
    """Extract the subject ID and tract name from a VTP measure path.

    Expects *name* to be a '/'-separated path in which some component
    contains 'sub-<ID>' (optionally followed by '_ses-...') and whose last
    component is the tract file name, e.g.
    '/data/sub-001_ses-1/AF_left.csv' -> ('001', 'AF_left').

    Args:
        name: Path string taken from a CSV 'Name' column.

    Returns:
        (subject_id, tract_name) on success; ("Unknown", "Unknown") when the
        path does not match the expected layout. Successful names are
        appended to the module-level ``log``; failures to ``error_log``.
    """
    try:
        parts = name.split('/')
        # Tract name is the final path component without its extension.
        tract_name = os.path.splitext(parts[-1])[0]
        # First path component carrying the subject label.
        subject_part = next(part for part in parts if 'sub-' in part)
        # 'sub-001_ses-1' -> split('-')[1] -> '001_ses-1' -> strip '_ses...'
        subject_id = subject_part.split('-')[1].split('_ses')[0]
        if DEBUG:
            print(f'parts: {parts}')
            print(f"Name: {name}")
            print(f"Subject ID: {subject_id}")
            print(f"Tract Name: {tract_name}")
        # Record the processed name unconditionally so the log file is
        # complete even when DEBUG is off.
        log.append(name)
        return subject_id, tract_name
    except (IndexError, StopIteration) as e:
        # No 'sub-' component, or a malformed label: record and carry on
        # so one bad row does not abort the whole consolidation.
        error_log.append(name)
        if DEBUG:
            print(f"Error processing {name}: {e}")
        return "Unknown", "Unknown"
def consolidate_csv_files(input_directory, output_file, error_log_file, log_file):
    """Consolidate all CSV measure files in a directory into one CSV.

    Each input CSV must contain a 'Name' column holding the source path;
    it is split into 'Subject_ID' and 'Tract_Name' columns (via
    extract_subject_id_and_tract_name) and dropped.

    Args:
        input_directory: Directory scanned (non-recursively) for '*.csv'.
        output_file: Path of the consolidated CSV to write.
        error_log_file: Path receiving names that failed to parse.
        log_file: Path receiving names that were processed successfully.

    Raises:
        ValueError: if *input_directory* contains no CSV files.
    """
    all_files = glob.glob(os.path.join(input_directory, "*.csv"))
    if not all_files:
        # pd.concat([]) would raise a cryptic error later; fail early.
        raise ValueError(f"No CSV files found in {input_directory}")
    consolidated_data = []
    for file in tqdm(all_files, desc="Processing CSV files"):
        df = pd.read_csv(file)
        # Derive the identifying columns from the 'Name' path, then drop it.
        df[['Subject_ID', 'Tract_Name']] = (
            df['Name'].apply(extract_subject_id_and_tract_name).apply(pd.Series)
        )
        df.drop(columns=['Name'], inplace=True)
        consolidated_data.append(df)
    final_df = pd.concat(consolidated_data, ignore_index=True)
    # Reorder columns so the identifying columns come first.
    id_cols = ['Subject_ID', 'Tract_Name']
    final_df = final_df[id_cols + [col for col in final_df.columns if col not in id_cols]]
    final_df.to_csv(output_file, index=False)
    # Flush the module-level logs accumulated during name extraction.
    with open(error_log_file, 'w') as f:
        for error in error_log:
            f.write(f"{error}\n")
    with open(log_file, 'w') as f:
        for item in log:
            f.write(f"{item}\n")
if __name__ == "__main__":
    # CLI entry point: consolidate every CSV in a directory into one file.
    parser = argparse.ArgumentParser(description="Consolidate CSV files into one CSV file.")
    parser.add_argument("-i", "--input_directory", required=True, help="Directory containing input CSV files")
    parser.add_argument("-o", "--output_file", required=True, help="Path to the output consolidated CSV file")
    # Fixed help text: original had a stray ')' in "Path to the log file)".
    parser.add_argument("-l", "--log_file", required=True, help="Path to the log file")
    parser.add_argument("-e", "--error_log_file", required=True, help="Path to the error log file")
    args = parser.parse_args()
    consolidate_csv_files(args.input_directory, args.output_file, args.error_log_file, args.log_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment