Last active
July 1, 2024 21:57
-
-
Save RyanZurrin/55b936156e09ce32c016e7f0f716fcd0 to your computer and use it in GitHub Desktop.
Consolidates multiple subject CSV VTP measure files into a single CSV file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
import glob | |
import argparse | |
from tqdm import tqdm | |
DEBUG = True  # when True, print per-path parsing details to stdout

# Module-level collectors: written here, flushed to disk by
# consolidate_csv_files.
error_log = []  # paths that could not be parsed
log = []        # paths that were parsed successfully


def extract_subject_id_and_tract_name(name):
    """Parse a VTP-measure path into ``(subject_id, tract_name)``.

    The path is expected to contain a ``sub-<ID>`` component (optionally
    followed by ``_ses...``) and to end with ``<tract_name>.csv``.

    Parameters
    ----------
    name : str
        Path string taken from a CSV 'Name' column.

    Returns
    -------
    tuple[str, str]
        ``(subject_id, tract_name)`` on success; ``("Unknown", "Unknown")``
        when no ``sub-`` component or ID can be found (the offending path is
        appended to the module-level ``error_log``).
    """
    try:
        # Normalize Windows separators so the split works on either platform
        # (the original '/'-only split missed the 'sub-' part in '\\' paths).
        parts = name.replace('\\', '/').split('/')
        filename = parts[-1]
        subject_part = next(part for part in parts if 'sub-' in part)
        # 'sub-123_ses-01' -> '123': strip the 'sub-' prefix, drop '_ses...'
        subject_id = subject_part.split('-')[1].split('_ses')[0]
        tract_name = os.path.splitext(filename)[0]
        if DEBUG:
            print(f'parts: {parts}')
            print(f"Name: {name}")
            print(f"Subject ID: {subject_id}")
            print(f"Tract Name: {tract_name}")
        log.append(name)
        return subject_id, tract_name
    except (IndexError, StopIteration) as e:
        # StopIteration: no 'sub-' component; IndexError: malformed ID.
        # Record the path and fall back to sentinel values.
        error_log.append(name)
        if DEBUG:
            print(f"Error processing {name}: {e}")
        return "Unknown", "Unknown"
def consolidate_csv_files(input_directory, output_file, error_log_file, log_file):
    """Consolidate all per-subject CSV files in *input_directory* into one CSV.

    Each input file must contain a 'Name' column holding the original VTP
    path; it is replaced by 'Subject_ID' and 'Tract_Name' columns derived
    from that path.

    Parameters
    ----------
    input_directory : str
        Directory scanned (non-recursively) for ``*.csv`` files.
    output_file : str
        Path of the consolidated CSV (written without the index).
    error_log_file : str
        Receives one line per path that failed to parse.
    log_file : str
        Receives one line per path that parsed successfully.

    Raises
    ------
    ValueError
        If *input_directory* contains no CSV files.
    """
    all_files = glob.glob(os.path.join(input_directory, "*.csv"))
    if not all_files:
        # Fail fast with a clear message instead of letting pd.concat raise
        # its opaque "No objects to concatenate" ValueError.
        raise ValueError(f"No CSV files found in {input_directory!r}")
    consolidated_data = []
    for file in tqdm(all_files, desc="Processing CSV files"):
        df = pd.read_csv(file)
        # Derive the two ID columns from the 'Name' path, then drop it.
        df[['Subject_ID', 'Tract_Name']] = (
            df['Name'].apply(extract_subject_id_and_tract_name).apply(pd.Series)
        )
        df.drop(columns=['Name'], inplace=True)
        consolidated_data.append(df)
    final_df = pd.concat(consolidated_data, ignore_index=True)
    # Reorder columns so 'Subject_ID' and 'Tract_Name' come first.
    columns = ['Subject_ID', 'Tract_Name'] + [
        col for col in final_df.columns
        if col not in ('Subject_ID', 'Tract_Name')
    ]
    final_df = final_df[columns]
    final_df.to_csv(output_file, index=False)
    # Persist the parse logs accumulated by extract_subject_id_and_tract_name.
    with open(error_log_file, 'w') as f:
        f.writelines(f"{error}\n" for error in error_log)
    with open(log_file, 'w') as f:
        f.writelines(f"{item}\n" for item in log)
if __name__ == "__main__":
    # CLI entry point: consolidate all subject CSV VTP measure files found
    # in a directory into one output CSV, plus success/error path logs.
    parser = argparse.ArgumentParser(description="Consolidate CSV files into one CSV file.")
    parser.add_argument("-i", "--input_directory", required=True, help="Directory containing input CSV files")
    parser.add_argument("-o", "--output_file", required=True, help="Path to the output consolidated CSV file")
    # Fixed stray ')' that previously appeared in this help string.
    parser.add_argument("-l", "--log_file", required=True, help="Path to the log file")
    parser.add_argument("-e", "--error_log_file", required=True, help="Path to the error log file")
    args = parser.parse_args()
    consolidate_csv_files(args.input_directory, args.output_file, args.error_log_file, args.log_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment