import os

import numpy as np
import pandas as pd

# Directory to read in xlsx files from
data_dir = r"data"

output_file = r"output.csv"

# Indexes of interested columns
columns = {
    "TP1": 1,
    "TP2": 2,
    "TP3": 3,
}
# Indexes of interested rows
rows = {
    "TEST": 5,
    "BP": 6,
    "AMBIOLIC": 7,
    "SOMETHING": 14,
    "WELP": 15,
}
# Define entry key
key = {
    "": 0,
    "x": 1,
    "xx": 2,
    "xxx": 3,
    "o": -1,
}

final_df = None

# Iterate over files in data directory, recursively
for subdir, dirs, files in os.walk(data_dir):
    for filename in files:
        # Only look at the xlsx files
        if filename.endswith(".xlsx"):
            # Get the full file path from the file system walk
            filepath = os.path.join(subdir, filename)
            # Open the file into a pandas data frame
            data = pd.read_excel(filepath).replace(np.nan, "", regex=True)
            # Get the id from the filename
            id = os.path.splitext(filename)[0]
            # Make a temporary data frame to hold the data
            entry = pd.DataFrame(columns=[id])

            # Iterate over the interested rows and columns
            for row in rows:
                for col in columns:
                    # Generate the unique variable name
                    variable = "{}_{}".format(row, col)
                    # Set the variable in the temp data frame using the lookup key
                    entry.loc[variable] = key[data.iloc[rows[row], columns[col]]]

            # Transpose entry to be a single row with several columns
            entry = entry.T

            # If a final dataframe hasn't been started, start it
            if final_df is None:
                final_df = entry
            # If there is a final data frame, append the new entry to it
            else:
                final_df = final_df.append(entry)

# Print the final dataframe
print(final_df)
# Output generated data frame as a CSV file
if final_df is not None:
    final_df.to_csv(output_file, index_label="id")