import numpy, sklearn
import pandas as pd
import os

# os.chdir('./Ranger Data/data')

def csv2dataframe(fileobj):
    """Load one CSV file into a pandas DataFrame.

    ``fileobj`` is handed straight to ``open()``, so despite its name it is
    the path of the file to read. The first row of the file is used as the
    column header.
    """
    with open(fileobj, 'r') as csv_handle:
        frame = pd.read_csv(csv_handle, header=0)
    return frame
         
# Load every .csv file in the current working directory and stack the frames
# row-wise. os.listdir() returns names in arbitrary order, so the names are
# sorted first to keep the row order of super_df reproducible between runs
# and across filesystems.
df_list = [
    csv2dataframe(csv_name)
    for csv_name in sorted(os.listdir(os.getcwd()))
    if csv_name.endswith('.csv')
]
super_df = pd.concat(df_list, axis=0)

# Due to trash values in the original Excel files, drop all the empty columns,
# i.e. those whose names contain 'Unnamed'.
# This still leaves us with 35 features to look at.

# Keep only the columns whose label does not contain 'Unnamed'.
cols_to_keep = list(filter(lambda label: 'Unnamed' not in label, super_df.columns))
super_df = super_df.loc[:, cols_to_keep]