import os
import warnings

import numpy
import pandas as pd
import sklearn

# NOTE: CSV files are read from the current working directory. The original
# script carried this disabled directory change, kept here for reference:
# os.chdir('./Ranger Data/data')


def csv2dataframe(fileobj):
    """Return the CSV file located at ``fileobj`` as a pandas DataFrame.

    Args:
        fileobj: Location of the CSV file. Despite the name, the value is
            handed to ``open()``, so it is normally a path rather than an
            already-open file object.

    Returns:
        A DataFrame whose column labels are taken from the first row of the
        file (``header=0``).
    """
    # Text-mode open without an explicit encoding, so the platform default
    # encoding applies.
    with open(fileobj, 'r') as f:
        return pd.read_csv(f, header=0)


# os.listdir() returns names in arbitrary order; sorting them keeps the row
# order of the combined frame reproducible from run to run.
df_list = [
    csv2dataframe(name)
    for name in sorted(os.listdir(os.getcwd()))
    if name.endswith('.csv')
]

if df_list:
    # Rows are stacked as-is: the index is not reset, so index labels from
    # the individual files repeat in the combined frame.
    super_df = pd.concat(df_list, axis=0)
else:
    # pd.concat() raises "No objects to concatenate" on an empty list; warn
    # and continue with an empty frame instead of failing with that message.
    warnings.warn(
        "No .csv files found in %s; super_df is empty" % os.getcwd()
    )
    super_df = pd.DataFrame()

# Due to trash values in the original Excel files, drop all the empty columns,
# which pandas labels 'Unnamed: N'. Per the original author's note this still
# leaves 35 features to look at.
cols_to_keep = [col for col in super_df.columns if 'Unnamed' not in col]
super_df = super_df[cols_to_keep]