winnydejong · February 25, 2019 14:30
diff --git a/dataExplorer.py b/dataExplorer.py
 # Helpful function to look through the columns of a Pandas dataframe
 # By Roland Jeannier, https://medium.com/@rtjeannier/pandas-101-fbb5bf86a9bc
 def eda_helper(df):
     """Summarise every column of a DataFrame for exploratory data analysis.

     Args:
         df: The pandas DataFrame whose columns are inspected.

     Returns:
         A DataFrame indexed by the column labels of ``df`` with the columns
         ``null_count`` (number of null entries), ``unique_count`` (length of
         ``Series.unique()`` for the column) and ``data_type`` (set of the
         type names found among the column's values). Rows are sorted by
         ``null_count`` ascending, then by ``unique_count`` descending.
     """
     summary_columns = ["null_count", "unique_count", "data_type"]
     rows = []
     # items() hands back each column as a Series, one per position, so
     # duplicated column labels do not turn ``data`` into a DataFrame the
     # way ``df[label]`` would.
     for _, data in df.items():
         rows.append({
             "null_count": data.isnull().sum(),
             "unique_count": len(data.unique()),
             # More than one name here hints at a type mismatch in the column.
             "data_type": {type(value).__name__ for value in data},
         })
     # Naming the columns explicitly keeps them present when ``df`` has no
     # columns; without them the sort below raises KeyError.
     eda_df = pd.DataFrame(rows, columns=summary_columns)
     eda_df.index = df.columns
     return eda_df.sort_values(
         by=["null_count", "unique_count"], ascending=[True, False]
     )
	# Helpful function to look through the columns of a Pandas dataframe
	# By Roland Jeannier, https://medium.com/@rtjeannier/pandas-101-fbb5bf86a9bc
	def eda_helper(df):
	    """Build a per-column overview of a DataFrame for data exploration.

	    Args:
	        df: The pandas DataFrame whose columns are inspected.

	    Returns:
	        A DataFrame indexed by the column labels of ``df`` with the columns
	        ``null_count`` (number of null entries), ``unique_count`` (length of
	        ``Series.unique()`` for the column) and ``data_type`` (set of the
	        type names found among the column's values). Rows are sorted by
	        ``null_count`` ascending, then by ``unique_count`` descending.
	    """
	    summary_columns = ["null_count", "unique_count", "data_type"]
	    rows = []
	    # Iterating with items() yields one Series per column position, which
	    # stays correct when column labels repeat (``df[label]`` would return
	    # a DataFrame in that case).
	    for _, data in df.items():
	        rows.append({
	            "null_count": data.isnull().sum(),
	            "unique_count": len(data.unique()),
	            # Several names in this set signal mixed types in the column.
	            "data_type": {type(value).__name__ for value in data},
	        })
	    # Explicit column names keep the sort keys available even when ``df``
	    # has no columns and ``rows`` is therefore empty.
	    eda_df = pd.DataFrame(rows, columns=summary_columns)
	    eda_df.index = df.columns
	    return eda_df.sort_values(
	        by=["null_count", "unique_count"], ascending=[True, False]
	    )