vlavorini · January 14, 2018 14:09
diff --git a/Imputation_scaling.py b/Imputation_scaling.py
 import pandas as pd
 from sklearn_pandas import DataFrameMapper
 from sklearn.preprocessing import StandardScaler, Imputer, MinMaxScaler, QuantileTransformer

 # Load the training data.
 df = pd.read_csv("train.csv")

 # Count the null features per user (row). This runs before any imputation so
 # the information about missing values is kept as a feature of its own.
 # The original line used `np` (never imported) and `df_best_usrs_prv_dummy`
 # (never defined), so it raised NameError; the count is now taken with pandas
 # over the columns of `df` itself.
 # NOTE(review): assumes the undefined frame was meant to be `df` -- confirm.
 df["N/A"] = df.isna().sum(axis=1).values

 # Select the columns (features) that need different preprocessing.
 categorical_columns = ["sex", "country", "higher_education", "cat2"]
 integer_colums = ["age", "int1", "int2"]  # name kept as in the original script
 float_columns = ["float1", "float2"]
 skewed_columns = ["skewed1", "skewed2"]

 # One-hot encode the categorical features.
 df = pd.get_dummies(df, columns=categorical_columns)

 # Fill the null values and rescale the features selected above. Every column
 # gets its own freshly built transformer instances, and the selector is a
 # one-element list so the transformers receive 2-D input.
 # NOTE(review): `Imputer` was removed in scikit-learn 0.22 in favour of
 # `sklearn.impute.SimpleImputer` -- confirm the installed version.
 tuples_int = [
     ([colname], [Imputer(strategy="most_frequent"), StandardScaler()])
     for colname in integer_colums
 ]
 tuples_float = [
     ([colname], [Imputer(strategy="mean"), MinMaxScaler()])
     for colname in float_columns
 ]
 # Fixed: the original iterated over the misspelled `skewed_colums`.
 tuples_skewed = [
     ([colname], [Imputer(strategy="mean"), QuantileTransformer()])
     for colname in skewed_columns
 ]
 features2transf = tuples_int + tuples_float + tuples_skewed

 # Create the mapping for the features. `default=None` passes the columns not
 # listed above (dummies, "N/A", ...) through unchanged; `df_out=True` returns
 # a DataFrame instead of a bare array.
 mapper = DataFrameMapper(features=features2transf, df_out=True, default=None)

 # The actual preprocessing.
 df_preprocessed = mapper.fit_transform(df)
	import pandas as pd
	from sklearn_pandas import DataFrameMapper
	from sklearn.preprocessing import StandardScaler, Imputer, MinMaxScaler, QuantileTransformer

	# Load the training data.
	df = pd.read_csv("train.csv")

	# Count the null features per user (row). This runs before any imputation so
	# the information about missing values is kept as a feature of its own.
	# The original line used `np` (never imported) and `df_best_usrs_prv_dummy`
	# (never defined), so it raised NameError; the count is now taken with pandas
	# over the columns of `df` itself.
	# NOTE(review): assumes the undefined frame was meant to be `df` -- confirm.
	df["N/A"] = df.isna().sum(axis=1).values

	# Select the columns (features) that need different preprocessing.
	categorical_columns = ["sex", "country", "higher_education", "cat2"]
	integer_colums = ["age", "int1", "int2"]  # name kept as in the original script
	float_columns = ["float1", "float2"]
	skewed_columns = ["skewed1", "skewed2"]

	# One-hot encode the categorical features.
	df = pd.get_dummies(df, columns=categorical_columns)

	# Fill the null values and rescale the features selected above. Every column
	# gets its own freshly built transformer instances, and the selector is a
	# one-element list so the transformers receive 2-D input.
	# NOTE(review): `Imputer` was removed in scikit-learn 0.22 in favour of
	# `sklearn.impute.SimpleImputer` -- confirm the installed version.
	tuples_int = [
	    ([colname], [Imputer(strategy="most_frequent"), StandardScaler()])
	    for colname in integer_colums
	]
	tuples_float = [
	    ([colname], [Imputer(strategy="mean"), MinMaxScaler()])
	    for colname in float_columns
	]
	# Fixed: the original iterated over the misspelled `skewed_colums`.
	tuples_skewed = [
	    ([colname], [Imputer(strategy="mean"), QuantileTransformer()])
	    for colname in skewed_columns
	]
	features2transf = tuples_int + tuples_float + tuples_skewed

	# Create the mapping for the features. `default=None` passes the columns not
	# listed above (dummies, "N/A", ...) through unchanged; `df_out=True` returns
	# a DataFrame instead of a bare array.
	mapper = DataFrameMapper(features=features2transf, df_out=True, default=None)

	# The actual preprocessing.
	df_preprocessed = mapper.fit_transform(df)