FelixChop · March 26, 2020 11:11
diff --git a/encoding_categorical.py b/encoding_categorical.py
 from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
 # One-hot encode Sex; handle_unknown='ignore' keeps transform from raising
 # on categories that were not present when the encoder was fitted.
 one_hot_encoder_gender = OneHotEncoder(handle_unknown='ignore').fit(train[['Sex']])

 # Embarked contains missing values, so it has to be imputed before it can be
 # encoded: fill with the most frequent value and keep a missing-value flag.
 imputer_Embarked = SimpleImputer(
     strategy='most_frequent',
     add_indicator=True,
 ).fit(train[['Embarked']])

 # Wrap the imputer output (filled value + indicator) in a DataFrame that is
 # aligned on train's index so it can be joined back.
 transformed_Embarked = pd.DataFrame(
     imputer_Embarked.transform(train[['Embarked']]),
     index=train.index,
     columns=['Embarked', 'Embarked_missing'],
 )

 # Swap the raw Embarked column for the imputed column and its indicator.
 # Remember to fill missing values in the validation and holdout sets as well.
 train = train.drop(columns=['Embarked']).join(transformed_Embarked)

 # Ordinal-encode the imputed Embarked column.
 ordinal_encoder_city = OrdinalEncoder().fit(train[['Embarked']])
	from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
	# One-hot encode Sex; handle_unknown='ignore' keeps transform from raising
	# on categories that were not present when the encoder was fitted.
	one_hot_encoder_gender = OneHotEncoder(handle_unknown='ignore').fit(train[['Sex']])

	# Embarked contains missing values, so it has to be imputed before it can be
	# encoded: fill with the most frequent value and keep a missing-value flag.
	imputer_Embarked = SimpleImputer(
		strategy='most_frequent',
		add_indicator=True,
	).fit(train[['Embarked']])

	# Wrap the imputer output (filled value + indicator) in a DataFrame that is
	# aligned on train's index so it can be joined back.
	transformed_Embarked = pd.DataFrame(
		imputer_Embarked.transform(train[['Embarked']]),
		index=train.index,
		columns=['Embarked', 'Embarked_missing'],
	)

	# Swap the raw Embarked column for the imputed column and its indicator.
	# Remember to fill missing values in the validation and holdout sets as well.
	train = train.drop(columns=['Embarked']).join(transformed_Embarked)

	# Ordinal-encode the imputed Embarked column.
	ordinal_encoder_city = OrdinalEncoder().fit(train[['Embarked']])