Created
June 14, 2019 11:57
-
-
Save MariaLavrovskaya/04d94ae703b0a4b7abd999141c50a494 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Treat categorical variables with label encoding: each category is mapped to an integer code (this is not one-hot encoding).
from sklearn import preprocessing

# Stand-alone single-column encoder instance.
# NOTE(review): `le` is rebound to a MultiColumnLabelEncoder further down in
# this script before it is used, so this instance appears to be unused.
le = preprocessing.LabelEncoder()
# Label-encode several DataFrame columns at once.
class MultiColumnLabelEncoder:
    """Label-encode several columns of a DataFrame-like object in one call.

    Each selected column is replaced by the output of a fresh
    ``preprocessing.LabelEncoder().fit_transform`` on that column.

    NOTE: the encoders are created inside ``transform`` and are not stored,
    so every ``transform`` call learns its mapping from the data it is given.
    Encoding two datasets with separate calls can therefore assign different
    codes to the same category.
    """

    def __init__(self, columns=None):
        # Column labels to encode; ``None`` means "encode every column".
        self.columns = columns

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is learned at fit time.

        Present so the class offers the usual fit/transform method pair.
        """
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the selected columns label-encoded.

        Transforms the columns of ``X`` listed in ``self.columns``. If
        ``self.columns`` is ``None``, transforms all columns of ``X``.
        ``X`` itself is not modified.
        """
        output = X.copy()
        if self.columns is not None:
            for col in self.columns:
                # Use the module that is actually imported; a bare
                # ``LabelEncoder`` name is not in scope in this file.
                output[col] = preprocessing.LabelEncoder().fit_transform(output[col])
        else:
            # ``items()`` instead of ``iteritems()``: the latter was removed
            # in pandas 2.0, while ``items()`` works on old and new versions.
            for colname, col in output.items():
                output[colname] = preprocessing.LabelEncoder().fit_transform(col)
        return output

    def fit_transform(self, X, y=None):
        """Call ``fit`` and then ``transform`` on the same ``X``."""
        return self.fit(X, y).transform(X)
# Encode every column of X (no column list given, so all columns are used).
# NOTE(review): X is not defined in this snippet; it is presumably a pandas
# DataFrame created earlier — confirm before running.
le = MultiColumnLabelEncoder()
X_train_le = le.fit_transform(X)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment