Skip to content

Instantly share code, notes, and snippets.

@MariaLavrovskaya
Created June 14, 2019 11:57
Show Gist options
  • Save MariaLavrovskaya/04d94ae703b0a4b7abd999141c50a494 to your computer and use it in GitHub Desktop.
Save MariaLavrovskaya/04d94ae703b0a4b7abd999141c50a494 to your computer and use it in GitHub Desktop.
# Treating categorical variables with label encoding (each category mapped to an integer code)
from sklearn import preprocessing
# NOTE: `le` is rebound to a MultiColumnLabelEncoder instance at the bottom of
# this script, so this single-column encoder is not used by the code shown here.
le = preprocessing.LabelEncoder()
# LabelEncoder for a number of columns
class MultiColumnLabelEncoder:
    """Label-encode several columns of a table in a single call.

    Each selected column is replaced by the integer codes produced by a
    fresh ``preprocessing.LabelEncoder`` fitted on that column.

    Note:
        No encoder state is kept between calls: ``fit`` is a no-op and
        ``transform`` fits a new encoder for every column each time it runs.
        Codes produced for two different inputs are therefore not guaranteed
        to agree with each other.
    """

    def __init__(self, columns=None):
        """Store the column selection.

        Args:
            columns: List of column names to encode, or ``None`` to encode
                every column of the input.
        """
        self.columns = columns

    def fit(self, X, y=None):
        """Do nothing and return ``self``; present so ``fit_transform`` can chain.

        Args:
            X: Ignored.
            y: Ignored.

        Returns:
            This encoder instance.
        """
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the selected columns label-encoded.

        Transforms the columns of ``X`` named in ``self.columns``; if no
        columns were specified, transforms all columns of ``X``. The input
        itself is left unmodified.

        Args:
            X: Column-indexable table supporting ``.copy()`` and ``.items()``
                (presumably a pandas DataFrame -- confirm against callers).

        Returns:
            A copy of ``X`` whose selected columns hold the encoder output.
        """
        output = X.copy()
        if self.columns is not None:
            for col in self.columns:
                # Only the `preprocessing` module is imported by this file, so
                # the encoder class must be reached through it.
                output[col] = preprocessing.LabelEncoder().fit_transform(output[col])
        else:
            # `.items()` replaces `.iteritems()`, which pandas removed in 2.0.
            for colname, col in output.items():
                output[colname] = preprocessing.LabelEncoder().fit_transform(col)
        return output

    def fit_transform(self, X, y=None):
        """Call ``fit`` and then ``transform`` on ``X`` and return the result.

        Args:
            X: Table to encode; see ``transform``.
            y: Ignored; forwarded to ``fit``.

        Returns:
            The encoded copy of ``X`` produced by ``transform``.
        """
        return self.fit(X, y).transform(X)
# No `columns` argument is given, so every column of X is encoded.
# NOTE(review): X is not defined anywhere in this snippet -- presumably it is
# created earlier (e.g. in a previous notebook cell); confirm before running.
le = MultiColumnLabelEncoder()
X_train_le = le.fit_transform(X)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment