Skip to content

Instantly share code, notes, and snippets.

@wanchaol
Created March 7, 2016 00:51
Show Gist options
  • Save wanchaol/f381b5676b761239cba2 to your computer and use it in GitHub Desktop.
Save wanchaol/f381b5676b761239cba2 to your computer and use it in GitHub Desktop.
Transform categorical features to numerical features
## Credit to: http://stackoverflow.com/questions/24458645/label-encoding-across-multiple-columns-in-scikit-learn
import pandas as pd
from sklearn.preprocessing import LabelEncoder
class MultiColumnLabelEncoder:
    """Label-encode several columns of a DataFrame-like object at once.

    Each selected column is passed through its own freshly created
    ``LabelEncoder``.  The class exposes ``fit`` / ``transform`` /
    ``fit_transform`` so it can be used where that trio of methods is
    expected.

    NOTE: no encoder state is kept between calls.  ``fit`` is a no-op and
    ``transform`` fits a new ``LabelEncoder`` on whatever data it is given,
    so the integer codes produced for one dataset are not guaranteed to
    match the codes produced for another.
    """

    def __init__(self, columns=None):
        """Store the column selection.

        Args:
            columns: Iterable of column labels to encode, or ``None`` to
                encode every column of the input.
        """
        self.columns = columns

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the arguments are ignored."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the selected columns label-encoded.

        Args:
            X: Object supporting ``copy()``, column indexing and ``items()``
                (presumably a pandas ``DataFrame``).

        Returns:
            A copy of ``X``; the input itself is not modified.
        """
        output = X.copy()
        if self.columns is not None:
            for col in self.columns:
                output[col] = LabelEncoder().fit_transform(output[col])
        else:
            # ``DataFrame.iteritems()`` was removed in pandas 2.0; ``items()``
            # yields the same (name, Series) pairs on old and new versions.
            for colname, col in output.items():
                output[colname] = LabelEncoder().fit_transform(col)
        return output

    def fit_transform(self, X, y=None):
        """Call ``fit`` followed by ``transform`` on ``X``."""
        return self.fit(X, y).transform(X)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment