Created
March 7, 2016 00:51
-
-
Save wanchaol/f381b5676b761239cba2 to your computer and use it in GitHub Desktop.
Transform categorical features to numerical features
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Credit to: http://stackoverflow.com/questions/24458645/label-encoding-across-multiple-columns-in-scikit-learn
import pandas as pd | |
from sklearn.preprocessing import LabelEncoder | |
class MultiColumnLabelEncoder:
    """Label-encode several DataFrame columns in one call.

    Each selected column is replaced by the result of a freshly created
    ``LabelEncoder().fit_transform`` on that column.  The encoders are not
    stored on the instance, so every call to ``transform`` re-derives the
    encoding from the data it is given.
    """

    def __init__(self, columns=None):
        """Remember which columns to encode.

        Args:
            columns: Iterable of column labels to encode, or ``None`` to
                encode every column of the frame passed to ``transform``.
        """
        self.columns = columns

    def fit(self, X, y=None):
        """Do nothing and return ``self``.

        Present so the object exposes the fit/transform method pair; all of
        the work happens in ``transform``.
        """
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the selected columns label-encoded.

        Args:
            X: DataFrame to encode.  It is copied first; the caller's frame
                is not modified.

        Returns:
            The copy of ``X`` with each column named in ``self.columns``
            (or every column when ``self.columns`` is ``None``) replaced by
            its ``LabelEncoder`` output.
        """
        output = X.copy()
        if self.columns is not None:
            for col in self.columns:
                output[col] = LabelEncoder().fit_transform(output[col])
        else:
            # DataFrame.iteritems() was removed in pandas 2.0; items() yields
            # the same (label, Series) pairs and exists in older versions too.
            for colname, col in output.items():
                output[colname] = LabelEncoder().fit_transform(col)
        return output

    def fit_transform(self, X, y=None):
        """Call ``fit`` and then ``transform`` on ``X`` and return the result."""
        return self.fit(X, y).transform(X)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment