Skip to content

Instantly share code, notes, and snippets.

@ashok0587
Forked from saihttam/one_hot.py
Created November 2, 2015 18:52
Show Gist options
  • Save ashok0587/7f0631d75b0c4f796008 to your computer and use it in GitHub Desktop.
Save ashok0587/7f0631d75b0c4f796008 to your computer and use it in GitHub Desktop.
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
def one_hot_dataframe(data, cols, replace=False):
    """One-hot encode selected columns of a DataFrame with a DictVectorizer.

    String-valued entries in ``cols`` become one indicator column per
    distinct value (named ``"<column>=<value>"`` by DictVectorizer);
    numeric entries are passed through as a single column.

    Args:
        data: The pandas DataFrame to encode. It is not modified in place.
        cols: List of column labels to encode. A single column name given
            as a string is also accepted.
        replace: If truthy, the returned frame has ``cols`` dropped and the
            encoded columns appended; otherwise ``data`` is returned as is.

    Returns:
        A 3-tuple ``(data, vecData, vec)``: the (possibly replaced) frame,
        the encoded columns as a DataFrame sharing ``data``'s index, and
        the fitted DictVectorizer.
    """
    if isinstance(cols, str):
        # data["a"] would yield a Series, which cannot be turned into
        # per-row dicts; normalise to a one-element list instead.
        cols = [cols]

    vec = DictVectorizer(sparse=False)

    # One dict per row. Going through ``.T.to_dict()`` keys the rows by index
    # label, which silently drops rows whenever the index has duplicates.
    records = data[cols].to_dict(orient="records")
    encoded = vec.fit_transform(records)

    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement and fall back only on releases that predate it.
    if hasattr(vec, "get_feature_names_out"):
        feature_names = list(vec.get_feature_names_out())
    else:
        feature_names = vec.get_feature_names()

    vecData = pd.DataFrame(encoded, columns=feature_names, index=data.index)

    if replace:
        remaining = data.drop(cols, axis=1)
        # Join by row position, then restore the original index: a label-based
        # join would multiply rows when the index contains duplicate labels.
        joined = remaining.reset_index(drop=True).join(
            vecData.reset_index(drop=True)
        )
        joined.index = data.index
        data = joined

    return (data, vecData, vec)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment