Skip to content

Instantly share code, notes, and snippets.

@dataprofessor
Last active July 14, 2020 14:21
Show Gist options
  • Save dataprofessor/57c30641b7418a6970e7e0dd761db9e6 to your computer and use it in GitHub Desktop.
Save dataprofessor/57c30641b7418a6970e7e0dd761db9e6 to your computer and use it in GitHub Desktop.
import pandas as pd
# Load the cleaned penguins dataset from the working directory.
penguins = pd.read_csv('penguins_cleaned.csv')

# One-hot feature encoding: pd.get_dummies creates one indicator column per
# category value (this is not an ordinal encoding).
# https://www.kaggle.com/pratik1120/penguin-dataset-eda-classification-and-clustering
df = penguins.copy()  # keep the raw frame untouched for later inspection
target = 'species'
encode = ['sex', 'island']

# Each column listed in `encode` is dropped and replaced by
# `<column>_<category>` indicator columns appended after the remaining
# columns, in the order the columns appear in `encode`.
df = pd.get_dummies(df, columns=encode)
# Integer class label assigned to each penguin species name.
target_mapper = {'Adelie': 0, 'Chinstrap': 1, 'Gentoo': 2}


def target_encode(val):
    """Return the integer class label for the species name *val*.

    Raises:
        KeyError: If *val* is not a key of ``target_mapper``.
    """
    return target_mapper[val]
# Swap every species name in the target column for its integer label.
df[target] = df[target].apply(target_encode)

# Split the frame: Y is the label column, X is every other column.
Y = df[target]
X = df.drop(columns=target)
# Build random forest model
from sklearn.ensemble import RandomForestClassifier
# fit() returns the fitted estimator itself, so the classifier can be
# constructed (with default hyperparameters) and trained in one expression.
clf = RandomForestClassifier().fit(X, Y)
# Saving the model
import pickle

# Use a context manager so the file handle is flushed and closed as soon as
# the model has been written, even if pickling raises.
with open('penguins_clf.pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment