Created
May 17, 2016 03:26
-
-
Save matmoody/2f7cbba490416e6a74cdfc5d30cf19fc to your computer and use it in GitHub Desktop.
Naive Bayes model for weight and gender data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# IPython/Jupyter magic: render figures inline (only valid inside a notebook).
%matplotlib inline
# Load the ideal-weight data set directly from its raw GitHub URL.
DATA_URL = (
    "https://raw.githubusercontent.com/Thinkful-Ed/curric-data-001-data-sets/"
    "master/ideal-weight/ideal_weight.csv"
)
actid = pd.read_csv(DATA_URL)
# Preview of the first rows (only rendered when this is the last expression
# of a notebook cell; a no-op when run as a plain script).
actid.head()

# Header names arrive wrapped in single quotes; strip them so the columns can
# be addressed by their bare names.
actid.columns = [name.strip("'") for name in actid.columns]

# The 'sex' values carry the same stray quotes. The vectorised string
# accessor is used instead of a per-row lambda so that missing values pass
# through as NaN rather than raising AttributeError.
actid['sex'] = actid['sex'].str.strip("'")
# Number of bins shared by every histogram below.
HIST_BINS = 28

# Overlay the ideal and actual weight distributions; the 0.5 alpha keeps
# both visible where they overlap.
for column, label in (('ideal', 'Ideal'), ('actual', 'Actual')):
    plt.hist(actid[column], bins=HIST_BINS, alpha=0.5, label=label)
plt.legend(loc='upper right')
plt.show()

# Distribution of the gap between actual and ideal weight
# (positive means actual is above ideal).
difference = actid['actual'] - actid['ideal']
plt.hist(difference, bins=HIST_BINS)
plt.show()
# Replace the 'sex' labels with integer category codes. pd.Categorical sorts
# string categories, so (assuming the labels are 'Female' and 'Male')
# Female becomes 0 and Male becomes 1.
actid['sex'] = pd.Categorical(actid['sex']).codes

# The mean of the 0/1 codes is the share of rows coded 1 (Male).
# Original author's note: there are more females than males in the dataset.
# print() with a single argument works on both Python 2 and Python 3.
print(actid['sex'].mean())
# Train/test split.
# train_test_split comes from sklearn.model_selection (imported at the top of
# the file); the old sklearn.cross_validation module was removed in
# scikit-learn 0.20.

# Target: the integer-encoded 'sex' column.
y = np.array(actid['sex'])

# Features, in this order: actual weight, ideal weight, diff.
# NOTE(review): 'diff' is expected to be a column supplied by the CSV itself;
# nothing in this script creates it -- confirm against the data file.
X = np.array(actid[['actual', 'ideal', 'diff']])

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible. (The original passed an undefined name `Y` here.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42)
# Fit a Gaussian Naive Bayes classifier on the training split.
# GaussianNB is imported at the top of the file.
clf = GaussianNB()
clf.fit(X_train, y_train)

# Predict the held-out rows and report how many were misclassified.
y_pred = clf.predict(X_test)
print((y_test != y_pred).sum())
# Classify two hand-made samples. Each row follows the training feature
# order: [actual, ideal, diff].

# Actual 145, ideal 160, diff -15.
# Original author's note: the prediction is Male -- not verified here.
first_trial = clf.predict([[145, 160, -15]])

# Actual 160, ideal 145, diff 15.
# Original author's note: the prediction is Female -- not verified here.
second_trial = clf.predict([[160, 145, 15]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment