Skip to content

Instantly share code, notes, and snippets.

@matmoody
Created May 17, 2016 03:26
Show Gist options
  • Save matmoody/2f7cbba490416e6a74cdfc5d30cf19fc to your computer and use it in GitHub Desktop.
Save matmoody/2f7cbba490416e6a74cdfc5d30cf19fc to your computer and use it in GitHub Desktop.
Naive Bayes model for weight and gender data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Fetch the ideal-weight CSV straight from GitHub into a DataFrame.
actid = pd.read_csv(
    "https://raw.githubusercontent.com/Thinkful-Ed/curric-data-001-data-sets/master/ideal-weight/ideal_weight.csv"
)
# Preview of the first rows (only shown when evaluated interactively).
actid.head()
# The column headers carry literal single quotes; strip them from each name.
actid = actid.rename(columns=lambda header: header.strip("''"))
# The values in the sex column are quoted the same way; strip those too.
actid['sex'] = actid['sex'].apply(lambda entry: entry.strip("''"))
# Overlay semi-transparent histograms of the ideal and actual weights
# on a single figure so the two distributions can be compared.
for column, legend_label in (('ideal', 'Ideal'), ('actual', 'Actual')):
    plt.hist(actid[column], bins=28, alpha=0.5, label=legend_label)
plt.legend(loc='upper right')
plt.show()

# Separate figure: distribution of (actual - ideal) for every row.
difference = actid['actual'].sub(actid['ideal'])
plt.hist(difference, bins=28)
plt.show()
# Convert the 'sex' column to integer category codes.
actid['sex'] = pd.Categorical(actid['sex']).codes
# Per the original author's note, Male is now 1 and Female is 0, so the mean
# of the codes is the fraction of males (more females than males in the data).
# NOTE(review): codes follow the sorted category labels -- confirm the labels
# in the CSV are 'Female'/'Male'.
# Series.mean() accumulates in floating point, so it cannot overflow the small
# integer dtype of the codes the way an element-by-element built-in sum() can.
# The call form of print works on both Python 2 and Python 3.
print(float(actid['sex'].mean()))
# Train/test split.
# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20. Fall back to
# the old location only for very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Target vector: the integer-encoded sex column.
y = np.array(actid.sex)
# Feature matrix, column order: actual, ideal, diff.
# NOTE(review): assumes the CSV itself provides a 'diff' column -- nothing in
# this script creates one; confirm against the data file.
X = np.array(actid[['actual', 'ideal', 'diff']])
# Hold out 30% of the rows for testing; fixed seed for reproducibility.
# (The original passed an undefined name `Y` here, raising NameError.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.30, random_state=42)
# Build the Gaussian Naive Bayes model and fit it on the training split.
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X_train, y_train)

# Predict the held-out rows and report how many were misclassified.
y_pred = clf.predict(X_test)
# The call form of print works on both Python 2 and Python 3.
print(int(np.sum(y_test != y_pred)))

# Classify two hand-made samples; feature order is [actual, ideal, diff].
# Predict new round of weights. (Original author's reported result: Male)
first_trial = clf.predict([[145, 160, -15]])
# Predict second round of weights. (Original author's reported result: Female)
second_trial = clf.predict([[160, 145, 15]])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment