matmoody · May 17, 2016 03:26
diff --git a/nb.py b/nb.py
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 %matplotlib inline
 import seaborn as sns


 def _unquote(text):
     """Return *text* with leading/trailing single quotes stripped."""
     return text.strip("''")


 # Download the ideal-weight CSV into a DataFrame.
 actid = pd.read_csv("https://raw.githubusercontent.com/Thinkful-Ed/curric-data-001-data-sets/master/ideal-weight/ideal_weight.csv")

 # Peek at the first rows (the result is only displayed in an interactive session).
 actid.head()

 # Strip leading/trailing single quotes from every header name.
 column_names = actid.columns.values.tolist()
 actid.columns = [_unquote(column_name) for column_name in column_names]

 # Strip leading/trailing single quotes from every value of the sex column.
 actid['sex'] = actid['sex'].map(_unquote)

 # Overlay semi-transparent histograms of the ideal and actual weights.
 for column, legend_label in (('ideal', 'Ideal'), ('actual', 'Actual')):
     plt.hist(actid[column], bins=28, alpha=0.5, label=legend_label)
 plt.legend(loc='upper right')
 plt.show()

 # Histogram of the per-row gap between actual and ideal weight.
 difference = actid['actual'].sub(actid['ideal'])
 plt.hist(difference, bins=28)
 plt.show()

 # Convert 'sex' column to categorical and keep only the integer codes.
 actid['sex'] = pd.Categorical(actid['sex']).codes

 # Male is now 1 and Female 0. (More females than males in the dataset)
 # NOTE(review): codes follow the sorted category order, so the mapping above
 # assumes the labels are spelled 'Female' / 'Male' -- confirm against the data.
 # The mean of the 0/1 codes is the share of rows coded 1. A vectorised mean
 # replaces the element-wise sum()/len(), and print() is written as a call so
 # the line runs on both Python 2 and Python 3.
 print(float(actid['sex'].mean()))


 # Train test split
 # `sklearn.cross_validation` was removed from newer scikit-learn releases in
 # favour of `sklearn.model_selection`; fall back to the old module so the
 # script still runs on installs that predate the move.
 try:
     from sklearn.model_selection import train_test_split
 except ImportError:
     from sklearn.cross_validation import train_test_split

 # Target vector: the `sex` column as a NumPy array.
 y = np.array(actid.sex)

 # Feature matrix with columns in the order [actual, ideal, diff].
 # NOTE(review): `diff` is not created anywhere in this script, so it is
 # assumed to be a column of the loaded CSV -- confirm.
 X = np.array(actid[['actual', 'ideal', 'diff']])

 # Hold out 30% of the rows for testing; the fixed seed keeps the split
 # reproducible. The target is the lowercase `y` defined above -- an
 # uppercase `Y` is never defined in this script and raised NameError here.
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.30, random_state=42)

 # Build NaiveBayes Model
 from sklearn.naive_bayes import GaussianNB

 # Fit a Gaussian Naive Bayes classifier on the training split. A single
 # instance is created; constructing it twice in a row had no effect.
 clf = GaussianNB()
 clf.fit(X_train,y_train)

 # Predict the held-out rows.
 y_pred = clf.predict(X_test)

 # Number of misclassified test rows. print() is written as a call so the
 # line runs on both Python 2 and Python 3.
 print(sum(y_test != y_pred))

 # Predict new round of weights. (Prediction is Male)
 # Each sample is passed in the same order the classifier was fitted on.
 first_trial = clf.predict([[145, 160, -15]])

 # Predict second round of weights (Prediction is Female)
 second_trial = clf.predict([[160, 145, 15]])
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	%matplotlib inline
	import seaborn as sns


	def _unquote(text):
		"""Return *text* with leading/trailing single quotes stripped."""
		return text.strip("''")


	# Download the ideal-weight CSV into a DataFrame.
	actid = pd.read_csv("https://raw.githubusercontent.com/Thinkful-Ed/curric-data-001-data-sets/master/ideal-weight/ideal_weight.csv")

	# Peek at the first rows (the result is only displayed in an interactive session).
	actid.head()

	# Strip leading/trailing single quotes from every header name.
	column_names = actid.columns.values.tolist()
	actid.columns = [_unquote(column_name) for column_name in column_names]

	# Strip leading/trailing single quotes from every value of the sex column.
	actid['sex'] = actid['sex'].map(_unquote)

	# Overlay semi-transparent histograms of the ideal and actual weights.
	for column, legend_label in (('ideal', 'Ideal'), ('actual', 'Actual')):
		plt.hist(actid[column], bins=28, alpha=0.5, label=legend_label)
	plt.legend(loc='upper right')
	plt.show()

	# Histogram of the per-row gap between actual and ideal weight.
	difference = actid['actual'].sub(actid['ideal'])
	plt.hist(difference, bins=28)
	plt.show()

	# Convert 'sex' column to categorical and keep only the integer codes.
	actid['sex'] = pd.Categorical(actid['sex']).codes

	# Male is now 1 and Female 0. (More females than males in the dataset)
	# NOTE(review): codes follow the sorted category order, so the mapping above
	# assumes the labels are spelled 'Female' / 'Male' -- confirm against the data.
	# The mean of the 0/1 codes is the share of rows coded 1. A vectorised mean
	# replaces the element-wise sum()/len(), and print() is written as a call so
	# the line runs on both Python 2 and Python 3.
	print(float(actid['sex'].mean()))


	# Train test split
	# `sklearn.cross_validation` was removed from newer scikit-learn releases in
	# favour of `sklearn.model_selection`; fall back to the old module so the
	# script still runs on installs that predate the move.
	try:
		from sklearn.model_selection import train_test_split
	except ImportError:
		from sklearn.cross_validation import train_test_split

	# Target vector: the `sex` column as a NumPy array.
	y = np.array(actid.sex)

	# Feature matrix with columns in the order [actual, ideal, diff].
	# NOTE(review): `diff` is not created anywhere in this script, so it is
	# assumed to be a column of the loaded CSV -- confirm.
	X = np.array(actid[['actual', 'ideal', 'diff']])

	# Hold out 30% of the rows for testing; the fixed seed keeps the split
	# reproducible. The target is the lowercase `y` defined above -- an
	# uppercase `Y` is never defined in this script and raised NameError here.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.30, random_state=42)

	# Build NaiveBayes Model
	from sklearn.naive_bayes import GaussianNB

	# Fit a Gaussian Naive Bayes classifier on the training split. A single
	# instance is created; constructing it twice in a row had no effect.
	clf = GaussianNB()
	clf.fit(X_train,y_train)

	# Predict the held-out rows.
	y_pred = clf.predict(X_test)

	# Number of misclassified test rows. print() is written as a call so the
	# line runs on both Python 2 and Python 3.
	print(sum(y_test != y_pred))

	# Predict new round of weights. (Prediction is Male)
	# Each sample is passed in the same order the classifier was fitted on.
	first_trial = clf.predict([[145, 160, -15]])

	# Predict second round of weights (Prediction is Female)
	second_trial = clf.predict([[160, 145, 15]])