Skip to content

Instantly share code, notes, and snippets.

@fernandojunior
Last active August 17, 2016 03:37
Show Gist options
  • Save fernandojunior/c526760375c1675c2df2b19fca77c0ed to your computer and use it in GitHub Desktop.
Save fernandojunior/c526760375c1675c2df2b19fca77c0ed to your computer and use it in GitHub Desktop.
Learning curve, python, machine learning, training, validation, testing sets, grid search
# Source from: http://sachithdhanushka.blogspot.com.br/2013/09/learning-curve-generator-for-learning.html
# http://scikit-learn.org/stable/auto_examples/model_selection/plot_validation_curve.html
# http://scikit-learn.org/stable/modules/learning_curve.html
# http://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html
# http://scikit-learn.org/stable/modules/generated/sklearn.learning_curve.learning_curve.html#sklearn.learning_curve.learning_curve
# http://scikit-learn.org/stable/modules/learning_curve.html
# http://www.astroml.org/sklearn_tutorial/practical.html
# http://stats.stackexchange.com/questions/95797/how-to-split-the-dataset-for-cross-validation-learning-curve-and-final-evaluat
# https://github.com/fernandojunior/udacity-machine-learning-nanodegree/blob/master/projects/boston_housing/boston_housing.ipynb
import matplotlib.pyplot as plt
import numpy as np
import sklearn.cross_validation
from sklearn.datasets import load_digits
from sklearn.naive_bayes import GaussianNB
# Load the digits dataset.
digits = load_digits()

# Separate the data into a training set and a held-out validation set
# (20% of the samples); the fixed random_state keeps the split reproducible.
# `import sklearn.cross_validation` binds only the name `sklearn`, so the
# splitter has to be reached through its full dotted path.
data_train, data_test, target_train, target_test = (
    sklearn.cross_validation.train_test_split(
        digits.data, digits.target, test_size=0.20, random_state=42
    )
)

# Use a Gaussian Naive Bayes model as the estimator under study.
clf = GaussianNB()
def compute_error(x, y, model):
    """Return the root-mean-square error of ``model``'s predictions on ``x``.

    Args:
        x: Samples, passed unchanged to ``model.predict``.
        y: True target values, one per sample in ``x``.
        model: Any object exposing ``predict(x)``.

    Returns:
        ``sqrt(mean((y - model.predict(x)) ** 2))`` as a NumPy float.
    """
    # Coerce both sides to float arrays so plain Python lists are accepted
    # and unsigned-integer labels cannot wrap around when subtracted.
    y_true = np.asarray(y, dtype=float)
    y_fit = np.asarray(model.predict(x), dtype=float)
    return np.sqrt(np.mean((y_true - y_fit) ** 2))
def drawLearningCurve(model):
    """Plot training and validation RMS error against training-set size.

    ``model`` is refit on the first ``size`` rows of the module-level
    ``data_train`` / ``target_train`` for 50 sizes spaced evenly between 2
    and 200. After each fit, ``compute_error`` is evaluated on the rows used
    for fitting (training error) and on the module-level ``data_test`` /
    ``target_test`` hold-out set (validation error).

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            refit in place on every iteration.

    Returns:
        None. The curves are drawn on a new matplotlib figure; this function
        does not call ``plt.show()``.
    """
    sizes = np.linspace(2, 200, 50).astype(int)
    train_error = np.zeros(sizes.shape)
    crossval_error = np.zeros(sizes.shape)

    for i, size in enumerate(sizes):
        # Fit on the first `size` training samples only.
        model.fit(data_train[:size, :], target_train[:size])
        # Validation error: always measured on the full hold-out set.
        crossval_error[i] = compute_error(data_test, target_test, model)
        # Training error: measured on exactly the samples used for fitting.
        train_error[i] = compute_error(
            data_train[:size, :], target_train[:size], model
        )

    # Draw both curves on one set of axes.
    fig, ax = plt.subplots()
    ax.plot(sizes, crossval_error, lw=2, label='cross validation error')
    ax.plot(sizes, train_error, lw=2, label='training error')
    # The x axis is the number of training samples, not an error value.
    ax.set_xlabel('training set size')
    ax.set_ylabel('rms error')
    ax.legend(loc=0)
    # Show the whole range of evaluated sizes instead of cutting off at 99.
    ax.set_xlim(0, sizes.max())
    ax.set_title('Learning Curve')
# Draw the learning curve for the Gaussian Naive Bayes classifier `clf`.
# NOTE(review): nothing in this file calls plt.show(); outside a notebook the
# figure may never be displayed -- confirm how this script is meant to be run.
drawLearningCurve(clf)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment