Learning curve, python, machine learning, training, validation, testing sets, grid search
# Source from: http://sachithdhanushka.blogspot.com.br/2013/09/learning-curve-generator-for-learning.html
# http://scikit-learn.org/stable/auto_examples/model_selection/plot_validation_curve.html
# http://scikit-learn.org/stable/modules/learning_curve.html
# http://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html
# http://scikit-learn.org/stable/modules/generated/sklearn.learning_curve.learning_curve.html#sklearn.learning_curve.learning_curve
# http://www.astroml.org/sklearn_tutorial/practical.html
# http://stats.stackexchange.com/questions/95797/how-to-split-the-dataset-for-cross-validation-learning-curve-and-final-evaluat
# https://github.com/fernandojunior/udacity-machine-learning-nanodegree/blob/master/projects/boston_housing/boston_housing.ipynb
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_digits
from sklearn import cross_validation

# load the digits dataset
digits = load_digits()

# separate the data into training and testing sets
data_train, data_test, target_train, target_test = cross_validation.train_test_split(
    digits.data, digits.target, test_size=0.20, random_state=42)

# instantiate the Gaussian Naive Bayes model
clf = GaussianNB()


def compute_error(x, y, model):
    # compute the RMS error of the model's predictions on (x, y)
    yfit = model.predict(x)
    return np.sqrt(np.mean((y - yfit) ** 2))


def drawLearningCurve(model):
    # train on increasingly large subsets of the training data and record
    # both the training error and the error on the held-out test set
    sizes = np.linspace(2, 200, 50).astype(int)
    train_error = np.zeros(sizes.shape)
    crossval_error = np.zeros(sizes.shape)

    for i, size in enumerate(sizes):
        # fit the model on the first `size` training examples
        model.fit(data_train[:size, :], target_train[:size])
        # compute the validation error on the test set
        crossval_error[i] = compute_error(data_test, target_test, model)
        # compute the training error on the subset the model was fit on
        train_error[i] = compute_error(data_train[:size, :], target_train[:size], model)

    # draw the plot
    fig, ax = plt.subplots()
    ax.plot(sizes, crossval_error, lw=2, label='cross validation error')
    ax.plot(sizes, train_error, lw=2, label='training error')
    ax.set_xlabel('training set size')
    ax.set_ylabel('rms error')
    ax.legend(loc=0)
    ax.set_xlim(0, sizes[-1])
    ax.set_title('Learning Curve')
    plt.show()


drawLearningCurve(clf)
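
# The gist description also mentions grid search, which the script above does not
# demonstrate. Below is a minimal sketch of how it could be added on top of the same
# train/test split, using the era-matching sklearn.grid_search API. The
# DecisionTreeClassifier and the max_depth values are illustrative assumptions only
# (GaussianNB has no hyperparameters worth tuning), not part of the original gist.
from sklearn.tree import DecisionTreeClassifier
from sklearn.grid_search import GridSearchCV

# search over tree depth with 5-fold cross validation on the training split
param_grid = {'max_depth': [2, 4, 6, 8, 10]}
grid = GridSearchCV(DecisionTreeClassifier(random_state=42), param_grid, cv=5)
grid.fit(data_train, target_train)

print(grid.best_params_)                 # best hyperparameter setting found
print(grid.best_score_)                  # mean cross-validation accuracy of that setting
print(grid.score(data_test, target_test))  # accuracy of the refit best model on the test set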