Learning curve, python, machine learning, training, validation, testing sets, grid search
# Source from: http://sachithdhanushka.blogspot.com.br/2013/09/learning-curve-generator-for-learning.html
# http://scikit-learn.org/stable/auto_examples/model_selection/plot_validation_curve.html
# http://scikit-learn.org/stable/modules/learning_curve.html
# http://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html
# http://scikit-learn.org/stable/modules/generated/sklearn.learning_curve.learning_curve.html#sklearn.learning_curve.learning_curve
# http://www.astroml.org/sklearn_tutorial/practical.html
# http://stats.stackexchange.com/questions/95797/how-to-split-the-dataset-for-cross-validation-learning-curve-and-final-evaluat
# https://github.com/fernandojunior/udacity-machine-learning-nanodegree/blob/master/projects/boston_housing/boston_housing.ipynb
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_digits
from sklearn import cross_validation

# load the digits dataset
digits = load_digits()

# separate the data into training and testing sets
data_train, data_test, target_train, target_test = cross_validation.train_test_split(
    digits.data, digits.target, test_size=0.20, random_state=42)

# instantiate the Gaussian Naive Bayes model
clf = GaussianNB()


def compute_error(x, y, model):
    # compute the RMS error of the model's predictions on (x, y)
    yfit = model.predict(x)
    return np.sqrt(np.mean((y - yfit) ** 2))


def drawLearningCurve(model):
    # train on increasingly large subsets of the training data and record
    # both the training error and the error on the held-out test set
    sizes = np.linspace(2, 200, 50).astype(int)
    train_error = np.zeros(sizes.shape)
    crossval_error = np.zeros(sizes.shape)

    for i, size in enumerate(sizes):
        # fit the model on the first `size` training examples
        model.fit(data_train[:size, :], target_train[:size])
        # compute the validation error on the test set
        crossval_error[i] = compute_error(data_test, target_test, model)
        # compute the training error on the subset the model was fit on
        train_error[i] = compute_error(data_train[:size, :], target_train[:size], model)

    # draw the plot
    fig, ax = plt.subplots()
    ax.plot(sizes, crossval_error, lw=2, label='cross validation error')
    ax.plot(sizes, train_error, lw=2, label='training error')
    ax.set_xlabel('training set size')
    ax.set_ylabel('rms error')
    ax.legend(loc=0)
    ax.set_xlim(0, sizes[-1])
    ax.set_title('Learning Curve')
    plt.show()


drawLearningCurve(clf)
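
# The gist description also mentions grid search, which the script above does not
# demonstrate. Below is a minimal sketch of how it could be added on top of the same
# train/test split, using the era-matching sklearn.grid_search API. The
# DecisionTreeClassifier and the max_depth values are illustrative assumptions only
# (GaussianNB has no hyperparameters worth tuning), not part of the original gist.
from sklearn.tree import DecisionTreeClassifier
from sklearn.grid_search import GridSearchCV

# search over tree depth with 5-fold cross validation on the training split
param_grid = {'max_depth': [2, 4, 6, 8, 10]}
grid = GridSearchCV(DecisionTreeClassifier(random_state=42), param_grid, cv=5)
grid.fit(data_train, target_train)

print(grid.best_params_)                 # best hyperparameter setting found
print(grid.best_score_)                  # mean cross-validation accuracy of that setting
print(grid.score(data_test, target_test))  # accuracy of the refit best model on the test set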