narulkargunjan · July 8, 2014 12:27
diff --git a/gridsearch_caret b/gridsearch_caret
 ##sources: http://caret.r-forge.r-project.org/training.html, http://cran.r-project.org/web/packages/caret/vignettes/caret.pdf

 ## Fix the RNG seed so the random partition below can be reproduced.
 set.seed(107)

 ## Optional parallel backend: enable the variant matching the host system
 ## (keep the "allowParallel"/"seeds" settings of trainControl consistent with it).
 ## Unix-like systems (e.g. Ubuntu):
 #library(doMC)
 #registerDoMC(cores = 5)
 ## Windows:
 #library(doParallel)
 #registerDoParallel(cores = 8)


 ## Indices of the observations that go into the training set.
 inTrain <- createDataPartition(
   y = Sonar$Class, ## the outcome data are needed to build the split
   p = 0.75,        ## proportion of the data placed in the training set
   list = FALSE     ## do not return the indices as a list
 )
 ## By default, createDataPartition does a stratified random split of the data.

 ## inTrain holds row indices, so it must go in the row position of `[`.
 ## The split is taken from Sonar, the same data the partition was built on
 ## (`data` is never defined in this script and would resolve to the base
 ## `data()` function, which cannot be subset).
 training <- Sonar[inTrain, ]   ## keep only the ~75% of rows chosen for training
 testing <- Sonar[-inTrain, ]   ## keep the remaining rows for testing

 ## Tune a PLS classifier on the training set using repeated cross-validation,
 ## selecting the model by the ROC metric computed by twoClassSummary.
 plsFit <- train(Class ~ .,
                 data = training,

                 method = "pls",
                 ## check "http://caret.r-forge.r-project.org/bytag.html" for all available models

                 preProcess = c("center", "scale"),
                 ## Center and scale the predictors for the training set and all future samples.
                 ## Many others are available, do check the documentation

                 ## if only one tuning parameter, might just want to search linearly
                 tuneLength = 15,

                 ## if a user-specific grid is to be searched, it can be specified instead
                 #tuneGrid = data.frame(parm1 = (0:4)/4, parm2=...),
                 ## OR
                 #tuneGrid = expand.grid(interaction.depth = c(1, 5, 9), n.trees = (1:30)*50, shrinkage = 0.1),

                 ## parameters related to cross validation and performance measures
                 trControl = trainControl(method = "repeatedcv", repeats = 3,
                                          classProbs = TRUE,
                                          summaryFunction = twoClassSummary,
                                          selectionFunction = "tolerance",
                                          ## print a log of the training process; a `verbose`
                                          ## argument given to train() would instead be
                                          ## forwarded to the underlying model routine
                                          verboseIter = TRUE),
                 ## worth looking into trainControl for the resampling methods and parallelism options
                 ## such as "seeds" and "allowParallel". Also, for choosing optimal model, "best/oneSE" available.
                 ## use method = "none" if pre defined parms are to be used.

                 ## many metrics are available to evaluate models - "RMSE" and "Rsquared" for regression and
                 ## "Accuracy" and "Kappa" for classification. For user-specific ones, use "summaryFunction" above
                 metric = "ROC" # must be a string; available because twoClassSummary is the summaryFunction
                 )
 ## Prediction:
 #predict(gbmFit3, newdata = head(testing))
 #predict(gbmFit3, newdata = head(testing), type = "prob")

 ## Comparing multiple models:
 #resamps <- resamples(list(GBM = gbmFit3,
 #                          SVM = svmFit,
 #                          RDA = rdaFit))
 #resamps
 #summary(resamps)
 #trellis.par.set(caretTheme())
 #dotplot(resamps, metric = "ROC")
 #difValues <- diff(resamps)
 #difValues
 #summary(difValues)

 ## To test the best obtained model:
 # confusionMatrix(data = predicted_class, testing$Class)
	##sources: http://caret.r-forge.r-project.org/training.html, http://cran.r-project.org/web/packages/caret/vignettes/caret.pdf

	## Fix the RNG seed so the random partition below can be reproduced.
	set.seed(107)

	## Optional parallel backend: enable the variant matching the host system
	## (keep the "allowParallel"/"seeds" settings of trainControl consistent with it).
	## Unix-like systems (e.g. Ubuntu):
	#library(doMC)
	#registerDoMC(cores = 5)
	## Windows:
	#library(doParallel)
	#registerDoParallel(cores = 8)


	## Indices of the observations that go into the training set.
	inTrain <- createDataPartition(
	  y = Sonar$Class, ## the outcome data are needed to build the split
	  p = 0.75,        ## proportion of the data placed in the training set
	  list = FALSE     ## do not return the indices as a list
	)
	## By default, createDataPartition does a stratified random split of the data.

	## inTrain holds row indices, so it must go in the row position of `[`.
	## The split is taken from Sonar, the same data the partition was built on
	## (`data` is never defined in this script and would resolve to the base
	## `data()` function, which cannot be subset).
	training <- Sonar[inTrain, ] ## keep only the ~75% of rows chosen for training
	testing <- Sonar[-inTrain, ] ## keep the remaining rows for testing

	## Tune a PLS classifier on the training set using repeated cross-validation,
	## selecting the model by the ROC metric computed by twoClassSummary.
	plsFit <- train(Class ~ .,
	  data = training,

	  method = "pls",
	  ## check "http://caret.r-forge.r-project.org/bytag.html" for all available models

	  preProcess = c("center", "scale"),
	  ## Center and scale the predictors for the training set and all future samples.
	  ## Many others are available, do check the documentation

	  ## if only one tuning parameter, might just want to search linearly
	  tuneLength = 15,

	  ## if a user-specific grid is to be searched, it can be specified instead
	  #tuneGrid = data.frame(parm1 = (0:4)/4, parm2=...),
	  ## OR
	  #tuneGrid = expand.grid(interaction.depth = c(1, 5, 9), n.trees = (1:30)*50, shrinkage = 0.1),

	  ## parameters related to cross validation and performance measures
	  trControl = trainControl(method = "repeatedcv", repeats = 3,
	    classProbs = TRUE,
	    summaryFunction = twoClassSummary,
	    selectionFunction = "tolerance",
	    ## print a log of the training process; a `verbose` argument given to
	    ## train() would instead be forwarded to the underlying model routine
	    verboseIter = TRUE),
	  ## worth looking into trainControl for the resampling methods and parallelism options
	  ## such as "seeds" and "allowParallel". Also, for choosing optimal model, "best/oneSE" available.
	  ## use method = "none" if pre defined parms are to be used.

	  ## many metrics are available to evaluate models - "RMSE" and "Rsquared" for regression and
	  ## "Accuracy" and "Kappa" for classification. For user-specific ones, use "summaryFunction" above
	  metric = "ROC" # must be a string; available because twoClassSummary is the summaryFunction
	)
	## Prediction:
	#predict(gbmFit3, newdata = head(testing))
	#predict(gbmFit3, newdata = head(testing), type = "prob")

	## Comparing multiple models:
	#resamps <- resamples(list(GBM = gbmFit3,
	# SVM = svmFit,
	# RDA = rdaFit))
	#resamps
	#summary(resamps)
	#trellis.par.set(caretTheme())
	#dotplot(resamps, metric = "ROC")
	#difValues <- diff(resamps)
	#difValues
	#summary(difValues)

	## To test the best obtained model:
	# confusionMatrix(data = predicted_class, testing$Class)