Created
July 8, 2014 12:27
-
-
Save narulkargunjan/5447881245a24e6f0588 to your computer and use it in GitHub Desktop.
Search parameter grid using Caret
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Sources: http://caret.r-forge.r-project.org/training.html,
##          http://cran.r-project.org/web/packages/caret/vignettes/caret.pdf

## Packages / data this script relies on.
library(caret)                    # createDataPartition(), train(), trainControl(), ...
data(Sonar, package = "mlbench")  # example data set; `Class` is the two-level outcome

set.seed(107)  # fix the RNG seed so the split below can be reproduced

## Set up parallel processing to suit the system available (make sure the
## trainControl() options "allowParallel" / "seeds" are set accordingly).
## For unix/ubuntu etc.:
# library(doMC)
# registerDoMC(cores = 5)
## For windows:
# library(doParallel)
# registerDoParallel(cores = 8)

## By default, createDataPartition() does a stratified random split of the data.
inTrain <- createDataPartition(
  y = Sonar$Class,  # the outcome data are needed
  p = 0.75,         # proportion of the data placed in the training set
  list = FALSE      # return the selected row indices as a matrix, not a list
)

## inTrain holds ROW indices of Sonar, so it goes in the first subscript.
training <- Sonar[inTrain, ]   # ~75% of the rows, used to fit the model
testing <- Sonar[-inTrain, ]   # the remaining rows, held out for testing
## Fit a partial least squares classifier, tuning the number of components by
## repeated cross-validation and selecting the model on the area under the ROC
## curve. Expects `training` to be a data frame with a two-level factor `Class`.
plsFit <- train(
  Class ~ .,
  data = training,
  method = "pls",
  ## check "http://caret.r-forge.r-project.org/bytag.html" for all available models
  preProc = c("center", "scale"),
  ## Center and scale the predictors for the training set and all future samples.
  ## Many other pre-processing options are available; check the documentation.
  ## With a single tuning parameter a simple linear search is enough:
  tuneLength = 15,
  ## If a user-specified grid is to be searched instead, it can be given as
  # tuneGrid = data.frame(parm1 = (0:4)/4, parm2 = ...)
  ## OR
  # tuneGrid = expand.grid(interaction.depth = c(1, 5, 9), n.trees = (1:30)*50, shrinkage = 0.1)
  ## Parameters related to cross-validation and performance measures.
  trControl = trainControl(
    method = "repeatedcv",  # documented spelling is all lower case
    repeats = 3,
    classProbs = TRUE,                  # class probabilities are needed for ROC
    summaryFunction = twoClassSummary,  # reports ROC, Sens and Spec
    selectionFunction = "tolerance",
    ## Print a log of the resampling progress. (A bare `verbose` argument to
    ## train() is only forwarded through `...` to the underlying model fit.)
    verboseIter = TRUE
  ),
  ## It is worth looking into trainControl() for the resampling methods and the
  ## parallelism options such as "seeds" and "allowParallel". For choosing the
  ## optimal model, "best" and "oneSE" are also available as selectionFunction.
  ## Use method = "none" if pre-defined parameters are to be used.
  ## Many metrics are available to evaluate models: "RMSE" and "Rsquared" for
  ## regression, "Accuracy" and "Kappa" for classification. For a user-specific
  ## metric, supply "summaryFunction" as above.
  metric = "ROC"  # must be a string naming a column produced by summaryFunction
)
## Prediction (gbmFit3, svmFit and rdaFit below are placeholders for fitted
## train objects; substitute your own model, e.g. plsFit):
#predict(gbmFit3, newdata = head(testing))
#predict(gbmFit3, newdata = head(testing), type = "prob")
## Comparing multiple models:
#resamps <- resamples(list(GBM = gbmFit3,
#                          SVM = svmFit,
#                          RDA = rdaFit))
#resamps
#summary(resamps)
#trellis.par.set(caretTheme())
#dotplot(resamps, metric = "ROC")
#difValues <- diff(resamps)
#difValues
#summary(difValues)
## To test the best obtained model:
# confusionMatrix(data = predicted_class, testing$Class)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment