Skip to content

Instantly share code, notes, and snippets.

@narulkargunjan
Created July 8, 2014 12:26
Show Gist options
  • Save narulkargunjan/70d992870a4e3d00cf54 to your computer and use it in GitHub Desktop.
Save narulkargunjan/70d992870a4e3d00cf54 to your computer and use it in GitHub Desktop.
Search the parameter space using parallel processing and plot the heatmap
# source: http://statcompute.wordpress.com/2013/06/01/grid-search-for-free-parameters-with-parallel-computing/
# Load the Boston housing data shipped with MASS.
library(MASS)
data(Boston)

# Predictors: every column except the 14th, kept as a single matrix.
# I() marks the matrix "as is" before it is scaled and placed in the frame.
X <- I(as.matrix(Boston[, -14]))
# Centre and scale the predictor columns.
st.X <- scale(X)
# Response: the 14th column, kept as a one-column matrix.
Y <- I(as.matrix(Boston[, 14, drop = FALSE]))
boston <- data.frame(X = st.X, Y)

# Split the data into two disjoint sets: set2 receives 200 rows,
# set1 receives the remainder. The seed makes the split reproducible.
set.seed(2013)
rows <- sample(seq_len(nrow(boston)), size = nrow(boston) - 200)
set1 <- boston[rows, ]
set2 <- boston[-rows, ]
# LOAD FOREACH PACKAGE
library(doParallel)
registerDoParallel(cores = 8)
library(foreach)
# GRID SEARCH BASED ON THE MINIMUM SSE WITH PARALLEL COMPUTING
# Evaluate every (bass, nterms) combination and stack the results into one
# data frame with columns bass, nterms and sse (one row per combination,
# ordered by bass and then by nterms).
#
# The two loops are merged with foreach's nesting operator `%:%` so that all
# 6 x 10 combinations form a single stream of tasks for the registered
# backend. Placing a second `%dopar%` inside the body of the first would
# instead evaluate the inner loop inside a worker and distribute only the
# 6 outer iterations.
cv.sse <-
  foreach(b = seq(0, 10, 2), .combine = rbind) %:%
  foreach(n = 1:10, .combine = rbind) %dopar% {
    # Train a projection pursuit regression on the training data.
    ppreg <- ppr(
      Y ~ X,
      data = set1,
      nterms = n,
      sm.method = "supsmu",
      bass = b
    )
    # Sum of squared errors on the validation data.
    test.sse <- sum((set2$Y - predict(ppreg, set2))^2)
    data.frame(bass = b, nterms = n, sse = test.sse)
  }
# PRINT OUT THE BEST SETTING BASED ON THE GRID SEARCH
# Keep every grid row whose validation SSE equals the minimum (ties are all
# retained), store it as best.setting, then display it.
best.setting <- cv.sse[cv.sse$sse == min(cv.sse$sse), ]
print(best.setting)
# OUTPUT WITH THE LOWEST SSE BY GRID SEARCH (FIRST COLUMN IS THE ROW NAME) #
#    bass nterms     sse
# 17    2      7 2126.07
# GENERATE A HEAT MAP TO VISUALIZE THE GRID SEARCH OUTCOME
library(ggplot2)
# Convert the grid columns to factors so each tile sits on a discrete axis.
# SSE is floored to a multiple of 100 so the fill legend shows discrete bands.
bass <- factor(cv.sse$bass)
nterms <- factor(cv.sse$nterms)
sse <- factor(floor(cv.sse$sse / 100) * 100)

# Build the plot before opening the graphics device, so a failure while
# constructing it cannot leave a JPEG device open.
heat_data <- data.frame(bass = bass, nterms = nterms, sse = sse)
heat_map <- ggplot(heat_data, aes(x = bass, y = nterms, fill = sse)) +
  geom_tile()

jpeg("cv.jpeg", width = 800, height = 500, quality = 100)
# A ggplot object is drawn only when printed. Print it explicitly so the file
# is also written when the script is run through source(), which does not
# auto-print top-level values by default.
print(heat_map)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment