Skip to content

Instantly share code, notes, and snippets.

@mwacc
Created January 12, 2014 15:23
Show Gist options
  • Save mwacc/8385950 to your computer and use it in GitHub Desktop.
Save mwacc/8385950 to your computer and use it in GitHub Desktop.
Sample of using a random forest to predict the future :)
# Read the dataset from a local file. header = FALSE: the first line of the
# file is read as data, not as column names. stringsAsFactors = TRUE keeps
# string columns (here: Sex) as factors, which was the read.csv() default
# before R 4.0 and is what lets the model treat them as categorical.
data <- read.csv("/Users/kostya/Downloads/abalone.data.csv", header = FALSE,
                 stringsAsFactors = TRUE)
# Set names for the data frame columns.
colnames(data) <- c("Sex", "Length", "Diameter", "Height", "WholeWeight",
                    "ShuckedWeight", "VisceraWeight", "ShellWeight", "Rings")
# Split the dataset into train and test sets: the first 90% of the rows (in
# file order) go to `train`, the remaining rows to `test`.
# NOTE(review): the rows are not shuffled before splitting - confirm that the
# file has no meaningful ordering.
train.size <- floor(0.9 * nrow(data))
# A logical mask (rather than 1:train.size) stays correct when a side of the
# split is empty; 1:0 would silently select bogus rows.
in.train <- seq_len(nrow(data)) <= train.size
train <- data[in.train, ]
test <- data[!in.train, ]
# Candidate predictors: every column of `data` except the response, Rings.
props <- setdiff(names(data), "Rings")
n <- length(props)
# Construct all possible non-empty combinations of predictors. Each element
# of `id` is an integer vector of positions into `props`; combinations of
# size 1 come first, then size 2, and so on up to size n.
id <- unlist(
  lapply(seq_len(n), function(k) combn(seq_len(n), k, simplify = FALSE)),
  recursive = FALSE
)
# Paste each combination into a formula string of the form
# "Rings~ A+B+C". vapply() guarantees a character vector even when `id` is
# empty.
Formulas <- vapply(
  id,
  function(idx) paste("Rings~", paste(props[idx], collapse = "+")),
  character(1)
)
# Evaluate all formulas: fit one random forest of 70 trees per candidate
# formula on the training set. randomForest() lives in the randomForest
# package, which must be attached before the call.
library(randomForest)
rf <- lapply(Formulas, function(f) {
  randomForest(as.formula(f), data = train, ntree = 70)
})
# Pick the model with the best fit. Each model is scored by the last element
# of its `rsq` vector (per the randomForest documentation, the pseudo
# R-squared recorded after each tree, so the last entry describes the full
# forest).
stopifnot(length(rf) > 0)
final.rsq <- vapply(rf, function(model) model$rsq[length(model$rsq)],
                    numeric(1))
# which.max() returns the first maximum, matching a left-to-right scan that
# only replaces the incumbent on a strictly greater score, and it also works
# when `rf` holds a single model.
best <- which.max(final.rsq)
bestRF <- rf[[best]]
bestRsq <- final.rsq[[best]]
# Predict on the held-out test set and show the first few predictions.
head(predict(bestRF, newdata = test))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment