Skip to content

Instantly share code, notes, and snippets.

@ybenjo
Created January 24, 2012 13:00
Show Gist options
  • Save ybenjo/1670081 to your computer and use it in GitHub Desktop.
Save ybenjo/1670081 to your computer and use it in GitHub Desktop.
bagging
# simple_bagging.R
library(mvpart)
# Draw a bootstrap replicate of `data` by sampling its rows with replacement.
#
# Args:
#   data: a data frame (or matrix) whose rows are the observations.
#   size: number of rows to draw; defaults to nrow(data).
#
# Returns:
#   The `size` resampled rows, always as a two-dimensional object.
bootstrap.data <- function(data, size = nrow(data)) {
  row.index <- sample(nrow(data), size, replace = TRUE)
  # drop = FALSE keeps the two-dimensional shape even when `data` has a
  # single column, where plain `[` would collapse the result to a vector.
  data[row.index, , drop = FALSE]
}
# Return the most frequent value in `data`, as a character string.
#
# Frequencies are counted with table(); the result is the name of the
# table entry with the highest count. On a tie the first tied entry in
# table() order is returned.
#
# Note: defining this function shadows base::mode() for code run after it.
mode <- function(data) {
  freq <- table(data)
  winners <- names(freq)[freq == max(freq)]
  # Taking the first element covers both the unique-winner case and ties.
  winners[1]
}
# Pick the majority vote for every row of a vote matrix.
#
# Args:
#   data: a matrix with one row per observation and one column per voter;
#     each cell holds that voter's predicted value.
#
# Returns:
#   A character vector with one element per row of `data`: the value that
#   mode() selects for that row. A zero-row input yields character(0).
majority.voting <- function(data) {
  # seq_len() (rather than 1:nrow(data)) makes a zero-row matrix iterate
  # zero times, and vapply() pins the result to a character vector.
  vapply(
    seq_len(nrow(data)),
    function(row) mode(data[row, ]),
    character(1)
  )
}
# Train a bagged ensemble of classification trees and save it to disk.
#
# The input is read as a comma-separated file with a header row. Its last
# column is used as the class label and the remaining columns as
# predictors. One tree is fitted per bootstrap replicate of the data.
#
# Args:
#   input.file.name: path of the training CSV file.
#   model.file.name: path the list of fitted trees is save()d to, under
#     the object name `models` (the name predict.bagging looks for).
#   times: number of bootstrap replicates, i.e. number of trees to fit.
#
# Returns:
#   The value of save() (invisible NULL); called for its side effect.
learn.bagging <- function(input.file.name, model.file.name, times) {
  origin.data <- read.table(input.file.name, header = TRUE, sep = ",")
  label.name <- colnames(origin.data)[ncol(origin.data)]

  # Convert the label to a factor once, before resampling, so that every
  # bootstrap replicate carries the same level set even when a replicate
  # happens to miss one of the classes.
  origin.data[[label.name]] <- as.factor(origin.data[[label.name]])

  # Build `<label> ~ .` directly from the column name; this replaces the
  # former eval(parse(text = ...)) construction of the rpart call.
  tree.formula <- reformulate(".", response = as.name(label.name))

  # seq_len() makes times = 0 produce an empty model list instead of
  # looping over c(1, 0).
  models <- lapply(seq_len(times), function(i) {
    train.data <- bootstrap.data(origin.data)
    # method = "class" is explicit because predict.bagging asks the
    # fitted trees for type = "class" predictions.
    rpart(tree.formula, data = train.data, method = "class")
  })

  save(models, file = model.file.name)
}
# Predict class labels for a test file with a saved bagging ensemble.
#
# Args:
#   model.file.name: path of a file written by save() that contains a list
#     named `models` (as produced by learn.bagging).
#   test.file.name: path of a comma-separated test file with a header row.
#
# Returns:
#   With a single model: that model's predict(..., type = "class") result,
#   returned unchanged. With several models: a character vector holding,
#   for each test row, the class label chosen by majority.voting().
predict.bagging <- function(model.file.name, test.file.name) {
  test.data <- read.table(test.file.name, header = TRUE, sep = ",")

  # Load into a private environment so that only the expected `models`
  # object is picked up and nothing else in the file can overwrite the
  # local variables of this function.
  model.env <- new.env()
  load(model.file.name, envir = model.env)
  if (!exists("models", envir = model.env, inherits = FALSE)) {
    stop("'", model.file.name, "' does not contain an object named 'models'",
         call. = FALSE)
  }
  models <- get("models", envir = model.env, inherits = FALSE)
  if (length(models) == 0) {
    stop("'", model.file.name, "' contains no models", call. = FALSE)
  }

  if (length(models) == 1) {
    return(predict(models[[1]], test.data, type = "class"))
  }

  # cbind() on factors keeps only their integer codes, so each model's
  # prediction is converted to character first; the vote is then taken
  # over class labels rather than over level indices.
  votes <- do.call(cbind, lapply(models, function(model) {
    as.character(predict(model, test.data, type = "class"))
  }))
  majority.voting(votes)
}
### sample
# Split iris at random into equal-sized train and test halves, write both
# to CSV, fit 100 bagged trees on the train half and predict the test half.
index <- sample(nrow(iris), nrow(iris) * 0.5)
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
write.table(iris[index, ], "/tmp/iris_train.csv",
            row.names = FALSE, col.names = TRUE, sep = ",")
write.table(iris[-index, ], "/tmp/iris_test.csv",
            row.names = FALSE, col.names = TRUE, sep = ",")
learn.bagging("/tmp/iris_train.csv", "/tmp/models", 100)
predict.bagging("/tmp/models", "/tmp/iris_test.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment