Skip to content

Instantly share code, notes, and snippets.

@fdavidcl
Created November 22, 2017 23:18
Show Gist options
  • Save fdavidcl/717337535629a16cd50bffa07fd62242 to your computer and use it in GitHub Desktop.
An experiment on autoencoding Wisconsin Breast Cancer Diagnosis dataset
---
title: "WDBC"
author: "David Charte"
date: "22 de noviembre de 2017"
output:
  html_document: default
---
```{r setup, include=FALSE, echo = F, results = "hide", message=FALSE}
knitr::opts_chunk$set(echo = FALSE)
library(foreign)
library(RWeka)
library(xgboost)
library(ruta)
library(rutavis)
library(e1071)
library(partykit)
library(randomForest)
library(caret)
library(webshot)
# wdbc <- read.arff("wdbc.arff")
wdbc <- read.csv("wdbc.csv")
# 10-fold cross-validation of a classifier over `data`.
#
# data:        data frame whose FIRST column is the class label.
# predictions: function(train, test_features) -> vector of predicted labels.
#              Note it receives the test rows WITHOUT the class column.
#
# A fixed seed is set so that every classifier is evaluated on the exact
# same folds, making the reported accuracies comparable across methods.
#
# Returns a numeric vector with the test-set accuracy of each fold
# (previously the result was bound to a local and returned invisibly;
# it is now returned directly).
cv <- function(data, predictions) {
  set.seed(12345678)
  folds <- createFolds(data[, 1], k = 10)
  sapply(folds, function(test) {
    pred <- predictions(data[-test, ], data[test, -1])
    sum(pred == data[test, 1]) / length(test)
  })
}
# Look up a classification wrapper by name ("c45", "svm", "rf" or "xg").
#
# Each wrapper is a function(train_set, test_set) where train_set contains
# the `class` column and test_set does not, returning predicted labels.
# An unknown method name yields NULL, as before.
model <- function(method) {
  switch(
    method,
    c45 = function(train_set, test_set) {
      predict(J48(class ~ ., data = train_set), test_set)
    },
    svm = function(train_set, test_set) {
      predict(svm(class ~ ., data = train_set), test_set)
    },
    rf = function(train_set, test_set) {
      predict(randomForest(class ~ ., data = train_set), test_set)
    },
    xg = function(train_set, test_set) {
      booster <- xgboost(data = as.matrix(train_set[, -1]),
                         label = as.numeric(train_set[, 1]) - 1,
                         max_depth = 2, eta = 1, nthread = 4,
                         nrounds = 2, objective = "binary:logistic")
      raw <- predict(booster, as.matrix(test_set))
      # Threshold the probabilities and map back onto the class labels.
      labels <- as.factor(as.numeric(raw > 0.5))
      levels(labels) <- levels(train_set[, 1])
      labels
    }
  )
}
# Fit an autoencoder on `dat` and return the encoded representation.
#
# dat:    input data frame (class label in column `cl`).
# cl:     index of the class column, forwarded to ruta.makeTask.
# hidden: size(s) of the innermost (encoding) layer(s).
# seed:   seed for both R's RNG and mxnet's RNG, for reproducibility.
# ...:    further arguments forwarded to ruta::train.
#
# Returns list(trained_model, encoded_data_frame, ruta_task), where the
# encoded data frame has the class in column "class" and the compressed
# features named v1, v2, ...
autoencode <- function(dat, cl = 1, hidden, seed = 8912345, ...) {
  # Seed both RNGs before any stochastic step so runs are reproducible.
  set.seed(seed)
  mxnet::mx.set.seed(seed)

  n_in <- ncol(dat) - 1  # input width excludes the class column
  learner <- ruta.makeLearner(
    "autoencoder",
    hidden = c(n_in, hidden, n_in),
    activation = "leaky"
  )
  task <- ruta.makeTask(data = dat, cl = cl)

  fitted <- ruta::train(
    learner,
    task,
    epochs = 200,
    optimizer = "adam",
    #momentum = 0.002,
    learning.rate = 0.02,
    initializer.scale = 1,
    wd = 0.01,
    ...
  )

  # Project the data onto the hidden layer and re-attach the class column.
  codes <- as.data.frame(ruta.deepFeatures(fitted, task))
  codes <- cbind(dat[, 1], codes)
  names(codes) <- c("class", paste0("v", seq_len(ncol(codes) - 1)))

  list(fitted, codes, task)
}
# Run the full experiment: cross-validate each method on the original
# dataset (global `wdbc`) and on two autoencoded versions of it, and
# accumulate a plain-text report.
#
# methods: character vector of keys understood by model().
#
# Returns list(report_string, model_5vars, model_3vars, task_5vars).
run <- function(methods) {
  # Autoencode once up front and reuse across all methods.
  encoded5 <- autoencode(wdbc, hidden = 5)
  encoded3 <- autoencode(wdbc, hidden = 3)

  # One report section per dataset variant (previously this code was
  # triplicated inline): header + mean/sd of the CV accuracies.
  report_block <- function(header, data, method) {
    ac <- cv(data, model(method))
    paste0(header, " Mean accuracy: ", mean(ac), ", std dev:", sd(ac))
  }

  res <- ""
  for (method in methods) {
    res <- paste0(res, "Method: ", method, "\n==================\n")
    res <- paste0(res, report_block(
      "Model trained with original dataset:\n", wdbc, method))
    res <- paste0(res, report_block(
      "\nModel trained with 5 autoencoded variables:\n", encoded5[[2]], method))
    res <- paste0(res, report_block(
      "\nModel trained with 3 autoencoded variables:\n", encoded3[[2]], method))
    res <- paste0(res, "\n\n")
  }

  list(res, encoded5[[1]], encoded3[[1]], encoded5[[3]])
}
```
### Accuracy en test de los algoritmos
```{r, echo = F, results = "hide", message=FALSE, warning = F}
# Execute the whole experiment: CV accuracy for each classifier on the
# original and the two autoencoded datasets (see run() in the setup chunk).
res <- run(c("c45", "svm", "rf", "xg"))
```
```{r, echo = F}
# Dataset dimensions include the class column in the feature count as
# printed here.
cat("WDBC is a binary dataset consisting of", nrow(wdbc), "observations of", ncol(wdbc), "features.")
```
```{r, echo = F}
# res[[1]] is the accumulated text report built by run().
cat(res[[1]])
```
```{r, echo = F, warning = F, message = F}
# res[[3]] is the trained 3-variable autoencoder model and res[[4]] the
# ruta task built from the 5-variable run.
# NOTE(review): plot() dispatches on the ruta/rutavis model class here —
# confirm the intended (model, task) signature against rutavis docs.
plot(res[[3]], res[[4]], size = 1, sizes = c(1, 2))
```
## Árboles de decisión
```{r, echo = F, results="hide", message=F, warning=F}
# Same master seed as cv() uses, for reproducibility of the fold split.
set.seed(12345678)
# Re-run the autoencoders; element [[2]] of the result is the encoded
# data frame (class column + compressed features v1, v2, ...).
encoded5 <- autoencode(wdbc, hidden = 5)[[2]]
encoded3 <- autoencode(wdbc, hidden = 3)[[2]]
# Fit one C4.5 (J48) tree per dataset variant, training on all rows
# except the first of ten CV folds, so the printed trees below reflect
# a held-out split rather than the full data.
trees <- lapply(list(wdbc, encoded5, encoded3), function(df) {
print(dim(df))
folds <- createFolds(df[, 1], k = 10)
J48(class ~ ., data = df[-folds[[1]],])
})
```
### Datos originales
```{r}
print(trees[[1]])
```
### Datos del autoencoder de 5 variables
```{r}
print(trees[[2]])
```
### Datos del autoencoder de 3 variables
```{r}
print(trees[[3]])
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment