Skip to content

Instantly share code, notes, and snippets.

@abarmat
Created June 20, 2016 21:09
Show Gist options
  • Save abarmat/d3262a210b925c2bb85a92b989c0cf28 to your computer and use it in GitHub Desktop.
DMUBA TP2 AA - Random Forest & J48
# Working directory for the project data files (Windows path).
setwd("D:/Datamining")
# Clear JAVA_HOME to work around a Java initialization error when loading RWeka
if (Sys.getenv("JAVA_HOME")!="")
Sys.setenv(JAVA_HOME="")
# Required packages: install any that are missing, then attach them.
# requireNamespace() only probes availability; library() does the actual
# attaching, so a failed install raises an error instead of silently
# returning FALSE the way the original require() calls did.
for (pkg in c("RWeka", "xlsx", "FSelector", "randomForest")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
library(caret)
# BUG FIX: the original called loadFile() without capturing its return value,
# so `cargaAux` (used below for the train/test split) was never created in
# the global environment.
# NOTE(review): loadFile() is defined at the bottom of this script; when run
# top-to-bottom with Rscript its definition must appear before this call.
cargaAux <- loadFile()
# Results table for the J48 sweep. Start with zero rows: the original seeded
# it with as.data.frame(matrix(ncol = 5)), which left a spurious all-NA
# first row in the final report.
informe.J48 <- setNames(
  as.data.frame(matrix(numeric(0), ncol = 5)),
  c("confianza", "nodos", "hojas", "aciertos train", "aciertos test")
)
# Train/test split: sample `ratio` of the row ids for training (70% — the
# original comment claimed 80%, contradicting ratio = 0.7) and keep the
# rest for testing. Column 1 (the id) is dropped from both partitions and
# na.omit removes rows with missing values.
ratio <- 0.7
trainId <- sample(seq_len(nrow(cargaAux)), size = floor(nrow(cargaAux) * ratio))
cargaAux.train <- na.omit(cargaAux[trainId, -1])
cargaAux.test <- na.omit(cargaAux[-trainId, -1])
# J48 decision tree: sweep the pruning confidence from 0.025 to 0.5 in steps
# of 0.025. Iterating over a precomputed seq() avoids the original's
# floating-point accumulation (`confianza <- confianza + 0.025` compared
# against `<= 0.5`), which can drop or add an iteration due to rounding.
for (confianza in seq(0.025, 0.5, by = 0.025)) {
  print(c(format(Sys.time(), "%d-%m-%Y_%H-%M-%OS"), "J48", confianza))
  # Fit on the training partition with pruning confidence C = confianza
  currentTree.train <- J48(Clase ~ ., data = cargaAux.train,
                           control = Weka_control(C = confianza))
  # Evaluate the trained tree on the held-out test partition
  predictWeka <- evaluate_Weka_classifier(currentTree.train,
                                          newdata = cargaAux.test)
  # Tree complexity: total node count and number of leaves
  nodos <- currentTree.train$classifier$measureTreeSize()
  hojas <- currentTree.train$classifier$measureNumLeaves()
  # Percent correctly classified on train and test
  aciertosTrain <- summary(currentTree.train)$details["pctCorrect"]
  aciertosTest <- predictWeka$details["pctCorrect"]
  informe.J48 <- rbind(informe.J48,
                       c(confianza, nodos, hojas, aciertosTrain, aciertosTest))
}
# Random Forest: sweep the number of trees from 1000 to 3000 in steps of 250.
set.seed(415)
# Zero-row report table (the original's matrix(ncol = 2) seed left an all-NA
# first row).
informe.RF <- setNames(as.data.frame(matrix(numeric(0), ncol = 2)),
                       c("Arboles", "Acierto"))
for (arboles in seq(1000, 3000, by = 250)) {
  print(c(format(Sys.time(), "%d-%m-%Y_%H-%M-%OS"), "Random Forest", arboles))
  # Fit a forest of `arboles` trees on the selected predictors
  fit <- randomForest(as.factor(Clase) ~ lugar + tipoprop + piso +
                        sup_tot_m2 + sup_cub_m2 + cant_amb,
                      data = cargaAux.train,
                      importance = TRUE,
                      ntree = arboles)
  Prediction <- predict(fit, cargaAux.test)
  result <- confusionMatrix(cargaAux.test$Clase, Prediction)
  # Overall accuracy is the first entry of result$overall
  precision <- as.data.frame(result$overall)[1, ]
  # BUG FIX: the original did rbind(informe.RF, arboles, precision), which
  # appended `arboles` and `precision` as two separate recycled rows, and it
  # incremented `arboles` before recording it, so the logged tree count was
  # off by 250. Append one well-formed row instead.
  informe.RF <- rbind(informe.RF,
                      data.frame(Arboles = arboles, Acierto = precision))
}
# Variable importance of the last fitted forest (the final loop iteration)
importance(fit)
varImpPlot(fit)
# Persist the Random Forest report to disk
write.csv(informe.RF, file = "firstforest.csv", row.names = FALSE)
# Read the TP2 worksheet and prepare the modelling data frame.
#
# @param file  Path to the Excel workbook (default: the TP2 file, preserving
#              the original hard-coded behavior).
# @param sheet Sheet index to read (default: first sheet).
# @return Data frame with the 8 modelling columns; `Clase` coerced to factor.
loadFile <- function(file = "tp2-work.xlsx", sheet = 1) {
  # read.xlsx returns a data frame; keep only the modelling columns
  # (worksheet columns 2, 5, 6, 9-12 and 15)
  carga <- read.xlsx(file, sheet)
  cargaAux <- carga[, c(2, 5, 6, 9, 10, 11, 12, 15)]
  cargaAux$Clase <- as.factor(cargaAux$Clase)
  cargaAux
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment