Skip to content

Instantly share code, notes, and snippets.

@abarmat
Created June 27, 2016 14:58
Show Gist options
  • Save abarmat/a709ea0a82bddcf12fb7fba716485b72 to your computer and use it in GitHub Desktop.
Save abarmat/a709ea0a82bddcf12fb7fba716485b72 to your computer and use it in GitHub Desktop.
DMUBA TP2 AA - SVM
library('e1071');
library(Amelia)
# Input file: read below as a semicolon-separated CSV with a header row.
FILENAME <- "tp2-work.csv"

# Seed the RNG once, before any step that draws random numbers, so that
# repeated runs of the script give the same results.
set.seed(100)

# Load the raw table.
data <- read.csv(file = FILENAME, sep = ";", header = TRUE)

# Keep only the columns the rest of the script works with; "Clase" is the
# column later used as the response of the model.
attr_list <- c(
  "anio", "mes",
  "tipoprop", "lugar",
  "sup_tot_m2", "sup_cub_m2",
  "piso", "cant_amb",
  "Clase"
)
df_all <- data[attr_list]
# Feature engineering ----

# Build a single "<mes> <anio>" key from the month and year columns, then
# drop the two source columns.
df_all$aniomes <- paste(data$mes, data$anio)
df_all[c("anio", "mes")] <- NULL

# Integer-code the categorical columns. Codes follow order of first
# appearance in the data (1 = first distinct value seen, 2 = second, ...).
cat_cols <- c("lugar", "tipoprop", "aniomes")
df_all[cat_cols] <- lapply(
  df_all[cat_cols],
  function(values) as.numeric(factor(values, levels = unique(values)))
)

# Coerce the remaining columns, including the response, to numeric.
# NOTE(review): as.numeric() returns the internal level codes for a factor
# but parses the text for a character vector -- confirm which type
# read.csv() produced for these columns (its default changed in R 4.0).
num_cols <- c("cant_amb", "sup_tot_m2", "sup_cub_m2", "piso", "Clase")
df_all[num_cols] <- lapply(df_all[num_cols], as.numeric)
# Imputation ----
# Fill missing values in the numeric property columns with Amelia.
# "Clase" is part of the imputation model but is not written back.

# Amelia settings: no nominal variables and no id variables; "piso" and
# "cant_amb" are declared ordinal.
noms <- NULL
ords <- c("piso", "cant_amb")
idvars <- NULL

imputed_cols <- c("sup_tot_m2", "sup_cub_m2", "piso", "cant_amb")
A <- df_all[c(imputed_cols, "Clase")]

# One bounds row per imputed column: (column index in A, lower, upper),
# i.e. every imputed column is constrained to [0, Inf).
bounds <- cbind(match(imputed_cols, names(A)), 0, Inf)

# The argument is spelled out as `bounds` rather than relying on partial
# matching of `bound`.
B <- amelia(A, m = 2, noms = noms, ords = ords, idvars = idvars,
            bounds = bounds)

# Only the first of the m imputed data sets is used. Stop loudly if it is
# missing: assigning NULL to a data frame column would silently delete
# that column instead of failing.
imp1 <- B$imputations$imp1
if (!is.data.frame(imp1)) {
  stop("Amelia did not return an imputed data set: ", B$message,
       call. = FALSE)
}
for (imputed_col in imputed_cols) {
  df_all[[imputed_col]] <- imp1[[imputed_col]]
}
# Train/test partition ----
# 80% of the rows (rounded down), drawn without replacement, form the
# training set; every row not drawn goes to the test set.
n_rows <- nrow(df_all)
n_train <- floor(0.8 * n_rows)
sample_ix <- sample.int(n_rows, n_train)
df_train <- df_all[sample_ix, ]
df_test <- df_all[-sample_ix, ]

# Separate response and predictors. The response stays a one-column data
# frame named "Clase", with every value shifted down by one.
response_col <- "Clase"
predictor_cols <- setdiff(names(df_all), response_col)
y_train <- df_train[response_col] - 1
X_train <- df_train[predictor_cols]
y_test <- df_test[response_col] - 1
X_test <- df_test[predictor_cols]
# Train ----
# Fit a C-classification SVM with a radial kernel, using every other column
# of df_train as a predictor of Clase.
model <- svm(Clase ~ ., data = df_train, type = "C-classification",
             kernel = "radial")

# Fraction of predictions that match the reference labels.
#   predicted: labels returned by predict() for the fitted model; converted
#              via as.vector()/as.numeric() and shifted by -1 so they are
#              on the same scale as y_train / y_test (which hold Clase - 1).
#   actual:    numeric reference labels, one per prediction.
# Fails if the lengths differ, so a mismatch cannot be hidden by recycling.
accuracy <- function(predicted, actual) {
  stopifnot(length(predicted) == length(actual))
  mean(as.numeric(as.vector(predicted)) - 1 == actual)
}

# Evaluate ----
results <- predict(model, X_test)
results_train <- predict(model, X_train)

# Test accuracy
accuracy(results, y_test$Clase)
# Train accuracy: predictions made on the training rows themselves, not
# the test-set predictions.
accuracy(results_train, y_train$Clase)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment