Classification using caret
##################
# classification #
##################
library(caret)   # train() and predict()
library(RWeka)   # make_Weka_classifier() for the Weka naive Bayes classifier
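# NOTE (assumption, not defined in this gist): the objects used below --
# train.d (training features), train.c.vector (training labels), test.d /
# test.c (test features and labels) and train.dc (features plus a `class`
# column, used by the Weka classifier) -- must be prepared beforehand.
# One possible way to build them from a data frame `dataset` with a
# two-level factor column `class` (levels assumed to be "d" and "e",
# matching the .d/.e suffixes used below):
#
#   set.seed(15973)
#   idx            <- createDataPartition(dataset$class, p = 0.7, list = FALSE)
#   train.dc       <- dataset[idx, ]
#   test.dc        <- dataset[-idx, ]
#   train.d        <- subset(train.dc, select = -class)
#   test.d         <- subset(test.dc,  select = -class)
#   train.c.vector <- train.dc$class
#   test.c         <- test.dc$class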
# decision trees
set.seed(15973)
dtree <- train(train.d, train.c.vector, method = 'rpart')
conf.mx.dt <- table(test.c, predict(dtree, test.d))
tp.d <- conf.mx.dt[1,1] # true positives (class d)
fp.d <- conf.mx.dt[2,1] # false positives (class d)
tn.d <- conf.mx.dt[2,2] # true negatives (class d)
fn.d <- conf.mx.dt[1,2] # false negatives (class d)
tp.e <- conf.mx.dt[2,2] # true positives (class e)
fp.e <- conf.mx.dt[1,2] # false positives (class e)
tn.e <- conf.mx.dt[1,1] # true negatives (class e)
fn.e <- conf.mx.dt[2,1] # false negatives (class e)
error.rate.dt <- (sum(conf.mx.dt)-sum(diag(conf.mx.dt)))/sum(conf.mx.dt)
#error.rate.dt <- (fp + fn) / (tp + tn + fp + fn)
precision.dt.d <- tp.d / (tp.d + fp.d)
precision.dt.e <- tp.e / (tp.e + fp.e)
recall.dt.d <- tp.d / (tp.d + fn.d)
recall.dt.e <- tp.e / (tp.e + fn.e)
f1.dt.d <- 2 * precision.dt.d * recall.dt.d / (precision.dt.d + recall.dt.d)
f1.dt.e <- 2 * precision.dt.e * recall.dt.e / (precision.dt.e + recall.dt.e)
macro.f1.dt <- (f1.dt.d+f1.dt.e)/2
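# Note: recent versions of caret can compute these statistics directly from
# the predictions, which may be a useful cross-check of the manual
# calculations used throughout this script, e.g.:
#   confusionMatrix(predict(dtree, test.d), test.c, mode = "prec_recall")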
# k-nearest neighbor
set.seed(15973)
knn <- train(train.d, train.c.vector, method = 'knn')
conf.mx.knn <- table(test.c, predict(knn, test.d))
tp.d <- conf.mx.knn[1,1] # true positives (class d)
fp.d <- conf.mx.knn[2,1] # false positives (class d)
tn.d <- conf.mx.knn[2,2] # true negatives (class d)
fn.d <- conf.mx.knn[1,2] # false negatives (class d)
tp.e <- conf.mx.knn[2,2] # true positives (class e)
fp.e <- conf.mx.knn[1,2] # false positives (class e)
tn.e <- conf.mx.knn[1,1] # true negatives (class e)
fn.e <- conf.mx.knn[2,1] # false negatives (class e)
error.rate.knn <- (sum(conf.mx.knn)-sum(diag(conf.mx.knn)))/sum(conf.mx.knn)
#error.rate.knn <- (fp + fn) / (tp + tn + fp + fn)
precision.knn.d <- tp.d / (tp.d + fp.d)
precision.knn.e <- tp.e / (tp.e + fp.e)
recall.knn.d <- tp.d / (tp.d + fn.d)
recall.knn.e <- tp.e / (tp.e + fn.e)
f1.knn.d <- 2 * precision.knn.d * recall.knn.d / (precision.knn.d + recall.knn.d)
f1.knn.e <- 2 * precision.knn.e * recall.knn.e / (precision.knn.e + recall.knn.e)
macro.f1.knn <- (f1.knn.d+f1.knn.e)/2
# naive bayes
set.seed(15973)
NB <- make_Weka_classifier("weka/classifiers/bayes/NaiveBayes")
nbayes <- NB(class ~ ., train.dc)
conf.mx.nb <- table(test.c, predict(nbayes, test.d))
tp.d <- conf.mx.nb[1,1] # true positives (class d)
fp.d <- conf.mx.nb[2,1] # false positives (class d)
tn.d <- conf.mx.nb[2,2] # true negatives (class d)
fn.d <- conf.mx.nb[1,2] # false negatives (class d)
tp.e <- conf.mx.nb[2,2] # true positives (class e)
fp.e <- conf.mx.nb[1,2] # false positives (class e)
tn.e <- conf.mx.nb[1,1] # true negatives (class e)
fn.e <- conf.mx.nb[2,1] # false negatives (class e)
error.rate.nb <- (sum(conf.mx.nb)-sum(diag(conf.mx.nb)))/sum(conf.mx.nb)
#error.rate.nb <- (fp + fn) / (tp + tn + fp + fn)
precision.nb.d <- tp.d / (tp.d + fp.d)
precision.nb.e <- tp.e / (tp.e + fp.e)
recall.nb.d <- tp.d / (tp.d + fn.d)
recall.nb.e <- tp.e / (tp.e + fn.e)
f1.nb.d <- 2 * precision.nb.d * recall.nb.d / (precision.nb.d + recall.nb.d)
f1.nb.e <- 2 * precision.nb.e * recall.nb.e / (precision.nb.e + recall.nb.e)
macro.f1.nb <- (f1.nb.d+f1.nb.e)/2
# neural networks
set.seed(15973)
nnets <- train(train.d, train.c.vector, method = 'nnet')
conf.mx.nn <- table(test.c, predict(nnets, test.d))
tp.d <- conf.mx.nn[1,1] # true positives (class d)
fp.d <- conf.mx.nn[2,1] # false positives (class d)
tn.d <- conf.mx.nn[2,2] # true negatives (class d)
fn.d <- conf.mx.nn[1,2] # false negatives (class d)
tp.e <- conf.mx.nn[2,2] # true positives (class e)
fp.e <- conf.mx.nn[1,2] # false positives (class e)
tn.e <- conf.mx.nn[1,1] # true negatives (class e)
fn.e <- conf.mx.nn[2,1] # false negatives (class e)
error.rate.nn <- (sum(conf.mx.nn)-sum(diag(conf.mx.nn)))/sum(conf.mx.nn)
#error.rate.nn <- (fp + fn) / (tp + tn + fp + fn)
precision.nn.d <- tp.d / (tp.d + fp.d)
precision.nn.e <- tp.e / (tp.e + fp.e)
recall.nn.d <- tp.d / (tp.d + fn.d)
recall.nn.e <- tp.e / (tp.e + fn.e)
f1.nn.d <- 2 * precision.nn.d * recall.nn.d / (precision.nn.d + recall.nn.d)
f1.nn.e <- 2 * precision.nn.e * recall.nn.e / (precision.nn.e + recall.nn.e)
macro.f1.nn <- (f1.nn.d+f1.nn.e)/2
# svm with radial kernel
set.seed(565)
svmRad <- train(train.d, train.c.vector, method = 'svmRadial')
conf.mx.svm <- table(test.c, predict(svmRad, test.d))
tp.d <- conf.mx.svm[1,1] # true positives (class d)
fp.d <- conf.mx.svm[2,1] # false positives (class d)
tn.d <- conf.mx.svm[2,2] # true negatives (class d)
fn.d <- conf.mx.svm[1,2] # false negatives (class d)
tp.e <- conf.mx.svm[2,2] # true positives (class e)
fp.e <- conf.mx.svm[1,2] # false positives (class e)
tn.e <- conf.mx.svm[1,1] # true negatives (class e)
fn.e <- conf.mx.svm[2,1] # false negatives (class e)
error.rate.svm <- (sum(conf.mx.svm)-sum(diag(conf.mx.svm)))/sum(conf.mx.svm)
#error.rate.svm <- (fp + fn) / (tp + tn + fp + fn)
precision.svm.d <- tp.d / (tp.d + fp.d)
precision.svm.e <- tp.e / (tp.e + fp.e)
recall.svm.d <- tp.d / (tp.d + fn.d)
recall.svm.e <- tp.e / (tp.e + fn.e)
f1.svm.d <- 2 * precision.svm.d * recall.svm.d / (precision.svm.d + recall.svm.d)
f1.svm.e <- 2 * precision.svm.e * recall.svm.e / (precision.svm.e + recall.svm.e)
macro.f1.svm <- (f1.svm.d+f1.svm.e)/2
# svm with linear kernel
set.seed(565)
svmLin2 <- train(train.d, train.c.vector, method = 'svmLinear2')
conf.mx.svm2 <- table(test.c, predict(svmLin2, test.d))
tp.d <- conf.mx.svm2[1,1] # true positives (class d)
fp.d <- conf.mx.svm2[2,1] # false positives (class d)
tn.d <- conf.mx.svm2[2,2] # true negatives (class d)
fn.d <- conf.mx.svm2[1,2] # false negatives (class d)
tp.e <- conf.mx.svm2[2,2] # true positives (class e)
fp.e <- conf.mx.svm2[1,2] # false positives (class e)
tn.e <- conf.mx.svm2[1,1] # true negatives (class e)
fn.e <- conf.mx.svm2[2,1] # false negatives (class e)
error.rate.svm2 <- (sum(conf.mx.svm2)-sum(diag(conf.mx.svm2)))/sum(conf.mx.svm2)
#error.rate.svm2 <- (fp + fn) / (tp + tn + fp + fn)
precision.svm2.d <- tp.d / (tp.d + fp.d)
precision.svm2.e <- tp.e / (tp.e + fp.e)
recall.svm2.d <- tp.d / (tp.d + fn.d)
recall.svm2.e <- tp.e / (tp.e + fn.e)
f1.svm2.d <- 2 * precision.svm2.d * recall.svm2.d / (precision.svm2.d + recall.svm2.d)
f1.svm2.e <- 2 * precision.svm2.e * recall.svm2.e / (precision.svm2.e + recall.svm2.e)
macro.f1.svm2 <- (f1.svm2.d+f1.svm2.e)/2
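# Optional comparison (a minimal sketch; it only uses the error-rate and
# macro-F1 values computed above) to put the six models side by side:
results <- data.frame(
  model      = c('rpart', 'knn', 'naive bayes', 'nnet', 'svmRadial', 'svmLinear2'),
  error.rate = c(error.rate.dt, error.rate.knn, error.rate.nb,
                 error.rate.nn, error.rate.svm, error.rate.svm2),
  macro.f1   = c(macro.f1.dt, macro.f1.knn, macro.f1.nb,
                 macro.f1.nn, macro.f1.svm, macro.f1.svm2)
)
results[order(-results$macro.f1), ]   # best macro F1 first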