Skip to content

Instantly share code, notes, and snippets.

@korkridake
Last active December 4, 2018 12:46
Show Gist options
  • Save korkridake/81a7a0547a42adc63c3e67ab9301cb5b to your computer and use it in GitHub Desktop.
# -------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------
# Develop a Neural Network with MXNet in Five Minutes
# R Notebook Author: @Korkrid Akepanidtaworn
# Tutorial from MXNet Community
# Source: https://mxnet.incubator.apache.org/tutorials/r/fiveMinutesNeuralNetwork.html
# -------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------
# NOTE: library() order matters for namespace masking (tidyverse/caret/e1071 share
# some function names), so do not reorder these calls casually.
library(mlbench)    # provides the Sonar data set
library(tidyverse)  # dplyr/magrittr pipes used in the prediction-wrangling section
library(caret)      # confusionMatrix() used for evaluation
library(e1071)
# To install MXNet, you need to run these 4 lines of codes
# (adds the dmlc repository so install.packages() can find the "mxnet" binary)
cran <- getOption("repos")
cran["dmlc"] <- "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/CRAN/"
options(repos = cran)
# install.packages("mxnet")
library(mxnet)
# -------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------
# Load Data
# -------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------
# This is the data set used by Gorman and Sejnowski in their study of the classification of
# sonar signals using a neural network. The task is to train a network to discriminate
# between sonar signals bounced off a metal cylinder and those bounced off a roughly
# cylindrical rock. Each pattern is a set of 60 numbers in the range 0.0 to 1.0. Each number
# represents the energy within a particular frequency band, integrated over a certain period
# of time. The integration apertures for higher frequencies occur later in time, since these
# frequencies are transmitted later during the chirp. The label associated with each record
# contains the letter "R" if the object is a rock and "M" if it is a mine (metal cylinder).
# The numbers in the labels are in increasing order of aspect angle, but they do not encode
# the angle directly.
data(Sonar, package = "mlbench")
head(Sonar)
# V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17
# 1 0.0200 0.0371 0.0428 0.0207 0.0954 0.0986 0.1539 0.1601 0.3109 0.2111 0.1609 0.1582 0.2238 0.0645 0.0660 0.2273 0.3100
# 2 0.0453 0.0523 0.0843 0.0689 0.1183 0.2583 0.2156 0.3481 0.3337 0.2872 0.4918 0.6552 0.6919 0.7797 0.7464 0.9444 1.0000
# 3 0.0262 0.0582 0.1099 0.1083 0.0974 0.2280 0.2431 0.3771 0.5598 0.6194 0.6333 0.7060 0.5544 0.5320 0.6479 0.6931 0.6759
# 4 0.0100 0.0171 0.0623 0.0205 0.0205 0.0368 0.1098 0.1276 0.0598 0.1264 0.0881 0.1992 0.0184 0.2261 0.1729 0.2131 0.0693
# 5 0.0762 0.0666 0.0481 0.0394 0.0590 0.0649 0.1209 0.2467 0.3564 0.4459 0.4152 0.3952 0.4256 0.4135 0.4528 0.5326 0.7306
# 6 0.0286 0.0453 0.0277 0.0174 0.0384 0.0990 0.1201 0.1833 0.2105 0.3039 0.2988 0.4250 0.6343 0.8198 1.0000 0.9988 0.9508
# V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34
# 1 0.2999 0.5078 0.4797 0.5783 0.5071 0.4328 0.5550 0.6711 0.6415 0.7104 0.8080 0.6791 0.3857 0.1307 0.2604 0.5121 0.7547
# 2 0.8874 0.8024 0.7818 0.5212 0.4052 0.3957 0.3914 0.3250 0.3200 0.3271 0.2767 0.4423 0.2028 0.3788 0.2947 0.1984 0.2341
# 3 0.7551 0.8929 0.8619 0.7974 0.6737 0.4293 0.3648 0.5331 0.2413 0.5070 0.8533 0.6036 0.8514 0.8512 0.5045 0.1862 0.2709
# 4 0.2281 0.4060 0.3973 0.2741 0.3690 0.5556 0.4846 0.3140 0.5334 0.5256 0.2520 0.2090 0.3559 0.6260 0.7340 0.6120 0.3497
# 5 0.6193 0.2032 0.4636 0.4148 0.4292 0.5730 0.5399 0.3161 0.2285 0.6995 1.0000 0.7262 0.4724 0.5103 0.5459 0.2881 0.0981
# 6 0.9025 0.7234 0.5122 0.2074 0.3985 0.5890 0.2872 0.2043 0.5782 0.5389 0.3750 0.3411 0.5067 0.5580 0.4778 0.3299 0.2198
# V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51
# 1 0.8537 0.8507 0.6692 0.6097 0.4943 0.2744 0.0510 0.2834 0.2825 0.4256 0.2641 0.1386 0.1051 0.1343 0.0383 0.0324 0.0232
# 2 0.1306 0.4182 0.3835 0.1057 0.1840 0.1970 0.1674 0.0583 0.1401 0.1628 0.0621 0.0203 0.0530 0.0742 0.0409 0.0061 0.0125
# 3 0.4232 0.3043 0.6116 0.6756 0.5375 0.4719 0.4647 0.2587 0.2129 0.2222 0.2111 0.0176 0.1348 0.0744 0.0130 0.0106 0.0033
# 4 0.3953 0.3012 0.5408 0.8814 0.9857 0.9167 0.6121 0.5006 0.3210 0.3202 0.4295 0.3654 0.2655 0.1576 0.0681 0.0294 0.0241
# 5 0.1951 0.4181 0.4604 0.3217 0.2828 0.2430 0.1979 0.2444 0.1847 0.0841 0.0692 0.0528 0.0357 0.0085 0.0230 0.0046 0.0156
# 6 0.1407 0.2856 0.3807 0.4158 0.4054 0.3296 0.2707 0.2650 0.0723 0.1238 0.1192 0.1089 0.0623 0.0494 0.0264 0.0081 0.0104
# V52 V53 V54 V55 V56 V57 V58 V59 V60 Class
# 1 0.0027 0.0065 0.0159 0.0072 0.0167 0.0180 0.0084 0.0090 0.0032 1
# 2 0.0084 0.0089 0.0048 0.0094 0.0191 0.0140 0.0049 0.0052 0.0044 1
# 3 0.0232 0.0166 0.0095 0.0180 0.0244 0.0316 0.0164 0.0095 0.0078 1
# 4 0.0121 0.0036 0.0150 0.0085 0.0073 0.0050 0.0044 0.0040 0.0117 1
# 5 0.0031 0.0054 0.0105 0.0110 0.0015 0.0072 0.0048 0.0107 0.0094 1
# 6 0.0045 0.0014 0.0038 0.0013 0.0089 0.0057 0.0027 0.0051 0.0062 1
# Recode the factor label into a numeric 0/1 target: "R" (rock) -> 0, "M" (mine) -> 1.
# (Comparing a factor against a character value works elementwise in R.)
Sonar$Class <- ifelse(Sonar$Class == "R", 0, 1)
# -------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------
# Split Training/Testing 60:40
# NOTE(review): the original header said "80:20", but the code samples 60% of the rows
# for training (floor(0.60 * nrow(Sonar))), and the downstream confusion-matrix output
# (84 test observations out of 208) confirms a 60:40 split was actually run. The
# comments are corrected here to match the code.
# -------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------
# 60% of the sample size goes to training
smp_size <- floor(0.60 * nrow(Sonar))
# Set the seed to make your partition reproducible
set.seed(1234)
train_ind <- sample(seq_len(nrow(Sonar)), size = smp_size)
train <- Sonar[train_ind, ]
test <- Sonar[-train_ind, ]
# Columns 1:60 are the predictors; column 61 is the recoded 0/1 class label
train_x <- as.matrix(train[, 1:60])
train_y <- as.numeric(as.matrix(train[, 61]))
test_x <- as.matrix(test[, 1:60])
test_y <- as.numeric(as.matrix(test[, 61]))
# -------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------
# Train the Neural Network Model
# Description: use a multi-layer perceptron as our classifier.
# In mxnet, the mx.mlp function builds a general multi-layer neural network
# for classification or regression.
# -------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------
# Seed MXNet's own RNG so weight initialization (and hence training) is reproducible
mx.set.seed(1234)
model <- mx.mlp(
  train_x,
  train_y,
  hidden_node       = 10,                  # one hidden layer of 10 units
  out_node          = 2,                   # two output units: rock (0) vs mine (1)
  out_activation    = "softmax",           # softmax output -> class probabilities
  num.round         = 20,                  # training epochs
  array.batch.size  = 15,
  learning.rate     = 0.07,
  momentum          = 0.9,
  eval.metric       = mx.metric.accuracy   # report training accuracy each round
)
# Start training with 1 devices
# [1] Train-accuracy=0.400000009271834
# [2] Train-accuracy=0.459259268310335
# [3] Train-accuracy=0.43703705072403
# [4] Train-accuracy=0.451851861344443
# [5] Train-accuracy=0.429629640446769
# [6] Train-accuracy=0.474074088864856
# [7] Train-accuracy=0.614814837773641
# [8] Train-accuracy=0.622222244739532
# [9] Train-accuracy=0.60000001721912
# [10] Train-accuracy=0.629629648394055
# [11] Train-accuracy=0.651851872603099
# [12] Train-accuracy=0.703703721364339
# [13] Train-accuracy=0.711111128330231
# [14] Train-accuracy=0.688888907432556
# [15] Train-accuracy=0.674074093500773
# [16] Train-accuracy=0.762962977091471
# [17] Train-accuracy=0.785185197989146
# [18] Train-accuracy=0.800000011920929
# [19] Train-accuracy=0.770370384057363
# [20] Train-accuracy=0.75555557012558
# Inspect the fitted model object (symbol graph plus learned parameters)
summary(model)
# Length Class Mode
# symbol 1 Rcpp_MXSymbol S4
# arg.params 4 -none- list
# aux.params 0 -none- list
# Visualize the computation graph of the network
graph.viz(model$symbol)
# -------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------
# Predict on the Test Data
# -------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------
# predict() returns a class-by-observation probability matrix, so transpose it to get
# one row per test observation before wrangling with dplyr.
test_pred <- predict(model, test_x) %>%
  t() %>%
  as.data.frame()
test_pred
# [,1] [,2]
# [1,] 0.426815450 0.5731846094
# [2,] 0.959362805 0.0406371839
# [3,] 0.773524523 0.2264754623
# [4,] 0.133803442 0.8661965728
# [5,] 0.784839451 0.2151605636
# [6,] 0.988470435 0.0115295900
# [7,] 0.992558479 0.0074415053
# [8,] 0.998107433 0.0018925916
# [9,] 0.618567407 0.3814325333
# [10,] 0.979754388 0.0202456508
# [11,] 0.997008383 0.0029916782
# [12,] 0.992276728 0.0077233198
# [13,] 0.999820054 0.0001799719
# Column V1 is the probability for class 0 (rock), V2 for class 1 (mine).
# Predict the class with the larger probability.
test_pred <- test_pred %>%
  dplyr::rename(prob_0 = V1,
                prob_1 = V2) %>%
  dplyr::mutate(Class_Pred = ifelse(prob_1 > prob_0, 1, 0))
# Bind the true labels next to the predictions for evaluation
Class <- test$Class
test_eval <- cbind(Class, test_pred)
# Evaluate with caret, treating "1" (mine) as the positive class
confusionMatrix(as.factor(test_eval$Class_Pred),
                as.factor(test_eval$Class),
                positive = "1")
# Confusion Matrix and Statistics
#
# Reference
# Prediction 0 1
# 0 32 20
# 1 2 30
#
# Accuracy : 0.7381
# 95% CI : (0.6307, 0.828)
# No Information Rate : 0.5952
# P-Value [Acc > NIR] : 0.0044768
#
# Kappa : 0.4989
# Mcnemar's Test P-Value : 0.0002896
#
# Sensitivity : 0.6000
# Specificity : 0.9412
# Pos Pred Value : 0.9375
# Neg Pred Value : 0.6154
# Prevalence : 0.5952
# Detection Rate : 0.3571
# Detection Prevalence : 0.3810
# Balanced Accuracy : 0.7706
#
# 'Positive' Class : 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment