mjoc · June 1, 2017 20:56
diff --git a/antfarm_mnist.R b/antfarm_mnist.R
 library("Rcpp")
 library("devtools")
 install_github("mjoc/antfarm")
 library("antfarm")
 library("dplyr")
 library("caret")

 doLoadFromBinary <- TRUE
 if (doLoadFromBinary) {
  # Load the MNIST digit recognition dataset into R
  # http://yann.lecun.com/exdb/mnist/
  # assume you have all 4 files and gunzip'd them
  # creates train$n, train$x, train$y  and test$n, test$x, test$y
  # e.g. train$x is a 60000 x 784 matrix, each row is one digit (28x28)
  # call:  show_digit(train$x[5,])   to see a digit.
  # brendan o'connor - gist.github.com/39760 - anyall.org

  load_mnist <- function() {
    load_image_file <- function(filename) {
      ret = list()
      f = file(filename, 'rb')
      readBin(f,
              'integer',
              n = 1,
              size = 4,
              endian = 'big')
      ret$n = readBin(f,
                      'integer',
                      n = 1,
                      size = 4,
                      endian = 'big')
      nrow = readBin(f,
                     'integer',
                     n = 1,
                     size = 4,
                     endian = 'big')
      ncol = readBin(f,
                     'integer',
                     n = 1,
                     size = 4,
                     endian = 'big')
      x = readBin(
        f,
        'integer',
        n = ret$n * nrow * ncol,
        size = 1,
        signed = F
      )
      ret$x = matrix(x, ncol = nrow * ncol, byrow = T)
      close(f)
      ret
    }
    load_label_file <- function(filename) {
      f = file(filename, 'rb')
      readBin(f,
              'integer',
              n = 1,
              size = 4,
              endian = 'big')
      n = readBin(f,
                  'integer',
                  n = 1,
                  size = 4,
                  endian = 'big')
      y = readBin(f,
                  'integer',
                  n = n,
                  size = 1,
                  signed = F)
      close(f)
      y
    }
    train <<-
      load_image_file('/Users/Martin/Google Drive/Projects/MNIST/Data/train-images-idx3-ubyte')
    test <<-
      load_image_file('/Users/Martin/Google Drive/Projects/MNIST/Data/t10k-images-idx3-ubyte')

    train$y <<-
      load_label_file('/Users/Martin/Google Drive/Projects/MNIST/Data/train-labels-idx1-ubyte')
    test$y <<-
      load_label_file('/Users/Martin/Google Drive/Projects/MNIST/Data/t10k-labels-idx1-ubyte')
  }

  show_digit <- function(arr784, col = gray(12:1 / 12), ...) {
    image(matrix(arr784, nrow = 28)[, 28:1], col = col, ...)
  }

  train <- data.frame()
  test <- data.frame()

  # Load data.
  load_mnist()

  train$x <- train$x / 255

  # Pick a row to plot
  i_plot <- 123
  show_digit(train$x[i_plot, ])
  title(main = paste(train$y[i_plot]))

  # Setup training data with digit and pixel values with 60/40 split for train/cv.
  mnist_train_data = train$x
  mnist_train_labels <-
    model.matrix( ~ y - 1, data.frame(y = as.factor(train$y)))

  # Write these out for
  # write.table(mnist_train_data,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_prepped.dat',sep=",",row.names = F, col.names = F)
  # write.table(mnist_train_labels,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_labels_prepped.dat',sep=",",row.names = F, col.names = F)
  mnist_train_data <- mnist_train_data[30001:40000, ]
  mnist_train_labels <- mnist_train_labels[30001:40000, ]
  # write.table(mnist_train_labels,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_labels_prepped_10000.dat',sep=",",row.names = F, col.names = F)
  # write.table(mnist_train_data,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_prepped_10000.dat',sep=",",row.names = F, col.names = F)
 }

 #### Build Neural Network

 af_net_module <- Module("af_nnet", PACKAGE = 'antfarm')
 af_dataset_image <- af_net_module$Dataset
 af_nnet_image <- af_net_module$Nnet
 af_bper_image <- af_net_module$Backpropper

 # Create dataset
 doLoadFromBinary <- FALSE
 if (doLoadFromBinary) {
  mnist_train_dataset <-
    new(af_dataset_image, mnist_train_data, mnist_train_labels)
 } else{
  data_file <-
    '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_prepped_4000.dat'
  labels_file <-
    '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_labels_prepped_4000.dat'
  mnist_train_dataset <-
    new(af_dataset_image, data_file, labels_file, FALSE, ",")
 }

 # Create Network
 nnet_geom <- c(784, 100, 10)
 my_net <- new(af_nnet_image, nnet_geom, "tanh", "softmax")

 # Clamp the data
 my_net$clampData(mnist_train_dataset)

 my_net$feedForward()

 # Create Weight Optimiser
 my_bp <- new(af_bper_image, my_net)
 my_bp$lossType <- "xent"
 # Initialise weights
 my_bp$initWeights("gauss", 0.3)

 # Optimise the weights relative to loss function
 nEpochs <- 100
 start.time <- Sys.time()
 my_bp$doBPOptim(0, 0.8, 0.2, nEpochs)
 end.time <- Sys.time()
 time.taken <- end.time - start.time
 time.taken

 # If you want to examine the weights
 wgts1 <- my_net$getWeights(1)
 wgts2 <- my_net$getWeights(1)

 test$x <- test$x / 255

 # Pick a digit to plot
 i_plot <- 123
 show_digit(test$x[i_plot, ])
 title(main = paste(test$y[i_plot]))

 mnist_test_data = test$x
 mnist_test_labels <-
  model.matrix( ~ y - 1, data.frame(y = as.factor(test$y)))

 mnist_test_dataset <- new(af_dataset_image, mnist_test_data, NULL)

 my_net$clampData(mnist_test_dataset)

 my_net$feedForward()

 net_output <- my_net$generatedLabels

 net_test_labels <- apply(net_output, 1, which.max) - 1
 mnist_test_correct <- apply(mnist_test_labels, 1, which.max) - 1

 sum(net_test_labels == mnist_test_correct) / length(mnist_test_correct)

 i_wrong <- which(net_test_labels != mnist_test_correct)
 i_right <- which(net_test_labels == mnist_test_correct)
 i_plot <- sample(i_right, size = 1)

 par(mfrow = c(1, 2))
 show_digit(test$x[i_plot, ])
 title(main = paste(test$y[i_plot], "( record", i_plot, ")"))

 barplot(net_output[i_plot, ], names.arg = paste(0:9))
	library("Rcpp")
	library("devtools")
	install_github("mjoc/antfarm")
	library("antfarm")
	library("dplyr")
	library("caret")

	doLoadFromBinary <- TRUE
	if (doLoadFromBinary) {
	# Load the MNIST digit recognition dataset into R
	# http://yann.lecun.com/exdb/mnist/
	# assume you have all 4 files and gunzip'd them
	# creates train$n, train$x, train$y and test$n, test$x, test$y
	# e.g. train$x is a 60000 x 784 matrix, each row is one digit (28x28)
	# call: show_digit(train$x[5,]) to see a digit.
	# brendan o'connor - gist.github.com/39760 - anyall.org

	load_mnist <- function() {
	load_image_file <- function(filename) {
	ret = list()
	f = file(filename, 'rb')
	readBin(f,
	'integer',
	n = 1,
	size = 4,
	endian = 'big')
	ret$n = readBin(f,
	'integer',
	n = 1,
	size = 4,
	endian = 'big')
	nrow = readBin(f,
	'integer',
	n = 1,
	size = 4,
	endian = 'big')
	ncol = readBin(f,
	'integer',
	n = 1,
	size = 4,
	endian = 'big')
	x = readBin(
	f,
	'integer',
	n = ret$n * nrow * ncol,
	size = 1,
	signed = F
	)
	ret$x = matrix(x, ncol = nrow * ncol, byrow = T)
	close(f)
	ret
	}
	load_label_file <- function(filename) {
	f = file(filename, 'rb')
	readBin(f,
	'integer',
	n = 1,
	size = 4,
	endian = 'big')
	n = readBin(f,
	'integer',
	n = 1,
	size = 4,
	endian = 'big')
	y = readBin(f,
	'integer',
	n = n,
	size = 1,
	signed = F)
	close(f)
	y
	}
	train <<-
	load_image_file('/Users/Martin/Google Drive/Projects/MNIST/Data/train-images-idx3-ubyte')
	test <<-
	load_image_file('/Users/Martin/Google Drive/Projects/MNIST/Data/t10k-images-idx3-ubyte')

	train$y <<-
	load_label_file('/Users/Martin/Google Drive/Projects/MNIST/Data/train-labels-idx1-ubyte')
	test$y <<-
	load_label_file('/Users/Martin/Google Drive/Projects/MNIST/Data/t10k-labels-idx1-ubyte')
	}

	show_digit <- function(arr784, col = gray(12:1 / 12), ...) {
	image(matrix(arr784, nrow = 28)[, 28:1], col = col, ...)
	}

	train <- data.frame()
	test <- data.frame()

	# Load data.
	load_mnist()

	train$x <- train$x / 255

	# Pick a row to plot
	i_plot <- 123
	show_digit(train$x[i_plot, ])
	title(main = paste(train$y[i_plot]))

	# Setup training data with digit and pixel values with 60/40 split for train/cv.
	mnist_train_data = train$x
	mnist_train_labels <-
	model.matrix( ~ y - 1, data.frame(y = as.factor(train$y)))

	# Write these out for
	# write.table(mnist_train_data,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_prepped.dat',sep=",",row.names = F, col.names = F)
	# write.table(mnist_train_labels,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_labels_prepped.dat',sep=",",row.names = F, col.names = F)
	mnist_train_data <- mnist_train_data[30001:40000, ]
	mnist_train_labels <- mnist_train_labels[30001:40000, ]
	# write.table(mnist_train_labels,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_labels_prepped_10000.dat',sep=",",row.names = F, col.names = F)
	# write.table(mnist_train_data,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_prepped_10000.dat',sep=",",row.names = F, col.names = F)
	}

	#### Build Neural Network

	af_net_module <- Module("af_nnet", PACKAGE = 'antfarm')
	af_dataset_image <- af_net_module$Dataset
	af_nnet_image <- af_net_module$Nnet
	af_bper_image <- af_net_module$Backpropper

	# Create dataset
	doLoadFromBinary <- FALSE
	if (doLoadFromBinary) {
	mnist_train_dataset <-
	new(af_dataset_image, mnist_train_data, mnist_train_labels)
	} else{
	data_file <-
	'/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_prepped_4000.dat'
	labels_file <-
	'/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_labels_prepped_4000.dat'
	mnist_train_dataset <-
	new(af_dataset_image, data_file, labels_file, FALSE, ",")
	}

	# Create Network
	nnet_geom <- c(784, 100, 10)
	my_net <- new(af_nnet_image, nnet_geom, "tanh", "softmax")

	# Clamp the data
	my_net$clampData(mnist_train_dataset)

	my_net$feedForward()

	# Create Weight Optimiser
	my_bp <- new(af_bper_image, my_net)
	my_bp$lossType <- "xent"
	# Initialise weights
	my_bp$initWeights("gauss", 0.3)

	# Optimise the weights relative to loss function
	nEpochs <- 100
	start.time <- Sys.time()
	my_bp$doBPOptim(0, 0.8, 0.2, nEpochs)
	end.time <- Sys.time()
	time.taken <- end.time - start.time
	time.taken

	# If you want to examine the weights
	wgts1 <- my_net$getWeights(1)
	wgts2 <- my_net$getWeights(1)

	test$x <- test$x / 255

	# Pick a digit to plot
	i_plot <- 123
	show_digit(test$x[i_plot, ])
	title(main = paste(test$y[i_plot]))

	mnist_test_data = test$x
	mnist_test_labels <-
	model.matrix( ~ y - 1, data.frame(y = as.factor(test$y)))

	mnist_test_dataset <- new(af_dataset_image, mnist_test_data, NULL)

	my_net$clampData(mnist_test_dataset)

	my_net$feedForward()

	net_output <- my_net$generatedLabels

	net_test_labels <- apply(net_output, 1, which.max) - 1
	mnist_test_correct <- apply(mnist_test_labels, 1, which.max) - 1

	sum(net_test_labels == mnist_test_correct) / length(mnist_test_correct)

	i_wrong <- which(net_test_labels != mnist_test_correct)
	i_right <- which(net_test_labels == mnist_test_correct)
	i_plot <- sample(i_right, size = 1)

	par(mfrow = c(1, 2))
	show_digit(test$x[i_plot, ])
	title(main = paste(test$y[i_plot], "( record", i_plot, ")"))

	barplot(net_output[i_plot, ], names.arg = paste(0:9))