Skip to content

Instantly share code, notes, and snippets.

@mjoc
Created June 1, 2017 20:56
Show Gist options
  • Save mjoc/b6a1001208b08546069895ae13d1d541 to your computer and use it in GitHub Desktop.
Save mjoc/b6a1001208b08546069895ae13d1d541 to your computer and use it in GitHub Desktop.
library("Rcpp")
library("devtools")
install_github("mjoc/antfarm")
library("antfarm")
library("dplyr")
library("caret")
doLoadFromBinary <- TRUE
if (doLoadFromBinary) {
# Load the MNIST digit recognition dataset into R
# http://yann.lecun.com/exdb/mnist/
# assume you have all 4 files and gunzip'd them
# creates train$n, train$x, train$y and test$n, test$x, test$y
# e.g. train$x is a 60000 x 784 matrix, each row is one digit (28x28)
# call: show_digit(train$x[5,]) to see a digit.
# brendan o'connor - gist.github.com/39760 - anyall.org
load_mnist <- function() {
load_image_file <- function(filename) {
ret = list()
f = file(filename, 'rb')
readBin(f,
'integer',
n = 1,
size = 4,
endian = 'big')
ret$n = readBin(f,
'integer',
n = 1,
size = 4,
endian = 'big')
nrow = readBin(f,
'integer',
n = 1,
size = 4,
endian = 'big')
ncol = readBin(f,
'integer',
n = 1,
size = 4,
endian = 'big')
x = readBin(
f,
'integer',
n = ret$n * nrow * ncol,
size = 1,
signed = F
)
ret$x = matrix(x, ncol = nrow * ncol, byrow = T)
close(f)
ret
}
load_label_file <- function(filename) {
f = file(filename, 'rb')
readBin(f,
'integer',
n = 1,
size = 4,
endian = 'big')
n = readBin(f,
'integer',
n = 1,
size = 4,
endian = 'big')
y = readBin(f,
'integer',
n = n,
size = 1,
signed = F)
close(f)
y
}
train <<-
load_image_file('/Users/Martin/Google Drive/Projects/MNIST/Data/train-images-idx3-ubyte')
test <<-
load_image_file('/Users/Martin/Google Drive/Projects/MNIST/Data/t10k-images-idx3-ubyte')
train$y <<-
load_label_file('/Users/Martin/Google Drive/Projects/MNIST/Data/train-labels-idx1-ubyte')
test$y <<-
load_label_file('/Users/Martin/Google Drive/Projects/MNIST/Data/t10k-labels-idx1-ubyte')
}
show_digit <- function(arr784, col = gray(12:1 / 12), ...) {
image(matrix(arr784, nrow = 28)[, 28:1], col = col, ...)
}
train <- data.frame()
test <- data.frame()
# Load data.
load_mnist()
train$x <- train$x / 255
# Pick a row to plot
i_plot <- 123
show_digit(train$x[i_plot, ])
title(main = paste(train$y[i_plot]))
# Setup training data with digit and pixel values with 60/40 split for train/cv.
mnist_train_data = train$x
mnist_train_labels <-
model.matrix( ~ y - 1, data.frame(y = as.factor(train$y)))
# Write these out for
# write.table(mnist_train_data,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_prepped.dat',sep=",",row.names = F, col.names = F)
# write.table(mnist_train_labels,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_labels_prepped.dat',sep=",",row.names = F, col.names = F)
mnist_train_data <- mnist_train_data[30001:40000, ]
mnist_train_labels <- mnist_train_labels[30001:40000, ]
# write.table(mnist_train_labels,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_labels_prepped_10000.dat',sep=",",row.names = F, col.names = F)
# write.table(mnist_train_data,file = '/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_prepped_10000.dat',sep=",",row.names = F, col.names = F)
}
#### Build Neural Network
af_net_module <- Module("af_nnet", PACKAGE = 'antfarm')
af_dataset_image <- af_net_module$Dataset
af_nnet_image <- af_net_module$Nnet
af_bper_image <- af_net_module$Backpropper
# Create dataset
doLoadFromBinary <- FALSE
if (doLoadFromBinary) {
mnist_train_dataset <-
new(af_dataset_image, mnist_train_data, mnist_train_labels)
} else{
data_file <-
'/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_prepped_4000.dat'
labels_file <-
'/Users/Martin/Google Drive/Projects/MNIST/Data/mnist_train_labels_prepped_4000.dat'
mnist_train_dataset <-
new(af_dataset_image, data_file, labels_file, FALSE, ",")
}
# Create Network
nnet_geom <- c(784, 100, 10)
my_net <- new(af_nnet_image, nnet_geom, "tanh", "softmax")
# Clamp the data
my_net$clampData(mnist_train_dataset)
my_net$feedForward()
# Create Weight Optimiser
my_bp <- new(af_bper_image, my_net)
my_bp$lossType <- "xent"
# Initialise weights
my_bp$initWeights("gauss", 0.3)
# Optimise the weights relative to loss function
nEpochs <- 100
start.time <- Sys.time()
my_bp$doBPOptim(0, 0.8, 0.2, nEpochs)
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
# If you want to examine the weights
wgts1 <- my_net$getWeights(1)
wgts2 <- my_net$getWeights(1)
test$x <- test$x / 255
# Pick a digit to plot
i_plot <- 123
show_digit(test$x[i_plot, ])
title(main = paste(test$y[i_plot]))
mnist_test_data = test$x
mnist_test_labels <-
model.matrix( ~ y - 1, data.frame(y = as.factor(test$y)))
mnist_test_dataset <- new(af_dataset_image, mnist_test_data, NULL)
my_net$clampData(mnist_test_dataset)
my_net$feedForward()
net_output <- my_net$generatedLabels
net_test_labels <- apply(net_output, 1, which.max) - 1
mnist_test_correct <- apply(mnist_test_labels, 1, which.max) - 1
sum(net_test_labels == mnist_test_correct) / length(mnist_test_correct)
i_wrong <- which(net_test_labels != mnist_test_correct)
i_right <- which(net_test_labels == mnist_test_correct)
i_plot <- sample(i_right, size = 1)
par(mfrow = c(1, 2))
show_digit(test$x[i_plot, ])
title(main = paste(test$y[i_plot], "( record", i_plot, ")"))
barplot(net_output[i_plot, ], names.arg = paste(0:9))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment