-
-
Save mick001/cbf04580a74c99bfa00b07cfe929876a to your computer and use it in GitHub Desktop.
# Generate a train-test dataset
#
# Reads every file in the image directory with EBImage, flattens each image
# into a single row (label first, then the pixel values) and writes the
# resulting table to a CSV file.

# Clean environment and load required packages
rm(list = ls())
# library() stops with an error when EBImage is not installed; require()
# would only return FALSE and let the script fail later at readImage().
library(EBImage)

# Set wd where resized greyscale images are located
setwd("C://dogs_resized")

# Out file
out_file <- "C://dogs_28.csv"

# List images in path (every file in the working directory is treated as an
# image, so the directory should contain nothing else)
images <- list.files()
if (length(images) == 0) {
  stop("No files found in ", getwd(), call. = FALSE)
}

# Set image size. In this case 28x28
img_size <- 28 * 28

# Set label
label <- 1

# One slot per image; rows are collected here and bound once after the loop
# instead of growing a data frame with rbind() on every iteration.
rows <- vector("list", length(images))

# Main loop. Loop over each image
for (i in seq_along(images)) {
  # Read image
  img <- readImage(images[i])

  # Get the image as a matrix (the .Data slot of the EBImage Image object)
  img_matrix <- img@.Data

  # Coerce to a vector; t() makes as.vector() walk img_matrix row by row
  img_vector <- as.vector(t(img_matrix))

  # Fail early with a clear message instead of producing a ragged table
  if (length(img_vector) != img_size) {
    stop(
      "Image '", images[i], "' has ", length(img_vector),
      " values, expected ", img_size,
      call. = FALSE
    )
  }

  # Add label
  rows[[i]] <- c(label, img_vector)

  # Print status info
  print(paste0("Done ", i))
}

# Set up df: one row per image, one column per value
df <- as.data.frame(do.call(rbind, rows))

# Set names. paste() uses its default separator here, so the pixel columns
# are written as "pixel 1", "pixel 2", ... (unchanged from the original).
names(df) <- c("label", paste("pixel", c(1:img_size)))

# Write out dataset
write.csv(df, out_file, row.names = FALSE)
#-------------------------------------------------------------------------------
# Test and train split and shuffle
#
# Combines the two per-class CSV files, shuffles the rows and writes the
# first n_train rows as the training set and the remainder as the test set.

# Load datasets (paths are relative to the current working directory)
plants <- read.csv("plants_28.csv")
dogs <- read.csv("dogs_28.csv")

# Bind rows in a single dataset
new <- rbind(plants, dogs)

# Number of rows actually available. The original hard-coded 1512 here,
# which yields NA rows (too few rows) or silently drops rows (too many)
# whenever the combined dataset has a different size.
n_total <- nrow(new)

# Number of rows kept for training; everything else goes to the test set
n_train <- 1200
if (n_total <= n_train) {
  stop(
    "Need more than ", n_train, " rows for a train-test split, got ", n_total,
    call. = FALSE
  )
}

# Shuffle new dataset
shuffled <- new[sample(n_total), ]

# Train-test split
train_28 <- shuffled[seq_len(n_train), ]
test_28 <- shuffled[(n_train + 1):n_total, ]

# Save train-test datasets
write.csv(train_28, "train_28.csv", row.names = FALSE)
write.csv(test_28, "test_28.csv", row.names = FALSE)
Hi! Thank you for the comment! I usually link to the datasets that I use, but this time, since I do not own any of the images, I couldn't do it. You can get the links to each image at image-net.org; they are mostly on Tumblr (publicly available) or similar websites. I used about 1500 of those at the following links: http://www.image-net.org/synset?wnid=n02084071 and http://www.image-net.org/synset?wnid=n00017222. However, not all are .jpeg, so if you want to use all of them you might need to do some more preprocessing.
Thanks for the great code. I was just wondering whether you could give an example of the columns available in the "plants_28.csv" and "dogs_28.csv" files. It is not immediately clear to me how to pass the class information to the ANN.
Hi Sciabola, I've just released a step by step tutorial on how to set up and run a deep convolutional neural network with a publicly available dataset. If you like, you can check it at https://firsttimeprogrammer.blogspot.com/2016/08/image-recognition-tutorial-in-r-using.html
Nice example and article on your blog. How could we download the 1500 images of dogs and plants?