Created
October 2, 2013 05:11
-
-
Save kkurni/6789379 to your computer and use it in GitHub Desktop.
R Script for face detection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(reshape2) | |
library(foreach) | |
memory.limit(1000000) | |
# parameters | |
data.dir <- 'C:/Projects/KK/BigData/FacebookFaceDetection/data/' | |
patch_size <- 10 | |
search_size <- 2 | |
# read data and convert image strings to arrays | |
train.file <- paste0(data.dir, 'training.csv') | |
test.file <- paste0(data.dir, 'test.csv') | |
data.file <- paste0(data.dir, 'data.Rd') | |
d.train <- read.csv(train.file, stringsAsFactors=F) | |
d.test <- read.csv(test.file, stringsAsFactors=F) | |
im.train <- foreach(im = d.train$Image, .combine=rbind) %dopar% { | |
as.integer(unlist(strsplit(im, " "))) | |
} | |
im.test <- foreach(im = d.test$Image, .combine=rbind) %dopar% { | |
as.integer(unlist(strsplit(im, " "))) | |
} | |
d.train$Image <- NULL | |
d.test$Image <- NULL | |
# list the coordinates we have to predict | |
coordinate.names <- gsub("_x", "", names(d.train)[grep("_x", names(d.train))]) | |
# for each one, compute the average patch | |
mean.patches <- foreach(coord = coordinate.names) %dopar% { | |
cat(sprintf("computing mean patch for %s\n", coord)) | |
coord_x <- paste(coord, "x", sep="_") | |
coord_y <- paste(coord, "y", sep="_") | |
# compute average patch | |
patches <- foreach (i = 1:nrow(d.train), .combine=rbind) %do% { | |
im <- matrix(data = im.train[i,], nrow=96, ncol=96) | |
x <- d.train[i, coord_x] | |
y <- d.train[i, coord_y] | |
x1 <- (x-patch_size) | |
x2 <- (x+patch_size) | |
y1 <- (y-patch_size) | |
y2 <- (y+patch_size) | |
if ( (!is.na(x)) && (!is.na(y)) && (x1>=1) && (x2<=96) && (y1>=1) && (y2<=96) ) | |
{ | |
as.vector(im[x1:x2, y1:y2]) | |
} | |
else | |
{ | |
NULL | |
} | |
} | |
matrix(data = colMeans(patches), nrow=2*patch_size+1, ncol=2*patch_size+1) | |
} | |
# for each coordinate and for each test image, find the position that best correlates with the average patch | |
p <- foreach(coord_i = 1:length(coordinate.names), .combine=cbind) %dopar% { | |
# the coordinates we want to predict | |
coord <- coordinate.names[coord_i] | |
coord_x <- paste(coord, "x", sep="_") | |
coord_y <- paste(coord, "y", sep="_") | |
# the average of them in the training set (our starting point) | |
mean_x <- mean(d.train[, coord_x], na.rm=T) | |
mean_y <- mean(d.train[, coord_y], na.rm=T) | |
# search space: 'search_size' pixels centered on the average coordinates | |
x1 <- as.integer(mean_x)-search_size | |
x2 <- as.integer(mean_x)+search_size | |
y1 <- as.integer(mean_y)-search_size | |
y2 <- as.integer(mean_y)+search_size | |
# ensure we only consider patches completely inside the image | |
x1 <- ifelse(x1-patch_size<1, patch_size+1, x1) | |
y1 <- ifelse(y1-patch_size<1, patch_size+1, y1) | |
x2 <- ifelse(x2+patch_size>96, 96-patch_size, x2) | |
y2 <- ifelse(y2+patch_size>96, 96-patch_size, y2) | |
# build a list of all positions to be tested | |
params <- expand.grid(x = x1:x2, y = y1:y2) | |
# for each image... | |
r <- foreach(i = 1:nrow(d.test), .combine=rbind) %do% { | |
if ((coord_i==1)&&((i %% 100)==0)) { cat(sprintf("%d/%d\n", i, nrow(d.test))) } | |
im <- matrix(data = im.test[i,], nrow=96, ncol=96) | |
# ... compute a score for each position ... | |
r <- foreach(j = 1:nrow(params), .combine=rbind) %do% { | |
x <- params$x[j] | |
y <- params$y[j] | |
p <- im[(x-patch_size):(x+patch_size), (y-patch_size):(y+patch_size)] | |
score <- cor(as.vector(p), as.vector(mean.patches[[coord_i]])) | |
score <- ifelse(is.na(score), 0, score) | |
data.frame(x, y, score) | |
} | |
# ... and return the best | |
best <- r[which.max(r$score), c("x", "y")] | |
} | |
names(r) <- c(coord_x, coord_y) | |
r | |
} | |
# prepare file for submission | |
predictions <- data.frame(ImageId = 1:nrow(d.test), p) | |
submission <- melt(predictions, id.vars="ImageId", variable.name="FeatureName", value.name="Location") | |
example.submission <- read.csv(paste0(data.dir, 'IdLookupTable.csv')) | |
sub.col.names <- names(example.submission) | |
example.submission$Location <- NULL | |
submission <- merge(example.submission, submission, all.x=T, sort=F) | |
submission <- submission[, sub.col.names] | |
write.csv(submission, file="submission_search.csv", quote=F, row.names=F) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment