Last active
June 1, 2019 00:33
-
-
Save thejsj/09150063d678fb79389411462a6e70f1 to your computer and use it in GitHub Desktop.
Basilica R Demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install Basilica
# Only fetch the client tarball when the package is missing or older than the
# pinned release, so re-running the script does not reinstall it every time.
basilica_url <- "https://storage.googleapis.com/basilica-r-client/basilica_0.0.2.tar.gz"
if (!requireNamespace("basilica", quietly = TRUE) ||
    utils::packageVersion("basilica") < "0.0.2") {
  install.packages(basilica_url, repos = NULL)
}
library(basilica)
# Create a connection (the demo key is shared)
conn <- connect(auth_key = "SLOW_DEMO_KEY")
# Switch to the demo directory.
# Be sure to download and unpack the demo archive first:
#   wget https://storage.googleapis.com/basilica-public/cats_dogs_demo.tgz
#   tar -xf cats_dogs_demo.tgz
setwd("~/Desktop/cats_dogs_demo")
# Example of how to use the library: embed a single image
embedding <- embed_image_file("./images/dog.1.jpg", conn = conn)
# Build the relative path of every image so they can be embedded in bulk
files <- list.files("./images")
image_files <- paste0("./images/", files)
# embeddings <- embed_image_files(image_files, conn=conn)
# Loading from JSON
library("rjson")
# One precomputed embedding file per image, named <image file name>.emb
embeddings_json <- paste0("~/Desktop/embeddings/", files, ".emb")
# Parse every file and stack the embeddings into a matrix with one image per
# row. The values are concatenated image by image, so the matrix must be
# filled row-wise: the default column-wise fill would interleave values from
# different images within each row.
# NOTE(review): assumes each .emb file holds exactly 2048 numbers - confirm.
embeddings_data <- matrix(
  unlist(lapply(embeddings_json, function(path) fromJSON(file = path))),
  ncol = 2048,
  byrow = TRUE
)
# PCA
# Centre and scale every embedding dimension, then keep the first 30
# principal-component scores for each row
pca <- prcomp(embeddings_data, center = TRUE, scale. = TRUE)
pca_embeddings <- pca[["x"]][, seq_len(30)]
# Label each row: 1 when the file name contains "cat", 0 otherwise
category <- as.integer(grepl(pattern = "cat", x = files))
embeddings_with_category <- cbind(pca_embeddings, category)
data <- data.frame(embeddings_with_category)
# Training
library(dplyr)
# Tag every row with an id so the held-out rows can be found after sampling
data <- mutate(data, id = row_number())
# 70% of the rows are sampled for training; the remainder becomes the test set
train_data <- sample_frac(data, .70)
test_data <- anti_join(data, train_data, by = "id")
# Drop the helper id column so it is not used as a predictor
train_data$id <- NULL
test_data$id <- NULL
# Logistic regression of the category on all remaining columns
model <- glm(category ~ ., data = train_data, family = "binomial")
# Get results
# Predicted probabilities for the held-out rows. The result is deliberately
# not named `predict`, which would mask stats::predict in the global
# environment.
predicted_prob <- predict(model, newdata = test_data, type = "response")
# Confusion matrix at a 0.5 threshold; printed explicitly so it is also shown
# when the script is run through source()
print(table(test_data$category, predicted_prob > 0.5))
library(ROCR)
# ROC curve: true positive rate against false positive rate
ROCRpred <- prediction(predicted_prob, test_data$category)
ROCRperf <- performance(ROCRpred, "tpr", "fpr")
plot(ROCRperf, colorize = TRUE, text.adj = c(-0.2, 1.7))
# Test random file
# Download the image at `url`, embed it with Basilica, project the embedding
# with the fitted PCA and classify it with the logistic model.
# Relies on the script-level objects `conn`, `pca` and `model`.
# Prints "CAT" when the predicted response is above 0.5 and "DOG" otherwise,
# and returns that label invisibly.
test_image <- function(url) {
  # Use a per-call temporary file instead of a fixed /tmp path, and remove it
  # when the function exits, even on error
  image_path <- tempfile(fileext = ".jpg")
  on.exit(unlink(image_path), add = TRUE)
  # mode = "wb" keeps the binary image intact on every platform
  download.file(url, image_path, mode = "wb", quiet = TRUE)
  emb <- embed_image_file(image_path, conn = conn)
  # A single-row matrix, matching the row-per-image layout the PCA was fit on
  reduced <- predict(pca, newdata = matrix(emb, nrow = 1))
  cat_prob <- predict(model, newdata = as.data.frame(reduced), type = "response")
  label <- if (cat_prob > 0.5) "CAT" else "DOG"
  print(label)
  invisible(label)
}
# Classify a handful of sample images, printing the expected answer before
# each prediction. local() keeps the helper variables out of the global
# environment.
local({
  expected <- c("IS DOG", "IS DOG", "IS DOG", "IS CAT", "IS CAT", "IS CAT")
  urls <- c(
    "https://live-cdn.shelterluv.com/sites/default/files/animal_pics/13071/2019/05/23/12/20190523125459.png",
    "https://epi.azureedge.net/website-images/images/a-year-in-the-life-dog/dog_7_600x400.jpg?sfvrsn=edc6d67b_2",
    "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRdqPShXGJGqBnG1-1LU-_I5ZJSMt59iyaeT6H-luJpArY2DCXV",
    "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR_hclflPtqCBTcgYLf8Gr07cZf6QtSBed1JPTcm4_dtBwPS0p4",
    "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR4h5c7mqUm-zizDRCzAV5Er9TbEV8xsm5irY1OoAGxJhYpCJntQw",
    "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTeS2-KnbKAJzIT_cpwBP0mZjeH109wquYnV4njvID5yqhvZ4Ux"
  )
  for (i in seq_along(urls)) {
    print(expected[[i]])
    test_image(urls[[i]])
  }
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment