Skip to content

Instantly share code, notes, and snippets.

@thejsj
Last active June 1, 2019 00:33
Show Gist options
  • Save thejsj/09150063d678fb79389411462a6e70f1 to your computer and use it in GitHub Desktop.
Save thejsj/09150063d678fb79389411462a6e70f1 to your computer and use it in GitHub Desktop.
Basilica R Demo
# Install the Basilica R client from its release tarball
install.packages(
  "https://storage.googleapis.com/basilica-r-client/basilica_0.0.2.tar.gz",
  repos = NULL
)
library(basilica)

# Create a connection, authenticating with the demo key
conn <- connect(auth_key = "SLOW_DEMO_KEY")

# Change the working directory to the unpacked demo.
# Be sure to download and extract the demo archive first:
#   wget https://storage.googleapis.com/basilica-public/cats_dogs_demo.tgz
#   tar -xf cats_dogs_demo.tgz
setwd("~/Desktop/cats_dogs_demo")

# Example of how to use the library: embed a single image file
embedding <- embed_image_file("./images/dog.1.jpg", conn = conn)

# Collect the name and the relative path of every file under ./images
files <- list.files("./images")
image_files <- paste0("./images/", files)
# Getting embeddings for all files through the API is left disabled:
# embeddings <- embed_image_files(image_files, conn=conn)
# Loading from JSON
# One embedding file per image is expected at
# ~/Desktop/embeddings/<image file name>.emb; `files` (the image file names)
# is defined earlier in this script.
library("rjson")
embeddings_json <- paste0("~/Desktop/embeddings/", files, ".emb")
# Read each file into one numeric vector per image. Base lapply() is used
# instead of a %>% pipeline because no package providing %>% has been
# attached at this point in the script (dplyr is only loaded further down).
embeddings_list <- lapply(embeddings_json, function(path) {
  as.numeric(unlist(fromJSON(file = path)))
})
# Fail early if any embedding does not have the 2048 values assumed below,
# rather than silently building a misaligned matrix.
stopifnot(all(lengths(embeddings_list) == 2048))
# byrow = TRUE is essential: the values are concatenated image by image, and
# matrix() fills column by column by default, which would interleave values
# from different images so that row i no longer corresponds to files[i].
embeddings_data <- matrix(unlist(embeddings_list), ncol = 2048, byrow = TRUE)
# PCA
# Centre and scale every embedding column, then keep the scores of the
# first 30 principal components.
pca <- prcomp(embeddings_data, center = TRUE, scale. = TRUE)
pca_embeddings <- pca$x[, seq_len(30)]

# Add category to each one: 1 when the file name contains "cat", else 0
category <- as.integer(grepl("cat", files))
embeddings_with_category <- cbind(pca_embeddings, category)
data <- data.frame(embeddings_with_category)
# Training
library(dplyr)

# Tag every row with an id so the held-out rows can be identified
data <- mutate(data, id = row_number())

# Random 70% of the rows for training; the remaining rows form the test set
train_data <- sample_frac(data, .70)
test_data <- anti_join(data, train_data, by = "id")

# Remove IDs so they are not used as a predictor
train_data$id <- NULL
test_data$id <- NULL

# Logistic regression of category on all remaining columns
model <- glm(category ~ ., data = train_data, family = "binomial")
# Get results
# Predicted probabilities for the held-out rows. The result is stored in a
# variable called `predict`; later calls to predict(...) still reach the
# function because R skips non-function bindings when looking up a call.
predict <- predict(model, newdata = test_data, type = "response")

# Confusion table: actual category (rows) vs. probability above 0.5 (columns)
table(test_data$category, predict > 0.5)

# ROC curve: true positive rate against false positive rate
library(ROCR)
ROCRpred <- prediction(predictions = predict, labels = test_data$category)
ROCRperf <- performance(ROCRpred, measure = "tpr", x.measure = "fpr")
plot(ROCRperf, colorize = TRUE, text.adj = c(-0.2, 1.7))
# Test random file
#
# Downloads the image at `url`, embeds it, projects the embedding with the
# fitted PCA and classifies it with the fitted logistic regression model.
#
# Arguments:
#   url  Address of the image to download and classify.
#
# Returns (invisibly) "CAT" when the model's predicted probability is above
# 0.5 and "DOG" otherwise; the label is also printed.
#
# Relies on `conn`, `pca` and `model` being defined earlier in this script.
test_image <- function(url) {
  # Use a unique temporary file instead of a fixed /tmp path: this works on
  # every platform, avoids clobbering an existing file, and is cleaned up on
  # exit even if a later step fails.
  image_path <- tempfile(fileext = ".jpg")
  on.exit(unlink(image_path), add = TRUE)

  # mode = "wb" is required for binary content; the default text mode
  # corrupts images on Windows.
  download.file(url, image_path, mode = "wb", quiet = TRUE)

  emb <- embed_image_file(image_path, conn = conn)

  # Project the single embedding (as a one-row matrix) onto the principal
  # components, then score it with the model.
  reduced <- predict(pca, newdata = matrix(emb, nrow = 1))
  cat_probability <- predict(
    model,
    newdata = as.data.frame(reduced),
    type = "response"
  )

  label <- if (cat_probability > 0.5) "CAT" else "DOG"
  print(label)
  invisible(label)
}
# Run the classifier on a few sample images. Before each call the expected
# answer is printed so it can be compared with the label test_image() prints.
local({
  expected <- c(
    "IS DOG",
    "IS DOG",
    "IS DOG",
    "IS CAT",
    "IS CAT",
    "IS CAT"
  )
  urls <- c(
    "https://live-cdn.shelterluv.com/sites/default/files/animal_pics/13071/2019/05/23/12/20190523125459.png",
    "https://epi.azureedge.net/website-images/images/a-year-in-the-life-dog/dog_7_600x400.jpg?sfvrsn=edc6d67b_2",
    "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRdqPShXGJGqBnG1-1LU-_I5ZJSMt59iyaeT6H-luJpArY2DCXV",
    "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR_hclflPtqCBTcgYLf8Gr07cZf6QtSBed1JPTcm4_dtBwPS0p4",
    "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR4h5c7mqUm-zizDRCzAV5Er9TbEV8xsm5irY1OoAGxJhYpCJntQw",
    "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTeS2-KnbKAJzIT_cpwBP0mZjeH109wquYnV4njvID5yqhvZ4Ux"
  )
  for (i in seq_along(urls)) {
    print(expected[i])
    test_image(urls[i])
  }
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment