Skip to content

Instantly share code, notes, and snippets.

@benmarwick
Created May 19, 2018 03:54
Show Gist options
  • Save benmarwick/396fd66699645e7242b700971a26cbf6 to your computer and use it in GitHub Desktop.
Save benmarwick/396fd66699645e7242b700971a26cbf6 to your computer and use it in GitHub Desktop.
Cluster images by similarity, and plot
# Read a sample of the outline images and shrink them to tiny thumbnails
# so each image can later be flattened into a short pixel vector.
library(magick)
img_paths <- list.files("../outline_P/", full.names = TRUE)
if (length(img_paths) == 0) {
  stop("No image files found in '../outline_P/'", call. = FALSE)
}
# Just get a sample. Cap the sample size at the number of files actually
# present: indexing past the end of img_paths would produce NA paths.
n <- min(100, length(img_paths))
imgs <- image_read(img_paths[seq_len(n)])
# reduce size: crop each image to a 100x100 geometry
imgs_cropped <- image_crop(imgs, "100x100")
# rescale, all the same size. The trailing "!" tells ImageMagick to ignore
# the aspect ratio; without it a non-square crop is scaled to fit *within*
# 5x5 and the images end up with differing pixel counts.
imgs_scaled <- image_scale(imgs_cropped, "5x5!")
## convert to EBImage format, grayscale, matrix...
# Flatten every scaled image into a plain vector of its pixel values.
library(EBImage)
pixel_vectors <- lapply(seq_along(imgs_scaled), function(idx) {
  gray_img <- image_convert(imgs_scaled[idx], colorspace = 'gray')
  as.vector(imageData(as_EBImage(gray_img)))
})
# Label each vector with the path of the image it came from.
names(pixel_vectors) <- img_paths[1:n]
# Bind the named vectors into a data frame and transpose it; the result is
# the matrix used downstream, intended to hold one image per row.
library(dplyr)
x4 <- t(bind_rows(pixel_vectors))
# how many clusters?
library(optCluster)
# kmeans() picks its initial centres at random (and the optCluster search
# may be stochastic as well), so fix the RNG seed to make the cluster
# labels and the resulting plots reproducible between runs.
set.seed(42)
opclust_out <- optCluster(x4,
                          nClust = 2:10,
                          clMethods = c("kmeans"),
                          validation = "stability")
# do kmeans
library(fpc)
# The largest label in the optimal assignment is used as the cluster count.
n_clusters <- max(optAssign(opclust_out)$cluster)
k_out <- kmeans(x4, centers = n_clusters)
# Project the pixel matrix onto discriminant coordinates for the k-means
# clusters and take a quick look at the first two dimensions.
x5 <- discrcoord(x4, k_out$cluster)
plot(x5$proj[, 1:2], col = k_out$cluster)
## plot
library("ggplot2")
library("ggimage")
# Gather the first two projected coordinates, the image file paths and the
# k-means cluster labels into one data frame for plotting.
proj <- x5$proj
d <- data.frame(
  x = proj[, 1],
  y = proj[, 2],
  image = img_paths[1:n],
  clus = k_out$cluster
)
# Draw each image at its projected position, then overlay a point coloured
# by cluster membership.
ggplot(data = d, mapping = aes(x = x, y = y)) +
  geom_image(mapping = aes(image = image), size = 0.075) +
  geom_point(mapping = aes(colour = factor(clus))) +
  theme_minimal()
@benmarwick
Copy link
Author

rplot

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment