Created
May 19, 2018 03:54
-
-
Save benmarwick/396fd66699645e7242b700971a26cbf6 to your computer and use it in GitHub Desktop.
Cluster images by similarity, and plot
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read a sample of the outline images and shrink each one to a tiny
# thumbnail so that every image is described by the same number of pixels.
library(magick)

img_paths <- list.files("../outline_P/", full.names = TRUE)
if (length(img_paths) == 0) {
  stop("No image files found in '../outline_P/'", call. = FALSE)
}

# Just get a sample. Cap the sample size at the number of files available:
# img_paths[1:n] pads with NA when n exceeds the number of files, and
# image_read() cannot read an NA path.
n <- min(100, length(img_paths))
imgs <- image_read(img_paths[seq_len(n)])

# reduce size: keep at most the top-left 100x100 pixel region of each image
imgs_cropped <- image_crop(imgs, "100x100")

# rescale, all the same size. The trailing "!" tells ImageMagick to ignore
# the aspect ratio; a plain "5x5" only fits the image *within* 5x5, so a
# non-square crop would yield a different number of pixels than the others.
imgs_scaled <- image_scale(imgs_cropped, "5x5!")
## Flatten every thumbnail into a grayscale pixel vector, then stack the
## vectors into a matrix for clustering.
library(EBImage)
library(dplyr)

# One numeric vector of pixel intensities per image, in image order.
x2 <- lapply(seq_along(imgs_scaled), function(idx) {
  gray_img <- image_convert(imgs_scaled[idx], colorspace = "gray")
  as.vector(imageData(as_EBImage(gray_img)))
})
names(x2) <- img_paths[1:n]

# bind_rows() turns the named list into one column per image; transposing
# then gives one row per image, with the file paths as row names.
x3 <- bind_rows(x2)
x4 <- t(x3)
# Choose the number of clusters, run k-means with it, and project the data
# onto discriminant coordinates for a quick diagnostic plot.
library(optCluster)
library(fpc)

# k-means starts from randomly chosen centres, so both the validation step
# and the final fit change from run to run unless the RNG seed is fixed.
set.seed(42)

# how many clusters? Compare k-means solutions for 2 to 10 clusters using
# stability validation measures.
opclust_out <- optCluster(x4,
                          nClust = 2:10,
                          clMethods = c("kmeans"),
                          validation = "stability")

# The largest label in the optimal assignment is the chosen cluster count.
n_clusters <- max(optAssign(opclust_out)$cluster)

# do kmeans; several random starts guard against a poor local optimum
# (the default is a single start).
k_out <- kmeans(x4, centers = n_clusters, nstart = 25)

# Discriminant coordinates separating the k-means clusters; plot the first
# two, coloured by cluster.
x5 <- discrcoord(x4, k_out$cluster)
plot(x5$proj[, 1:2], col = k_out$cluster)
## Scatter plot of the first two discriminant coordinates, drawing each
## image at its own position with a cluster-coloured point on top.
library("ggplot2")
library("ggimage")

# One row per image: plot position, path of the file to draw, cluster label.
d <- data.frame(
  x     = x5$proj[, 1],
  y     = x5$proj[, 2],
  image = img_paths[1:n],
  clus  = k_out$cluster
)

# Images go down first so the coloured points stay visible above them.
ggplot(data = d, mapping = aes(x = x, y = y)) +
  geom_image(mapping = aes(image = image), size = 0.075) +
  geom_point(mapping = aes(colour = factor(clus))) +
  theme_minimal()
Author
benmarwick
commented
May 19, 2018
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment