Skip to content

Instantly share code, notes, and snippets.

@dgrtwo
Created October 16, 2013 03:27
Show Gist options
  • Save dgrtwo/7002257 to your computer and use it in GitHub Desktop.
Save dgrtwo/7002257 to your computer and use it in GitHub Desktop.
K-means clustering interactive tutorial
library(ggplot2)
library(shiny)
# Server logic for the interactive k-means tutorial.
#
# Inputs read:
#   input$clusters   - number of true clusters to simulate
#   input$points     - number of points drawn around each true centre
#   input$sd         - standard deviation of the Gaussian noise around a centre
#   input$kclusters  - number of clusters requested from kmeans()
#   input$iterations - passed to kmeans() as iter.max; 0 skips clustering
#
# Output written:
#   output$main_plot - scatter plot of the simulated points, coloured by true
#                      cluster (iterations == 0) or by k-means assignment
#                      (iterations > 0, with fitted centres drawn as "X").
shinyServer(function(input, output) {
  output$main_plot <- renderPlot({
    # Fixed seed: every re-render starts from the same random number stream.
    set.seed(205)

    # One (x, y) centre per true cluster; filling by row draws x1, y1, x2, ...
    n_true <- input$clusters
    centres <- matrix(runif(2 * n_true), ncol = 2, byrow = TRUE)

    # Cluster label of every simulated point: 1..n_true, cycled once per point.
    membership <- rep(seq_len(n_true), times = input$points)

    means <- data.frame(
      Cluster = factor(membership),
      cX = centres[membership, 1],
      cY = centres[membership, 2]
    )
    means$X <- rnorm(nrow(means), means$cX, input$sd)
    means$Y <- rnorm(nrow(means), means$cY, input$sd)

    run_kmeans <- input$iterations > 0
    if (run_kmeans) {
      xy <- as.matrix(means[c("X", "Y")])

      # kmeans() stops with a low-level error when asked for at least as many
      # centres as there are distinct observations (reachable here with one
      # true cluster and very few points). Show a readable message instead.
      validate(need(
        input$kclusters < nrow(unique(xy)),
        paste(
          "k-means needs more data points than clusters:",
          "lower the number of k-means clusters or add more points."
        )
      ))

      k <- kmeans(xy, input$kclusters, iter.max = input$iterations)
      # Colour by the k-means assignment rather than the true cluster.
      means$Cluster <- factor(k$cluster)
    }

    g <- ggplot(means, aes(X, Y)) + geom_point(aes(col = Cluster))
    if (run_kmeans) {
      # Overlay the fitted cluster centres as large "X" marks.
      g <- g + geom_point(
        aes(X, Y),
        data = as.data.frame(k$centers),
        pch = "X", size = 10
      )
    }
    print(g)
  })
})
library(shiny)
# User interface for the k-means tutorial: five sliders that parameterise the
# simulated data and the k-means run, followed by the plot output that the
# server fills in as "main_plot".
shinyUI(
  bootstrapPage(
    sliderInput(
      "clusters", "Number of true clusters in data",
      min = 1, max = 6, value = 3
    ),
    sliderInput(
      "sd", "Standard deviation of clusters",
      min = 0.001, max = 0.2, value = 0.02
    ),
    sliderInput(
      "kclusters", "Number of clusters used in the k-means algorithm",
      min = 1, max = 6, value = 3
    ),
    sliderInput(
      "points", "Number of points per cluster",
      min = 3, max = 100, value = 25
    ),
    sliderInput(
      "iterations", "Number of iterations of k-means algorithm",
      min = 0, max = 6, value = 3
    ),
    plotOutput("main_plot", height = "300px")
    # Disabled second plot, kept for reference:
    # plotOutput(outputId = "wss_plot", height = "300px")
  )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment