Created
August 4, 2015 19:01
-
-
Save jwinternheimer/a1a6320f08f7287c059d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Packages used by this script
library(data.table)
library(dplyr)
library(tidyr)
library(ggplot2)
library(NbClust)

## Read Data
## Load the business-users CSV export; the first line of the file is a header.
data <- read.table("~/Downloads/business_users.csv", sep = ",", header = TRUE)
## Overwrite the header with fixed snake_case names.
## NOTE(review): assumes the file has exactly these seven columns, in this
## order -- confirm against the export.
names(data) <- c(
  "user_id", "team_members", "profiles", "months_paying",
  "plan_days", "updates", "updates_per_day_per_profile"
)

## Prep Data
## Replace every missing value with 0 so later steps see complete data.
data[is.na(data)] <- 0

## Separate Variables We're Interested In
## Dropping the id and tenure/volume columns leaves team_members, profiles
## and updates_per_day_per_profile as the clustering inputs.
features <- data %>%
  select(-user_id, -plan_days, -updates, -months_paying)
## Find how many clusters to use with wssplot
#' Elbow plot of within-group sum of squares against number of clusters.
#'
#' For k = 1 the value is the total sum of squares around the column means;
#' for k = 2..nc it is the summed `withinss` of `kmeans(data, centers = k)`.
#' The RNG is re-seeded with `seed` before every k-means fit so each point on
#' the curve is reproducible.
#'
#' @param data Numeric data frame or matrix, one row per observation.
#' @param nc Largest number of clusters to evaluate (a single number >= 1).
#' @param seed Seed passed to `set.seed()` before each `kmeans()` call.
#' @return Invisibly, the numeric vector of length `nc` that was plotted.
wssplot <- function(data, nc = 15, seed = 1234) {
  stopifnot(is.numeric(nc), length(nc) == 1L, !is.na(nc), nc >= 1)

  # Preallocate instead of growing the vector inside the loop.
  wss <- numeric(nc)
  # One cluster: every point belongs to the same group, so the within-group
  # sum of squares is (n - 1) * sum of the per-column variances.
  wss[1] <- (nrow(data) - 1) * sum(apply(as.matrix(data), 2, var))

  # seq_len(nc)[-1] is empty when nc == 1, whereas 2:nc would count
  # backwards (2, 1) and leave wss longer than the x axis.
  for (k in seq_len(nc)[-1]) {
    set.seed(seed)
    wss[k] <- sum(kmeans(data, centers = k)$withinss)
  }

  plot(seq_len(nc), wss,
    type = "b", xlab = "Number of Clusters",
    ylab = "Within groups sum of squares"
  )
  invisible(wss)
}
## Elbow plot for the selected features
wssplot(features)

## NbClust
## Evaluate k-means partitions with 2 to 15 clusters using NbClust's criteria.
nc <- NbClust(features, min.nc = 2, max.nc = 15, method = "kmeans")
## First row of Best.n holds the cluster count preferred by each criterion;
## tabulating it gives the number of criteria voting for each count.
best_k_votes <- table(nc$Best.n[1, ])
best_k_votes
barplot(best_k_votes,
  xlab = "Number of Clusters", ylab = "Number of Criteria",
  main = "Number of Clusters Chosen"
)
## K-means Clustering
## Fix the RNG seed so the random starting centres, and therefore the
## resulting partition, are the same on every run.
set.seed(1234)
results <- kmeans(features, centers = 3)
results

## Add cluster to original dataset
## Store the assignment as a factor so it is treated as a category, not a number.
data[["cluster"]] <- factor(results[["cluster"]])
## Plot number of profiles and team members for each cluster
## One point per row of `data`, coloured by its k-means cluster.
ggplot(data, aes(x = profiles, y = team_members, color = cluster)) +
  # The original call passed an empty `size =`; it is dropped here so the
  # default point size is used.
  geom_point() +
  # Rows with profiles outside [0, 50] are removed from the plot (with a warning).
  scale_x_continuous(limits = c(0, 50)) +
  labs(x = "Number of Profiles", y = "Number of Team Members") +
  theme_minimal()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment