Last active
May 1, 2017 21:30
-
-
Save nkrabben/87d978591cd13de2851a5262cf970e82 to your computer and use it in GitHub Desktop.
Cluster analysis of the NDSR Competencies survey (https://osf.io/zndwq/)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr)
library(dplyr)
library(tidyr)
library(factoextra)

# Download the raw survey responses from OSF. Only the first 30 columns are
# kept; the last 3 columns don't ask about skills.
osf_url <- "https://files.osf.io/v1/resources/zndwq/providers/osfstorage/570be0eab83f6901d62b19d9"
responses <- read_csv(osf_url)[, 1:30]
# Reshape raw data into number of responses per importance level:
# one row per skill, one column per distinct answer value, and each cell holds
# how many respondents gave that answer (0 when nobody did).
skills <- responses %>%
  # Fix typo in the source column name
  rename(NeedsAssessGapAnalyses = NeedsAssessGayAnalyses) %>%
  # Long format: every column name goes to `skill`, every answer to `value`
  gather(key = skill, value = value) %>%
  # Tally answers per skill; count() returns an ungrouped result
  count(skill, value) %>%
  spread(value, n, fill = 0) %>%
  # Move back to data.frame since tibbles can't have row labels
  as.data.frame()

# Add skill names as row labels and drop the skill column. drop = FALSE keeps
# a data.frame (and its row labels) even if only one count column remains.
rownames(skills) <- skills$skill
skills <- skills[, -1, drop = FALSE]
# Run k-means cluster analysis (3 clusters, 30 random starts) with a
# repeatable seed. graph = FALSE because the plot is drawn separately.
# NOTE(review): eclust() also has its own `seed` argument; confirm whether the
# set.seed() call here or that argument governs the k-means starts.
set.seed(42)
skill_clusters <- eclust(
  skills,
  FUNcluster = "kmeans",
  k = 3,
  nstart = 30,
  graph = FALSE
)
# Visualize k-means clusters
# Add meaning to cluster labels, wish there was a convenience function to do
# this. The same legend title, breaks and labels are applied to the shape,
# fill and color scales so they describe the clusters consistently.
# NOTE(review): the label order assumes a particular cluster numbering, and the
# axis percentages are hard-coded; re-check both if the data or seed changes.
skill_need_title <- "Skill Need"
skill_need_breaks <- c(1, 2, 3)
skill_need_labels <- c("Required Skills", "Niche Skills", "Nice-to-Have Skills")

skill_plot <- fviz_cluster(skill_clusters, ellipse.type = "norm", repel = TRUE) +
  labs(
    x = "55% of variance in necessity",
    y = "25% of variance in necessity",
    title = "NDSR Project Skills Grouped by Variance of Importance"
  ) +
  scale_shape_discrete(
    name = skill_need_title,
    breaks = skill_need_breaks,
    labels = skill_need_labels
  ) +
  scale_fill_discrete(
    name = skill_need_title,
    breaks = skill_need_breaks,
    labels = skill_need_labels
  ) +
  scale_color_discrete(
    name = skill_need_title,
    breaks = skill_need_breaks,
    labels = skill_need_labels
  )

# Save the plot explicitly. ggsave() is not a plot layer, so it must not be
# chained onto the plot with `+`; pass the plot object instead.
ggsave("ndsr_skill_cluster.png", plot = skill_plot)

# Display the plot as well when the script is run at top level
skill_plot
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment