Created
May 19, 2018 19:41
-
-
Save accessnash/4540469624f7f4b1d7b21bf6ce7a3255 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # SAMPLE CODE FOR SPATIAL PATTERNS OF VARIABLES | |
| # Builds a state-level choropleth map: each state polygon from `fifty_states` | |
| # is filled according to the `bachdeg` value read from bachdeg.csv. | |
| library(ggplot2) | |
| library(fiftystater) | |
| bachdeg <- read.csv("C:/Users/DASA0/Desktop/Stat 524/Project/bachdeg.csv", sep = ",") | |
| # Lower-cased copy of the State column, used below as the map_id key | |
| # (presumably to match the ids used in fifty_states - confirm). | |
| bachdeg$state <- tolower(bachdeg$State) | |
| # map_id creates the aesthetic mapping to the state name column in your data | |
| p <- ggplot(bachdeg, aes(map_id = state)) + | |
|   # map points to the fifty_states shape data | |
|   # NOTE(review): `fill = bachdeg` presumably resolves to a column named | |
|   # `bachdeg` inside the data frame of the same name - confirm. | |
|   geom_map(aes(fill = bachdeg), map = fifty_states) + | |
|   expand_limits(x = fifty_states$long, y = fifty_states$lat) + | |
|   coord_map() + | |
|   scale_fill_gradient(low = "yellow", high = "red") + | |
|   scale_x_continuous(breaks = NULL) + | |
|   scale_y_continuous(breaks = NULL) + | |
|   labs(x = "", y = "") + | |
|   # theme_bw() is a complete theme: adding it AFTER theme() would reset the | |
|   # legend position and panel background, so it has to come first. | |
|   theme_bw() + | |
|   theme(legend.position = "bottom", | |
|         panel.background = element_blank()) | |
| # Final map: add the legend title and the plot title (axis labels are already blank). | |
| P1 <- p + | |
|   labs(fill = "Bachelor's degree", | |
|        title = "People with at least a Bachelor's degree per 100,000 population, 2014") | |
| # P.S.: OTHER VARIABLES CAN BE SIMILARLY PLOTTED USING SAME CODE | |
| # SAMPLE CODE FOR CLUSTER ANALYSIS | |
| library(cluster) | |
| # Read the data set used for clustering and make sure it is a data frame. | |
| data2 <- as.data.frame( | |
|   read.csv("C:/Users/DASA0/Desktop/Stat 524/Project/data2.csv", sep = ",") | |
| ) | |
| # Columns 2-9 are divided by their standard deviations (no centering); | |
| # column 10 is appended unchanged. | |
| data_new3 <- cbind( | |
|   scale(data2[, 2:9], center = FALSE, scale = apply(data2[, 2:9], 2, sd)), | |
|   data2[10] | |
| ) | |
| # Pairwise Euclidean distances between the rows of the prepared data. | |
| distmat3 <- dist(data_new3, method = "euclidean") | |
| # Hierarchical clustering of distmat3 with method "average". | |
| fit.comp1 <- hclust(d = distmat3, method = "average") | |
| # Dendrogram, with leaves labelled by the stateName column of data2. | |
| plot(fit.comp1, labels = data2$stateName) | |
| # Cluster membership after cutting the tree into 4 groups. | |
| member.fitcomp1 <- cutree(fit.comp1, k = 4) | |
| # Per-cluster means: first on the scaled data, then on the raw data | |
| # (first column dropped), rounded to 2 decimals. | |
| aggregate(data_new3, by = list(member.fitcomp1), FUN = mean) | |
| round(aggregate(data2[, -1], by = list(member.fitcomp1), FUN = mean), digits = 2) | |
| # Silhouette plot for the 4-cluster solution. | |
| plot(silhouette(cutree(fit.comp1, 4), distmat3)) | |
| # Hierarchical clustering of distmat3 with method "mcquitty". | |
| fit.comp2 <- hclust(d = distmat3, method = "mcquitty") | |
| # Dendrogram, with leaves labelled by the stateName column of data2. | |
| plot(fit.comp2, labels = data2$stateName) | |
| # Cluster membership after cutting the tree into 4 groups. | |
| member.fitcomp2 <- cutree(fit.comp2, k = 4) | |
| # Per-cluster means: first on the scaled data, then on the raw data | |
| # (first column dropped), rounded to 2 decimals. | |
| aggregate(data_new3, by = list(member.fitcomp2), FUN = mean) | |
| round(aggregate(data2[, -1], by = list(member.fitcomp2), FUN = mean), digits = 2) | |
| # Silhouette plot for the 4-cluster solution. | |
| plot(silhouette(cutree(fit.comp2, 4), distmat3)) | |
| # Hierarchical clustering of distmat3 with method "complete". | |
| fit.comp3 <- hclust(distmat3, method = "complete") | |
| # Dendrogram, with leaves labelled by the stateName column of data2. | |
| plot(fit.comp3, labels = data2$stateName) | |
| # Cluster membership after cutting the tree into 4 groups. | |
| member.fitcomp3 <- cutree(fit.comp3, 4) | |
| # Per-cluster means: first on the scaled data, then on the raw data | |
| # (first column dropped), rounded to 2 decimals. | |
| aggregate(data_new3, list(member.fitcomp3), mean) | |
| round(aggregate(data2[, -1], list(member.fitcomp3), mean), 2) | |
| # Silhouette plot for THIS fit. The original line used fit.comp2 here | |
| # (copy-paste slip), which re-plotted the "mcquitty" solution instead. | |
| plot(silhouette(cutree(fit.comp3, 4), distmat3)) | |
| # Hierarchical clustering of distmat3 with method "ward.D". | |
| fit.comp4 <- hclust(d = distmat3, method = "ward.D") | |
| # Dendrogram, with leaves labelled by the stateName column of data2. | |
| plot(fit.comp4, labels = data2$stateName) | |
| # Cluster membership after cutting the tree into 4 groups. | |
| member.fitcomp4 <- cutree(fit.comp4, k = 4) | |
| # Per-cluster means: first on the scaled data, then on the raw data | |
| # (first column dropped), rounded to 2 decimals. | |
| aggregate(data_new3, by = list(member.fitcomp4), FUN = mean) | |
| round(aggregate(data2[, -1], by = list(member.fitcomp4), FUN = mean), digits = 2) | |
| # Silhouette plot for the 4-cluster solution. | |
| plot(silhouette(cutree(fit.comp4, 4), distmat3)) | |
| # Hierarchical clustering of distmat3 with method "ward.D2". | |
| fit.comp5 <- hclust(d = distmat3, method = "ward.D2") | |
| # Dendrogram, with leaves labelled by the stateName column of data2. | |
| plot(fit.comp5, labels = data2$stateName) | |
| # (a) 4-cluster cut: means on the scaled data, means on the raw data | |
| # (first column dropped, rounded to 2 decimals), then the silhouette plot. | |
| member.fitcomp5a <- cutree(fit.comp5, k = 4) | |
| aggregate(data_new3, by = list(member.fitcomp5a), FUN = mean) | |
| round(aggregate(data2[, -1], by = list(member.fitcomp5a), FUN = mean), digits = 2) | |
| plot(silhouette(cutree(fit.comp5, 4), distmat3)) | |
| # (b) 5-cluster cut of the same tree: means on the scaled data and the | |
| # silhouette plot only. | |
| member.fitcomp5b <- cutree(fit.comp5, k = 5) | |
| aggregate(data_new3, by = list(member.fitcomp5b), FUN = mean) | |
| plot(silhouette(cutree(fit.comp5, 5), distmat3)) | |
| # Scree Plot | |
| # Within-group sum of squares of k-means solutions for 1..max_k clusters, | |
| # plotted against the number of clusters. | |
| max_k <- 48 | |
| # kmeans() starts from randomly chosen centres; fix the seed so the curve | |
| # is the same on every run. | |
| set.seed(524) | |
| # Preallocate instead of growing the vector inside the loop. | |
| wss <- numeric(max_k) | |
| # One cluster: (n - 1) times the summed column variances. | |
| wss[1] <- (nrow(data_new3) - 1) * sum(apply(data_new3, 2, var)) | |
| for (i in 2:max_k) { | |
|   wss[i] <- sum(kmeans(data_new3, centers = i)$withinss) | |
| } | |
| # The result of plot() is deliberately not assigned: base plot() returns NULL, | |
| # and the original `p <- plot(...)` overwrote the ggplot object `p` with it. | |
| plot(1:max_k, wss, type = "b", main = "Scree Plot", xlab = "Number of Clusters", ylab = "Within Group SS", pch = 16) | |
| # Correlation among variables | |
| # Correlations between the first eight columns of data_new3, then a | |
| # hierarchical clustering of those variables using 1 - correlation as the | |
| # dissimilarity. | |
| crime_mat <- data.matrix(data_new3[, 1:8]) | |
| cormat <- cor(crime_mat) | |
| # Rounded copy, kept for display. | |
| simmat <- round(cormat, 3) | |
| # Display table: blank out the upper triangle so only the lower triangle and | |
| # diagonal remain. Assigning "" turns the matrix into character. | |
| upper <- simmat | |
| upper[upper.tri(simmat)] <- "" | |
| upper <- as.data.frame(upper) | |
| # Dissimilarities come from the unrounded correlations; the original used the | |
| # 3-decimal rounded matrix, which needlessly discards precision. | |
| distmat <- as.dist(1 - cormat) | |
| fit.comp <- hclust(distmat, method = "complete") | |
| plot(fit.comp) | |
| # Scatter Plot | |
| # Scatter-plot matrix of six columns of data2. | |
| pairs( | |
|   ~ violcr + propcr + medinc + ymunemp + gini + bachdeg, | |
|   data = data2 | |
| ) | |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is the R code for an exploratory analysis project I worked on. The write-up for the project is here (with some changes compared to the version I submitted at school): https://nashstatistica.wordpress.com/2018/05/27/how-i-stopped-worrying-bought-a-gun/