Last active
August 29, 2015 14:07
-
-
Save dggoldst/c71f60186dc1d581df29 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("dplyr") | |
library("maps") | |
library("ggplot2") | |
library("mapproj") | |
#' Bare ggplot2 theme for maps: no axes, grid or panel background.
#'
#' Starts from `theme_grey(base_size)` and replaces the listed elements:
#' axis titles, axis text, panel background and panel grid are blanked,
#' tick length, panel spacing and plot margins are set to zero, and the
#' legend is placed at the bottom.
#'
#' @param base_size Base font size handed to `theme_grey()`.
#' @return The theme produced by `theme_grey(base_size) %+replace% theme(...)`.
theme_clean <- function(base_size = 12) {
  # `grid::unit()` is called with an explicit namespace instead of
  # `require(grid)`, so the function no longer attaches a package as a side
  # effect (and cannot silently continue when the package fails to load).
  theme_grey(base_size) %+replace%
    theme(
      axis.title = element_blank(),
      axis.text = element_blank(),
      panel.background = element_blank(),
      panel.grid = element_blank(),
      axis.ticks.length = grid::unit(0, "cm"),
      # `axis.ticks.margin` is a deprecated theme element and is dropped here;
      # the axis text it used to offset is blanked above anyway.
      # `panel.margin` was renamed to `panel.spacing` in later ggplot2
      # releases, so the current name is used.
      panel.spacing = grid::unit(0, "lines"),
      plot.margin = grid::unit(c(0, 0, 0, 0), "lines"),
      legend.position = "bottom",
      complete = TRUE
    )
}
#' Capitalise the first letter of every space-separated word.
#'
#' The remaining letters of each word are lower-cased, so "UK" becomes "Uk"
#' and "united kingdom" becomes "United Kingdom".
#'
#' @param x A single character string. Only the first element is processed;
#'   wrap the function with `Vectorize()` to apply it to a whole vector.
#' @return A length-one character vector, or `NA_character_` when `x` is
#'   missing.
simpleCap <- function(x) {
  # paste() would stringify a missing value and return the literal "NANA",
  # which then looks like a real name downstream. Keep it missing instead.
  if (is.na(x[1])) {
    return(NA_character_)
  }
  words <- strsplit(x, " ")[[1]]
  paste0(
    toupper(substring(words, 1, 1)),
    tolower(substring(words, 2)),
    collapse = " "
  )
}
# Element-wise wrapper so simpleCap() can be applied to a whole column.
vsimpleCap <- Vectorize(simpleCap)

# State outlines from the maps package, in the long format ggplot2 expects.
state_map <- map_data(map = "state")

# State lookup table; its three columns are renamed so that `state` can
# serve as the join key against the directory.
st <- read.csv("state_table.csv.gz", header = TRUE, stringsAsFactors = FALSE)
names(st) <- c("name", "state", "USregion")

# Member directory, extended with the state lookup columns.
df <- read.csv("directory.csv.gz", header = TRUE, stringsAsFactors = FALSE)
df <- left_join(df, st, by = "state")

# Region table, joined onto the per-country counts later in the script.
rg <- read.csv("regions.csv.gz", header = TRUE, stringsAsFactors = FALSE)
# Cleanup: normalise capitalisation first, then map blank entries and known
# spelling variants onto one canonical country name each. The variants are
# written as they look AFTER vsimpleCap() (e.g. "UK" has become "Uk").
country_variants <- c(
  "", "United Kingdom", "Uk", "P.r. China", "Columbia", "Republic Of Armenia"
)
country_canonical <- c("USA", "UK", "UK", "China", "Colombia", "Armenia")

df <- mutate(
  df,
  country = vsimpleCap(country),
  # Entries found in the variant list are swapped for their canonical name;
  # everything else (including missing values) is left untouched.
  country = ifelse(
    country %in% country_variants,
    country_canonical[match(country, country_variants)],
    country
  )
)
# Members per country, largest first.
countries <- df %>%
  group_by(country) %>%
  summarise(count = length(country)) %>%
  arrange(-count)

# Attach the region table. No `by` is given, so the join uses whichever
# columns the two tables have in common.
countries <- left_join(countries, rg)

# Order the region factor by total membership so bars are drawn from the
# largest region to the smallest.
region_totals <- countries %>%
  group_by(region) %>%
  summarize(count = sum(count)) %>%
  arrange(-count)
countries$region <- factor(countries$region, levels = region_totals$region)

# Two-level flag: USA versus everything else.
countries$usaqual <- factor(
  ifelse(countries$country == "USA", "USA", "Rest of World"),
  levels = c("USA", "Rest of World")
)
usaness <- countries %>%
  group_by(usaqual) %>%
  summarize(count = sum(count)) %>%
  arrange(-count)

# Bar chart: USA vs rest of world.
p <- ggplot(usaness, aes(x = usaqual, y = count, fill = usaqual)) +
  geom_bar(stat = "identity") +
  labs(x = "", y = "", title = "Members in USA and Rest of World") +
  theme(legend.position = "none")
p
ggsave(filename = "USAvRest.png", plot = p, width = 4, height = 4)

# Bar chart: members by region, with vertical axis labels.
p <- ggplot(countries, aes(x = region, y = count, fill = region)) +
  geom_bar(stat = "identity") +
  labs(x = "", y = "", title = "Members by Region") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    legend.position = "none"
  )
p
ggsave(filename = "MembersByRegion.png", plot = p, width = 4, height = 4)

# Dot plot: non-USA countries with more than five members, sorted by count.
p <- ggplot(
  filter(countries, country != "USA" & count > 5),
  aes(x = count, y = reorder(country, count))
) +
  geom_point(size = 3) +
  labs(
    x = "Members",
    y = "",
    title = "Members Outside USA\nin Countries with >5 Members"
  ) +
  theme_bw()
p
ggsave(filename = "MembersByCountry.png", plot = p, width = 4, height = 6)
# Make state map
# Members per state name, restricted to rows that matched the state table.
states <- df %>%
  filter(!is.na(USregion)) %>%
  group_by(name) %>%
  summarise(count = length(state)) %>%
  arrange(-count)

# The map data uses lower-case region names and spells out the District of
# Columbia, so align the names before merging.
states$name <- tolower(states$name)
states[states$name == "washington dc", "name"] <- "district of columbia"

# Keep every region present in the map outline; regions without any members
# come out of the merge as NA and are set to zero.
allst <- data.frame(name = as.character(unique(state_map$region)))
states <- merge(allst, states, by = "name", all.x = TRUE) %>%
  arrange(-count)
states[is.na(states$count), 2] <- 0
names(states) <- c("state", "count")

# Choropleth with a diverging fill centred on the median state count.
p <- ggplot(states, aes(map_id = state, fill = count)) +
  geom_map(map = state_map, color = "black") +
  scale_fill_gradient2(
    low = "#559999",
    mid = "grey90",
    high = "#BB650B",
    midpoint = median(states$count)
  ) +
  expand_limits(x = state_map$long, y = state_map$lat) +
  coord_map("polyconic") +
  theme_clean()
p
ggsave(filename = "us.member.map.png", plot = p, width = 6, height = 4)

# Capitalised state names for the axis labels of the dot plot.
states$state_simplecap <- vsimpleCap(as.character(states$state))

# Members in states with > 5 members
p <- ggplot(
  filter(states, count > 5),
  aes(x = count, y = reorder(state_simplecap, count))
) +
  geom_point(size = 3) +
  labs(x = "Members", y = "", title = "Members in States \nWith >5 Members") +
  theme_bw()
p
ggsave(filename = "MembersByState.png", plot = p, width = 4, height = 8)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment