Created
March 14, 2013 13:15
-
-
Save davidcoallier/5161214 to your computer and use it in GitHub Desktop.
Script that looks up and plots programming-language popularity based on Stack Overflow tag counts.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(RColorBrewer) | |
require(ggplot2) | |
require(scales) | |
# Thanks to drew conway for this handy function.
#' Look up how many Stack Overflow questions carry a given tag.
#'
#' Fetches the public "questions tagged" page for the tag and reads the
#' question count out of the page's summary element.
#'
#' @param tok A Stack Overflow tag as a character string (e.g. "c#").
#'   Slashes and spaces are turned into "-", and "#" is percent-encoded so
#'   the tag can be placed in the URL path.
#' @return The tag's question count as a numeric value, or `NA` (with a
#'   warning) when the count cannot be located or parsed.
get.stack <- function(tok) {
  # Must check for XML install, thanks onertipaday!
  # requireNamespace() tests availability without attaching; the package is
  # attached by library() below, which fails loudly if the install failed.
  if (!requireNamespace("XML", quietly = TRUE)) {
    install.packages("XML")
  }
  library(XML)

  # Enter a SO tag as character string, and number of tags are returned
  tok <- gsub("(/| )", "-", tok)
  tok <- gsub("#", "%23", tok, fixed = TRUE)

  base.stack <- "http://stackoverflow.com/questions/tagged/"
  stack.tree <- htmlTreeParse(paste0(base.stack, tok), useInternalNodes = TRUE)

  # NOTE(review): this XPath depends on the page markup at the time the
  # script was written -- confirm it still matches the live site.
  tag.count <- getNodeSet(
    stack.tree,
    "//div[@class='module']/div[@class='summarycount al']"
  )

  if (length(tag.count) == 0) {
    # No matching node: fall through to the NA warning below instead of
    # failing with "subscript out of bounds" on tag.count[[1]].
    tag.num <- NA_real_
  } else {
    # Strip thousands separators before converting; a non-numeric value
    # becomes NA and is reported by the warning below.
    tag.num <- suppressWarnings(
      as.numeric(gsub(",", "", xmlValue(tag.count[[1]]), fixed = TRUE))
    )
  }

  if (is.na(tag.num)) {
    warning(paste0("Something went wrong trying to parse '", tok, "'.\nNA returned"))
  }
  tag.num
}
# Feel free to add more if you want.
languages <- c(
  "python", "php", "ruby", "nodejs", "erlang",
  "coldfusion", "java", "haskell", "c#", "objc",
  "lisp", "go", "r", "assembly", "scala",
  "actionscript", "shell", "javascript"
)

# Query every tag once and build the data frame in a single step, rather than
# growing it with rbind() on each iteration. vapply() enforces that each lookup
# yields exactly one numeric value and handles an empty `languages` vector
# (which `1:length(languages)` would iterate over as c(1, 0)).
popularity <- vapply(languages, get.stack, numeric(1), USE.NAMES = FALSE)
results <- data.frame(name = languages, popularity = popularity)
# Horizontal bar chart of question counts, languages ordered by popularity.
# The bar heights are already computed, so the identity stat is required:
# geom_bar()'s default count stat does not accept a mapped `y` aesthetic.
popularity_plot <- ggplot(results, aes(x = reorder(name, popularity), y = popularity)) +
  geom_bar(stat = "identity") +
  # Print each count just past the end of its bar.
  geom_text(
    data = results,
    aes(x = name, y = popularity, label = popularity),
    hjust = -0.25
  ) +
  coord_flip() +
  theme_bw() +
  ylab("Number of Questions on StackOverflow") +
  xlab("Language Name") +
  labs(title = "Language Popularity by Number of tags on StackOverflow") +
  scale_y_continuous(labels = comma)

# Print explicitly so the plot is also drawn when the script is run through
# source(), where top-level values are not auto-printed.
print(popularity_plot)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment