Created
August 1, 2014 17:41
-
-
Save dgrtwo/db83a968e880d8c325b2 to your computer and use it in GitHub Desktop.
Analysis of StackOverflow activity trends
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Analysis of Stack Overflow activity trends.
#
# Reads per-month question metrics from "permonth_data.csv", then prints and
# saves three line charts:
#   * Questions.png - number of questions per month
#   * Averages.png  - every other metric divided by that month's question count
#   * Totals.png    - every other metric as a raw monthly total
#
# The CSV must provide the columns Year, Month and Questions; every remaining
# column is treated as a metric and gets its own facet.

library(dplyr)
library(ggplot2)
library(reshape2)

# used the query here:
# http://data.stackexchange.com/stackoverflow/query/213011/metrics-of-question-activity-over-time
dat <- read.csv("permonth_data.csv")

# Place each month on the 15th so it gets a single mid-month Date for plotting.
dat <- dat %>%
  mutate(Date = as.Date(paste(Year, Month, "15", sep = "-")))

# remove the first few months (very different site then) and the last month
# (since the data may be slightly out of date)
dat <- dat %>%
  filter(Year > 2008, Year < 2014 | Month < 7)

# Pass each plot to ggsave() explicitly instead of relying on last_plot(), so
# the saved file cannot silently change if statements are added or reordered.
questions_plot <- ggplot(dat, aes(Date, Questions)) +
  geom_line() +
  ggtitle("Questions Per Month")
print(questions_plot)
ggsave("Questions.png", plot = questions_plot)

# Long format: one row per (month, metric). Questions stays an id column so it
# is available as the denominator for the per-question normalisation.
dat.m <- melt(dat, id.vars = c("Year", "Month", "Date", "Questions"))
dat.m <- dat.m %>%
  mutate(normalized = value / Questions)

averages_plot <- ggplot(dat.m, aes(Date, normalized)) +
  geom_line() +
  facet_wrap(~ variable, scales = "free_y") +
  ggtitle("Average Per Question") +
  ylab("Metric per question")
print(averages_plot)
ggsave("Averages.png", plot = averages_plot)

totals_plot <- ggplot(dat.m, aes(Date, value)) +
  geom_line() +
  facet_wrap(~ variable, scales = "free_y") +
  ggtitle("Total (Unnormalized)") +
  ylab("Number")
print(totals_plot)
ggsave("Totals.png", plot = totals_plot)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment