Created
November 29, 2012 20:37
-
-
Save pssguy/4171750 to your computer and use it in GitHub Desktop.
Shiny App allowing online selection of subjects for graphical and tabular presentation of daily Wikipedia search rates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# libraries used. install as necessary | |
library(shiny) | |
library(RJSONIO) # acquiring and parsing data | |
library(ggplot2) # graphs | |
library(plyr) # manipulating data | |
library(lubridate) #dates | |
library(stringr) | |
trim.leading <- function (x) sub("^\\s+", "", x) | |
shinyServer(function(input, output) { | |
data <- reactive(function() { | |
# create a set of months to be analyzed | |
dates <-seq(Sys.time()-months(input$obs[2]),Sys.time()-months(input$obs[1]), by = "month") | |
# create blank dataframe to hold three fields | |
allData <- data.frame(count=numeric(),date=character(),name=character()) | |
# seperate each variable of the subject vector | |
subject <- str_split(input$subjects, ",")[[1]] | |
# loop through subjects and months | |
for(k in 1:length(subject)) { | |
# handle remote problems related to strings | |
target <- trim.leading(subject[k]) | |
target <- str_replace(target," ","_") | |
# create dataframe for individual records | |
df <- data.frame(count=numeric()) | |
for (i in 1:length(dates)) { | |
yr <- year(dates[i]) | |
mth <- month(dates[i]) | |
if (str_length(mth)==1) { | |
mth<-paste0("0",as.character(mth)) | |
} | |
# obtain and process daily count data by month by target | |
url <- paste0("http://stats.grok.se/json/en/",yr,mth,"/",target) | |
raw.data <- readLines(url, warn="F") | |
rd <- fromJSON(raw.data) | |
rd.views <- rd$daily_views | |
df <- rbind(df,as.data.frame(rd.views)) | |
} | |
#create the dataframe with all targets search counts by day | |
df$date <- as.Date(rownames(df)) | |
df$name <- subject[k] | |
colnames(df) <- c("count","date","name") | |
df <- arrange(df,date) | |
allData <- rbind(allData,df) | |
} | |
return(allData) | |
}) | |
# Create a heading based on range of dates selected for printing as a caption | |
output$caption <- reactiveText(function() { | |
endDate <- Sys.time()-months(input$obs[1]) | |
startDate <- Sys.time()-months(input$obs[2]) | |
if (input$obs[2]==0){ | |
paste("Daily rates for",month(Sys.time(), label = TRUE, abbr = TRUE),year(Sys.time()),sep=" ") | |
} else if ((input$obs[2]!=0)&(year(endDate)==year(startDate))) { | |
paste("Daily rates from",month(startDate, label = TRUE, abbr = TRUE),"to",month(endDate, label = TRUE, abbr = TRUE), year(endDate),sep=" ") | |
} else { | |
paste("Daily rates from",month(startDate, label = TRUE, abbr = TRUE),year(startDate),"to",month(endDate, label = TRUE, abbr = TRUE), year(endDate),sep=" ") | |
} | |
}) | |
# create plot for linear and log scales | |
output$plot <- reactivePlot(function() { | |
if (input$log) { | |
print(ggplot(data(), aes(x=date,y=log10(count),group=name,colour=name))+ | |
geom_line()+ylab("log10")+xlab("")+theme_bw() + | |
theme(legend.position="top",legend.title=element_blank(),legend.text = element_text(colour="blue", size = 14, face = "bold"))) | |
} else { | |
print(ggplot(data(), aes(x=date,y=count,group=name,colour=name))+ | |
geom_line()+ylab("")+xlab("") +theme_bw() + | |
theme(legend.position="top",legend.title=element_blank(),legend.text = element_text(colour="blue", size = 14, face = "bold"))) | |
} | |
}) | |
# create summary data for each subject | |
output$view <- reactiveTable(function() { | |
myTable <- data() | |
myTable$count <- as.integer(myTable$count) | |
myTable$date <- as.character(myTable$date) | |
mySummary <- ddply(subset(myTable,count>0),.(name), summarize, mean=mean(count),median=median(count),min=min(count),max=max(count),maxdate=date[which.max(count)]) | |
mySummary$showMax <- paste0(day(mySummary$maxdate)," ",month(mySummary$maxdate, label = TRUE, abbr = TRUE),", ",year(mySummary$maxdate)) | |
mySummary$maxdate <- NULL | |
mySummary <- arrange(mySummary,desc(mean)) | |
colnames(mySummary) <- c("","Mean","Median","Min","Max","Max Date") | |
mySummary | |
}) | |
# make data downloadable | |
output$downloadData <- downloadHandler( | |
# filename = function() { paste(input$data, '.csv', sep='') }, in tutorial to distinguish files trickier with my work | |
filename = function() { paste('results.csv', sep='') }, | |
content = function(file) { | |
write.csv(data(), file) | |
} | |
) | |
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
shinyUI(pageWithSidebar( | |
# Application title | |
headerPanel("Wikipedia Search Rates"), | |
# Sidebar with controls to select the subjects and time span | |
sidebarPanel( | |
helpText(p( | |
"The graph represents the daily number of Wikipedia searches for | |
any subject(s) - animal, vegetable or mineral - over recent months."), | |
p("The data is available from December 2007 | |
to the present day. Adjust the slider to amend the time covered."), | |
p("Increasing the number of subjects and | |
extending the time period will impact processing time")), | |
wellPanel( | |
p(strong("Enter Subject(s), correctly spelt, seperated by commas")), | |
textInput(inputId = "subjects", label = " ", value = "Selena Gomez, Justin Bieber"), | |
p("For ambiguous names use wiki nomenclature e.g. Andrew Clark (priest)"), | |
p(strong("Date range (months back from present);")), | |
sliderInput(inputId = "obs", | |
label=" ", | |
min = 0, max = 60, step = 1, value = c(0,2)) | |
), | |
div(class="span6", submitButton("Get Graph")), | |
div(class="span6", checkboxInput(inputId = "log", label = "log10 scale", value = FALSE)), | |
helpText("Use log scale if compared searches are significantly different"), | |
downloadButton('downloadData', 'Download Output as csv') | |
), | |
# Show the caption a line graph of the dauly rate and summary of results | |
mainPanel( | |
h3(textOutput("caption")), | |
plotOutput("plot"), | |
tableOutput("view") | |
) | |
)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment