library("shiny") | |
library("XML") | |
library("stringr") | |
library("RCurl") | |
library("wordcloud") | |
library("tm") | |
shinyServer(function(input, output) { | |
output$plot1 <- reactivePlot(function() { | |
    getAbstracts <- function(author, dFrom, dTill, nRecs)
    {
      # For more details about PubMed queries see: http://www.ncbi.nlm.nih.gov/books/NBK25500/
      # Text search - basic URL
      eSearch <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term="
      # Data record download - basic URL
      eDDownload <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="
      # In case of multiple words (e.g., first and last name), join them with "+" signs
      aL <- str_replace_all(author, " ", "+")
      # Add the search keyword - author
      aQ <- paste(aL, "[author]", sep = "")
      # Format the publication date and add the search keyword - pdat
      # If only one year is provided, use that year; otherwise use year_1:year_2
      dQ <- ""
      if ((str_length(dFrom) > 0) & (str_length(dTill) > 0))
      {
        d1 <- paste(dFrom, dTill, sep = ":")
        dQ <- paste(d1, "[pdat]", sep = "")
      }
      if ((str_length(dFrom) > 0) & (str_length(dTill) == 0))
        dQ <- paste(dFrom, "[pdat]", sep = "")
      if ((str_length(dTill) > 0) & (str_length(dFrom) == 0))
        dQ <- paste(dTill, "[pdat]", sep = "")
      # Join the author and date search queries
      hlpQ1 <- aQ
      if (str_length(dQ) > 0)
        hlpQ1 <- paste(aQ, dQ, sep = "+")
      # Add the max number of retrieved articles at the end of the query
      rmQ <- paste("&retmax=", nRecs, sep = "")
      hlpQ2 <- paste(hlpQ1, rmQ, sep = "")
      # Finalize the query and search PubMed
      searchUrl <- paste(eSearch, hlpQ2, sep = "")
      # Wait - to ensure that all requests will be processed
      Sys.sleep(3)
      hlpURL <- getURL(searchUrl)
      # The result is an XML document - you can paste searchUrl into a browser to see/download it
      doc <- xmlTreeParse(hlpURL, asText = TRUE)
      IdlistHlp <- xmlValue(doc[["doc"]][["eSearchResult"]][["IdList"]])
      # I am sure there is a more elegant way (i.e., a function) to process this,
      # but I was too lazy to search for it
      if (length(IdlistHlp) > 0)
      {
        Idlist <- c()
        # Each PubMed ID is 8 digits long
        for (k in 1:(str_length(IdlistHlp) / 8))
          Idlist <- c(Idlist, str_sub(IdlistHlp, start = 8 * (k - 1) + 1, end = k * 8))
        # Once the article IDs for the author/dates are retrieved, fetch their abstracts
        Sys.sleep(2)
        hlp1 <- paste(eDDownload, paste(Idlist, collapse = ",", sep = ""), sep = "")
        hlp2 <- paste(hlp1, "&rettype=abstract", sep = "")
        testDoc <- xmlTreeParse(hlp2, useInternalNodes = TRUE)
        topFetch <- xmlRoot(testDoc)
        abst <- xpathSApply(topFetch, "//Abstract", xmlValue)
      }
      # In case nothing was found
      if (length(IdlistHlp) == 0)
        abst <- c("Zero", "Articles", "Found")
      abst
    }
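
    # Illustrative sketch, not executed by the app: with the UI's default inputs
    # ("John Smith", years 2000-2013, 10 records), getAbstracts() issues an
    # esearch URL of roughly this shape:
    #   http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=John+Smith[author]+2000:2013[pdat]&retmax=10
    # and it can be tested on its own, e.g.:
    #   testAbst <- getAbstracts("John Smith", "2000", "2013", 10)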
    plotWC <- function(abstracts, nc, cs)
    {
      # Once we have the abstracts, we can create a document corpus
      abstTxt <- Corpus(VectorSource(abstracts))
      text2.corpus <- tm_map(abstTxt, removePunctuation)
      text2.corpus <- tm_map(text2.corpus, content_transformer(tolower))
      text2.corpus <- tm_map(text2.corpus, removeWords, stopwords("english"))
      # Transform it into a term-document matrix and sort by total word occurrence
      tdm <- TermDocumentMatrix(text2.corpus)
      m <- as.matrix(tdm)
      v <- sort(rowSums(m), decreasing = TRUE)
      d <- data.frame(word = names(v), freq = v)
      # Select the color scheme
      pal2 <- brewer.pal(nc, cs)
      # And plot the cloud
      wordcloud(d$word, d$freq, scale = c(8, .2), min.freq = 2, max.words = 50,
                random.order = FALSE, rot.per = .15, colors = pal2,
                vfont = c("sans serif", "plain"))
    }
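
    # Illustrative sketch, not executed by the app: plotWC() can be previewed
    # outside Shiny with any character vector of text, e.g.:
    #   plotWC(c("gene expression analysis", "analysis of gene expression data"), 8, "Accent")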
    # Maximum number of colors available for each RColorBrewer qualitative scheme
    colNum <- function(type) {
      switch(type,
             Accent  = 8,
             Dark2   = 8,
             Pastel1 = 9,
             Pastel2 = 8,
             Paired  = 12,
             Set1    = 9,
             Set2    = 8,
             Set3    = 12
      )
    }
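
    # For example, colNum("Paired") and colNum("Set3") return 12, while colNum("Set2") returns 8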
    # Get the inputs, download the abstracts, and create the corresponding word cloud
    numCol <- colNum(input$colSel)
    plotWC(getAbstracts(input$aName, input$yL, input$yH, input$nR), numCol, input$colSel)
  })
})
library("shiny") | |
# Define UI | |
shinyUI(pageWithSidebar( | |
# Application title | |
headerPanel("Word Cloud Abstracts"), | |
sidebarPanel( | |
# Text input for the author's name, e.g., John Smith | |
textInput("aName", "Author name:", "John Smith"), | |
br(), | |
# More text inputs for the publication years | |
helpText("Search Pubmed articles published"), | |
numericInput("yL", "between", 2000, min = 1900, max = 2100), | |
numericInput("yH", "and", 2013, min = 1900, max = 2100), | |
br(), | |
# Slider inputs for the number of articles retrieved (newer articles should be on the top) | |
sliderInput("nR", "Resulting number of articles:", min = 1, max = 25, value = 10, step = 1), | |
br(), | |
# Drop-down input for the color scheme selection (RColorBrewer package for colors) | |
selectInput("colSel", "Select a color scheme:", c("Accent" = "Accent", "Dark" = "Dark2", "Pastel 1" = "Pastel1", "Pastel 2" = "Pastel2", "Paired" = "Paired", "One" = "Set1", "Two" = "Set2", "Three" = "Set3"), selected = "Accent") | |
), | |
mainPanel( | |
#Plot the wordcloud here | |
plotOutput("plot1", "800px", "600px") | |
) | |
)) |
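
A minimal sketch of how the two pieces above are typically run together, assuming the server code is saved as server.R and the UI code as ui.R in one app directory (the folder name "wordcloudApp" is just a placeholder):

library("shiny")
# Launch the app; assumes server.R and ui.R sit together in the "wordcloudApp" folder
runApp("wordcloudApp")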