Skip to content

Instantly share code, notes, and snippets.

@Vessy
Last active May 3, 2016 04:25
Show Gist options
  • Save Vessy/4383737 to your computer and use it in GitHub Desktop.
Save Vessy/4383737 to your computer and use it in GitHub Desktop.
library("shiny")
library("XML")
library("stringr")
library("RCurl")
library("wordcloud")
library("tm")
shinyServer(function(input, output) {
output$plot1 <- reactivePlot(function() {
getAbstracts <- function(author, dFrom, dTill, nRecs)
{
#For more details about Pubmed queries see: http://www.ncbi.nlm.nih.gov/books/NBK25500/
#Text search - basic URL
eSearch <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term="
#Data record download - basic URL
eDDownload <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="
#In case of multiple words (e.g., first and the last name), add "+" sign in between them
aL <- str_replace(author, " ", "+")
#Add the search keyword - author
aQ <- paste(aL, "[author]", sep = "")
#Format the publication date and add the search keyword - pdat
#If only one year is provided, use that year, otherwise use year_1:year_2
dQ <- ""
if ((str_length(dFrom) > 0) & (str_length(dTill) > 0))
{
d1 <- paste(dFrom, dTill, sep = ":")
dQ <- paste(d1, "[pdat]", sep = "")
}
if ((str_length(dFrom) > 0) & (str_length(dTill) == 0))
dQ <- paste(dFrom, "[pdat]", sep = "")
if ((str_length(dTill) > 0) & (str_length(dFrom) == 0))
dQ <- paste(dTill, "[pdat]", sep = "")
#Add two seqrch queries together
hlpQ1 <- aQ
if (str_length(dQ) > 0)
hlpQ1 <- paste(aQ, dQ, sep = "+")
#Add the max number of retrieved articles at the end of the query
rmQ <- paste("&retmax=", nRecs, sep="")
hlpQ2 <- paste(hlpQ1, rmQ, sep="")
#Finalize the query and serch Pubmed
searchUrl <- paste(eSearch, hlpQ2, sep = "" )
#Wait - to ensure that all requests will be processed
Sys.sleep(3)
hlpURL <- getURL(searchUrl)
#The result is in form of XML document - you can paste the searchUrl in the browser to see/download it
doc <- xmlTreeParse(hlpURL, asText = TRUE)
IdlistHlp = xmlValue(doc[["doc"]][["eSearchResult"]][["IdList"]])
#I am sure there is more elegant way (i.e., a function) to proccess this, but I was lazy to search for it
if (length(IdlistHlp) > 0)
{
Idlist <- c()
#Each ID is 8 digits long
for(k in 1:(str_length(IdlistHlp)/8))
Idlist <- c(Idlist, str_sub(IdlistHlp, start = 8*(k-1) + 1, end = k*8))
#Once we retrieved articles' IDs for the author/dates, we can process them and get abstracts
Sys.sleep(2)
hlp1 <- paste(eDDownload, paste(Idlist, collapse = ",", sep = ""), sep = "")
hlp2 <- paste(hlp1, "&rettype=abstract", sep = "")
testDoc <- xmlTreeParse(hlp2, useInternalNodes = TRUE)
topFetch <-xmlRoot(testDoc)
abst <- xpathSApply(topFetch, "//Abstract", xmlValue)
}
#In case that nothing was found
if (length(IdlistHlp) == 0)
abst = c("Zero", "Articles", "Found")
abst
}
plotWC <- function(abstracts, nc, cs)
{
#Once we have abstracts, we can create a document corpus
abstTxt <- Corpus(VectorSource(abstracts))
text2.corpus = tm_map(abstTxt, removePunctuation)
text2.corpus = tm_map(text2.corpus, content_transformer(tolower))
text2.corpus = tm_map(text2.corpus, removeWords, stopwords("english"))
#Transform it into a matrix and sort based on the total word occurence
tdm <- TermDocumentMatrix(text2.corpus)
m <- as.matrix(tdm)
v <- sort(rowSums(m),decreasing=TRUE)
d <- data.frame(word = names(v),freq=v)
#Select the color scheme
pal2 <- brewer.pal(nc, cs)
#And plot the cloud
wordcloud(d$word,d$freq, scale=c(8,.2), min.freq = 2, max.words=50, random.order = FALSE, rot.per=.15, color = pal2, vfont=c("sans serif","plain"))
}
#Function to determine number of colors per each scheme
colNum <- function(type) {
switch(type,
Accent = 8,
Dark2 = 12,
Pastel1 = 9,
Pastel2 = 8,
Paired = 12,
Set1 = 9,
Set2 = 8,
Set3 = 12
)
}
# Get inputs, download abstracts, and create a corresponding wordcloud
numCol <- colNum(input$colSel)
plotWC(getAbstracts(input$aName, input$yL, input$yH, input$nR), numCol, input$colSel)
})
})
library("shiny")
# Define UI
shinyUI(pageWithSidebar(
# Application title
headerPanel("Word Cloud Abstracts"),
sidebarPanel(
# Text input for the author's name, e.g., John Smith
textInput("aName", "Author name:", "John Smith"),
br(),
# More text inputs for the publication years
helpText("Search Pubmed articles published"),
numericInput("yL", "between", 2000, min = 1900, max = 2100),
numericInput("yH", "and", 2013, min = 1900, max = 2100),
br(),
# Slider inputs for the number of articles retrieved (newer articles should be on the top)
sliderInput("nR", "Resulting number of articles:", min = 1, max = 25, value = 10, step = 1),
br(),
# Drop-down input for the color scheme selection (RColorBrewer package for colors)
selectInput("colSel", "Select a color scheme:", c("Accent" = "Accent", "Dark" = "Dark2", "Pastel 1" = "Pastel1", "Pastel 2" = "Pastel2", "Paired" = "Paired", "One" = "Set1", "Two" = "Set2", "Three" = "Set3"), selected = "Accent")
),
mainPanel(
#Plot the wordcloud here
plotOutput("plot1", "800px", "600px")
)
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment