parsePub: Parse publication details (provided in JSON format) returned by the Mendeley Group Document Details method.
sortPubsToHTML: sorts pubs by year and parses each with parsePub, optionally sending output to a text file. Here, pubs refers to a list of publications in JSON format, each of which is returned by the Mendeley Group Document Details method - http://apidocs.mendeley.com/home/user…
parsePub <- function(x, emphasize=NULL) {
  # x: a single publication (a list parsed from Mendeley JSON)
  # emphasize: optional character vector of author names to wrap in <strong> tags
  if (!is.null(emphasize)) {
    # match authors against the emphasize list by first initial + last name
    lname <- sapply(strsplit(sapply(x$authors, '[[', 2), '\\s+'), tail, 1)
    finit <- substr(sapply(x$authors, '[[', 1), 1, 1)
    em.lname <- sapply(strsplit(emphasize, '\\s+'), tail, 1)
    em.finit <- substr(emphasize, 1, 1)
    em <- !is.na(match(paste(finit, lname), paste(em.finit, em.lname)))
  }
  # format one author as "Surname, F.I.", emphasised if matched above
  authsToVec <- function(y, emphasize) {
    initials <- paste0(substring(strsplit(x$authors[[y]]['forename'],
                                          '\\s+')[[1]], 1, 1), '.', collapse='')
    sprintf('%s%s, %s%s',
            ifelse(!is.null(emphasize) && em[y], '<strong>', ''),
            x$authors[[y]]['surname'],
            initials,
            ifelse(!is.null(emphasize) && em[y], '</strong>', ''))
  }
  authvec <- sapply(seq_along(x$authors), authsToVec, emphasize=emphasize)
  # replace the final comma with ' &' so the last two authors are joined with an ampersand
  authstring <- gsub(',([^,]*,[^,]*)$', ' &\\1', paste(authvec, collapse=', '))
  if (is.null(x$volume)) {
    sprintf("<p>%s (%s), <a href='%s'>%s.</a> <em>%s</em>.</p>",
            authstring, x$year, x$url, x$title, x$published_in)
  } else {
    sprintf("<p>%s (%s), <a href='%s'>%s.</a> <em>%s</em>, %s: %s.</p>",
            authstring, x$year, x$url, x$title, x$published_in,
            x$volume, x$pages)
  }
}
# See https://gist.github.com/johnbaums/7000303#file-publist-r for example usage.
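A quick sanity check of parsePub with a hand-built record. The field names and values below are hypothetical, mirroring the structure the function indexes above rather than copied from the Mendeley API docs:

# hypothetical single-publication record with the fields parsePub expects
fake_pub <- list(
  authors=list(c(forename='Jane A.', surname='Smith'),
               c(forename='John', surname='Doe')),
  year='2013', url='http://example.com/paper', title='An example paper',
  published_in='Journal of Examples', volume='12', pages='1-10')
parsePub(fake_pub, emphasize='Jane Smith')
# [1] "<p><strong>Smith, J.A.</strong> & Doe, J. (2013), <a href='http://example.com/paper'>An example paper.</a> <em>Journal of Examples</em>, 12: 1-10.</p>"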
library(devtools)   # for source_url
library(RMendeley)
library(ROAuth)
library(RCurl)      # for getCurlHandle
library(RJSONIO)    # for fromJSON
mkey <- 'mendeley consumer key' # see http://apidocs.mendeley.com/
msecret <- 'mendeley consumer secret'
# authenticate once with mendeley_auth, then cache the OAuth credential for reuse
#mc <- mendeley_auth(mkey, msecret)
#save(mc, file="~/../Dropbox/credentials/mendeley_credential.rdata")
load("~/../Dropbox/credentials/mendeley_credential.rdata")
# grab function to scrape QAECO member names from website
source_url('https://gist.github.com/johnbaums/32d902b7ecac0dac9ec4/raw/qaecoMembers.R')
qmem <- qaecoMembers()
# grab functions to parse pubs
source_url('https://gist.github.com/johnbaums/7000303/raw/sortPubsToHTML.R')
source_url('https://gist.github.com/johnbaums/7000303/raw/parsePub.R')
# grab Mendeley publication IDs
pubID <- function(group_id, maxresults=500, mkey) {
  fromJSON(sprintf(
    'http://api.mendeley.com/oapi/documents/groups/%s/docs/?consumer_key=%s&items=%s',
    group_id, mkey, maxresults))$document_ids
}
pub_ids <- pubID(group_id='3762151', mkey=mkey)
h <- getCurlHandle()  # reuse one curl handle across requests
# grab publication details for each pub id (one signed OAuth GET per document)
pubs <- lapply(pub_ids, function(x) {
  fromJSON(OAuthRequest(mc, sprintf("http://api.mendeley.com/oapi/library/groups/3762151/%s/", x),
                        method="GET", curl=h))
})
# parse a single publication to html
parsePub(pubs[[1]], qmem)
# split pubs by year and parse all to html
sortPubsToHTML(pubs, qmem)
sortPubsToHTML(pubs, qmem, 'pubs.txt') # to a text file
sortPubsToHTML <- function(pubList, emphasize, file, overwrite=FALSE) {
  require(devtools)
  if (!missing(file)) {
    if (!isTRUE(overwrite) && file.exists(file))
      stop('File exists. Use overwrite=TRUE.', call.=FALSE)
  }
  # source parsePub from the gist if it isn't already defined
  if (!exists('parsePub') || !is.function(parsePub)) {
    source_url('https://gist.github.com/johnbaums/7000303/raw/parsePub.R')
  }
  # split publications by year, most recent first
  years <- sapply(pubList, '[[', 'year')
  pubList <- split(pubList, factor(as.numeric(years),
                                   levels=sort(unique(years), decreasing=TRUE)))
  Y <- names(pubList)
  # for each year, emit a centred year heading followed by one <p> per publication
  out <- sapply(seq_along(pubList), function(i) {
    c(sprintf("<h2 style='text-align:center;'><strong><em>%s</em></strong></h2>", Y[i]),
      sapply(pubList[[i]], function(x) {
        parsePub(x, emphasize=emphasize)
      }))
  })
  if (!missing(file)) {
    cat(unlist(out), sep='\n\n', file=file)
  } else return(out)
}
# See https://gist.github.com/johnbaums/7000303#file-publist-r for example usage.
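When no file argument is given, sortPubsToHTML returns the HTML strings rather than writing them. A minimal sketch of dropping that output into a standalone page; the file name and the <html>/<body> scaffolding are my own, not part of the gist:

# assumes `pubs` and `qmem` from publist.R above
html_chunks <- unlist(sortPubsToHTML(pubs, qmem))
cat('<html><body>', html_chunks, '</body></html>',
    sep='\n\n', file='publications.html')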