Last active
December 25, 2015 15:39
-
-
Save johnbaums/7000303 to your computer and use it in GitHub Desktop.
parsePub: parses the details of a single publication (provided in JSON format) into an HTML citation. sortPubsToHTML: sorts pubs by year and parses each with parsePub, optionally sending the output to a text file. Here, pubs refers to a list of publications in JSON format, each of which is returned by the Mendeley Group Document Details method - http://apidocs.mendeley.com/home/user…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Format one publication record as an HTML citation paragraph.
#
# Arguments:
#   x         A list describing one publication. The fields read are
#             `authors` (a list of author records, each holding a forename
#             and a surname, either as a named character vector or as a
#             list), `year`, `url`, `title`, `published_in`, `volume` and
#             `pages`.
#   emphasize Optional character vector of full names ("Forename Surname").
#             Authors whose first initial and final surname word match one
#             of these names are wrapped in <strong> tags. NULL (the
#             default) disables emphasis.
#
# Returns:
#   A single character string of the form
#   "<p>Authors (year), <a href='url'>Title.</a> <em>Journal</em>, vol: pp.</p>".
#   Fields that are absent or blank are left out of the citation instead of
#   collapsing the whole result to character(0), which is what sprintf()
#   does when it is handed a NULL argument.
#
# NOTE(review): field values are interpolated into the HTML without
# escaping, so a title or URL containing <, & or ' is emitted verbatim.
parsePub <- function(x, emphasize=NULL) {
  # Reduce a field to a trimmed, non-empty string, or NULL when unusable.
  scalar <- function(v) {
    if (is.null(v) || length(v) == 0L) {
      return(NULL)
    }
    v <- trimws(as.character(v[[1L]]))[1L]
    if (is.na(v) || !nzchar(v)) NULL else v
  }

  # Read one part of an author record by name, falling back to position.
  # `[[` works for named character vectors and for lists alike.
  field <- function(author, key, pos) {
    nms <- names(author)
    raw <- if (!is.null(nms) && key %in% nms) {
      author[[key]]
    } else if (length(author) >= pos) {
      author[[pos]]
    } else {
      NULL
    }
    val <- scalar(raw)
    if (is.null(val)) '' else val
  }

  # Last whitespace-separated word of each string ('' for empty strings).
  lastWord <- function(s) {
    vapply(strsplit(s, '\\s+'), function(w) {
      if (length(w) > 0L) w[[length(w)]] else ''
    }, character(1))
  }

  authors <- x$authors
  forenames <- vapply(authors, field, character(1),
                      key='forename', pos=1L, USE.NAMES=FALSE)
  surnames <- vapply(authors, field, character(1),
                     key='surname', pos=2L, USE.NAMES=FALSE)

  # An author is emphasized when "<first initial> <last surname word>"
  # matches the same key derived from any name in `emphasize`.
  em <- rep(FALSE, length(authors))
  if (!is.null(emphasize) && length(authors) > 0L) {
    wanted <- trimws(as.character(emphasize))
    wanted <- wanted[!is.na(wanted) & nzchar(wanted)]
    em <- paste(substr(forenames, 1L, 1L), lastWord(surnames)) %in%
      paste(substr(wanted, 1L, 1L), lastWord(wanted))
  }

  # "Surname, F.M." for one author, optionally wrapped in <strong>.
  formatAuthor <- function(i) {
    parts <- strsplit(forenames[i], '\\s+')[[1L]]
    parts <- parts[nzchar(parts)]
    label <- if (length(parts) > 0L) {
      paste0(surnames[i], ', ',
             paste0(substr(parts, 1L, 1L), '.', collapse=''))
    } else {
      surnames[i]  # no forename: avoid rendering "Surname, ."
    }
    if (em[i]) paste0('<strong>', label, '</strong>') else label
  }
  authvec <- vapply(seq_along(authors), formatAuthor, character(1))

  # Join with ", " and put " & " before the final author. Building the
  # string directly (rather than by regex on commas) stays correct when a
  # name contains a comma or lacks a forename.
  n <- length(authvec)
  authstring <- if (n > 1L) {
    paste0(paste(authvec[-n], collapse=', '), ' & ', authvec[n])
  } else {
    paste(authvec, collapse='')
  }

  year <- scalar(x$year)
  url <- scalar(x$url)
  title <- scalar(x$title)
  journal <- scalar(x$published_in)
  volume <- scalar(x$volume)
  pages <- scalar(x$pages)

  lead <- if (is.null(year)) authstring else sprintf('%s (%s)', authstring, year)
  titled <- paste0(if (is.null(title)) '' else title, '.')
  linked <- if (is.null(url)) {
    titled
  } else {
    sprintf("<a href='%s'>%s</a>", url, titled)
  }

  # Journal part: " <em>Journal</em>[, volume[: pages]]."; pages are only
  # shown alongside a volume.
  src <- ''
  if (!is.null(journal)) {
    src <- sprintf(' <em>%s</em>', journal)
    if (!is.null(volume)) {
      src <- paste0(src, ', ', volume)
      if (!is.null(pages)) {
        src <- paste0(src, ': ', pages)
      }
    }
    src <- paste0(src, '.')
  }

  sprintf('<p>%s, %s%s</p>', lead, linked, src)
}
# See https://gist.github.com/johnbaums/7000303#file-publist-r for example usage.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(devtools)
library(RMendeley)
library(ROAuth)

# Mendeley API credentials. Both values are placeholders: replace them with
# your own consumer key and secret (see http://apidocs.mendeley.com/).
mkey <- 'mendeley consumer key'
msecret <- 'mendeley consumer secret'

# One-off authentication, kept commented out: run it once to create the
# credential object `mc` and save it, then reuse the saved copy via load().
#mc <- mendeley_auth(mkey, msecret)
#save(mc, file="~/../Dropbox/credentials/mendeley_credential.rdata")
load("~/../Dropbox/credentials/mendeley_credential.rdata")

# grab function to scrape QAECO member names from website
# (source_url() downloads the script and evaluates it at run time)
source_url('https://gist.github.com/johnbaums/32d902b7ecac0dac9ec4/raw/qaecoMembers.R')
qmem <- qaecoMembers()

# grab functions to parse pubs
source_url('https://gist.github.com/johnbaums/7000303/raw/sortPubsToHTML.R')
source_url('https://gist.github.com/johnbaums/7000303/raw/parsePub.R')

# grab Mendeley publication IDs
# Request the document IDs of a Mendeley group.
#
# Arguments:
#   group_id   Identifier of the Mendeley group (character or numeric).
#   maxresults Value sent as the `items` query parameter (default 500).
#   mkey       Mendeley consumer key, sent as `consumer_key`.
#
# Returns:
#   The `document_ids` element of the parsed JSON response.
pubID <- function(group_id, maxresults=500, mkey) {
  # sprintf('%s', 100000) yields "1e+05", which corrupts the URL, so numbers
  # are rendered in plain fixed notation before interpolation.
  plain <- function(v) {
    if (is.numeric(v)) format(v, scientific=FALSE, trim=TRUE) else as.character(v)
  }
  url <- sprintf(
    'http://api.mendeley.com/oapi/documents/groups/%s/docs/?consumer_key=%s&items=%s',
    plain(group_id), mkey, plain(maxresults))
  fromJSON(url)$document_ids
}
# The Mendeley group whose publications are listed; bound once so the ID
# lookup and the per-document requests cannot drift apart.
group_id <- '3762151'
pub_ids <- pubID(group_id=group_id, mkey=mkey)

# A single curl handle is shared by all of the requests below.
h <- getCurlHandle()

# grab publication details for pub ids
pubs <- lapply(pub_ids, function(x) {
  fromJSON(OAuthRequest(mc, sprintf("http://api.mendeley.com/oapi/library/groups/%s/%s/", group_id, x),
                        method="GET", curl=h))
})

# parse a single publication to html
parsePub(pubs[[1]], qmem)

# split pubs by year and parse all to html
sortPubsToHTML(pubs, qmem)
sortPubsToHTML(pubs, qmem, 'pubs.txt') # to a text file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Group publications by year (most recent first) and render each as HTML.
#
# Arguments:
#   pubList   A list of publication records; each must be accepted by
#             parsePub() and carry a `year` field.
#   emphasize Passed through to parsePub(); NULL (the default) means no
#             author is emphasized.
#   file      Optional path. When supplied, the HTML is written to it
#             (elements separated by blank lines) instead of returned.
#   overwrite Must be TRUE to write to a `file` that already exists.
#
# Returns:
#   Without `file`: a list with one character vector per year, each holding
#   an <h2> year heading followed by the citations for that year.
#   With `file`: invisible NULL, after writing the file.
#
# Publications whose year is missing or not numeric are omitted with a
# warning. If no function named parsePub is visible, it is sourced from the
# gist via devtools::source_url().
sortPubsToHTML <- function(pubList, emphasize=NULL, file, overwrite=FALSE) {
  writing <- !missing(file)
  if (writing && !isTRUE(overwrite) && file.exists(file)) {
    stop('File exists. Use overwrite=TRUE.', call.=FALSE)
  }

  # devtools is only needed for this fallback, so it is not attached
  # unconditionally.
  if (!exists('parsePub', mode='function')) {
    if (!requireNamespace('devtools', quietly=TRUE)) {
      stop('parsePub() is not defined and devtools is not available to source it.',
           call.=FALSE)
    }
    devtools::source_url('https://gist.github.com/johnbaums/7000303/raw/parsePub.R')
  }

  # Years are coerced to numeric once, so the factor values and its levels
  # are derived from the same representation.
  years <- vapply(pubList, function(p) {
    yr <- p[['year']]
    if (is.null(yr) || length(yr) == 0L) {
      NA_real_
    } else {
      suppressWarnings(as.numeric(yr[[1L]]))
    }
  }, numeric(1), USE.NAMES=FALSE)

  undated <- is.na(years)
  if (any(undated)) {
    warning(sprintf('%d publication(s) without a numeric year were omitted.',
                    sum(undated)), call.=FALSE)
  }

  byYear <- split(pubList,
                  factor(years, levels=sort(unique(years), decreasing=TRUE)))
  headings <- names(byYear)

  # lapply keeps the result a list regardless of group sizes; sapply would
  # collapse it to a matrix whenever every year has equally many entries.
  out <- lapply(seq_along(byYear), function(i) {
    c(sprintf("<h2 style='text-align:center;'><strong><em>%s</em></strong></h2>",
              headings[i]),
      unlist(lapply(byYear[[i]], parsePub, emphasize=emphasize)))
  })

  if (writing) {
    cat(unlist(out), sep='\n\n', file=file)
    invisible(NULL)
  } else {
    out
  }
}
# See https://gist.github.com/johnbaums/7000303#file-publist-r for example usage.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment