Skip to content

Instantly share code, notes, and snippets.

@johnbaums
Last active December 25, 2015 11:29
Show Gist options
  • Save johnbaums/6969161 to your computer and use it in GitHub Desktop.
Save johnbaums/6969161 to your computer and use it in GitHub Desktop.
Return genera listed by theplantlist.org. Provide a vector of family names with argument 'family' to restrict to one or more families.
#' Return genera listed by theplantlist.org
#'
#' Downloads and parses browse pages from The Plant List (TPL) and returns
#' one row per genus link found in the page's "nametree" list.
#'
#' @param family Optional character vector of family names, matched
#'   case-insensitively against `tpl_families()$family`. When `NULL` (the
#'   default) the complete genus index page is fetched instead.
#' @return A data.frame with columns `family` (factor) and `genus`
#'   (character).
#' @details Needs the RCurl, XML and plyr packages and network access. When
#'   `family` is supplied, the function `tpl_families` must also be defined
#'   (see https://gist.github.com/johnbaums/6958504). An error is raised if
#'   none of the requested families is known; a warning names the unknown
#'   ones if only some are.
tpl_genera <- function(family = NULL) {
  # library() stops with a clear error if a package is missing, whereas
  # require() only returns FALSE and lets the failure surface later.
  library(RCurl)
  library(XML)
  library(plyr)

  if (is.null(family)) {
    # No filter: a single page lists every genus alongside its family.
    doc <- getURL('http://www.theplantlist.org/browse/-/-/')
    doc <- htmlParse(doc)
    ns.genera <- getNodeSet(doc, "//ul[@id='nametree']//a")
    genera <- xmlSApply(ns.genera, xmlValue)
    ns.family <- getNodeSet(doc, "//ul[@id='nametree']//i")
    family <- factor(xmlSApply(ns.family, xmlValue))
    # stringsAsFactors=FALSE keeps `genus` character, matching the filtered
    # branch below regardless of the R version's default.
    return(data.frame(family = family, genus = genera,
                      stringsAsFactors = FALSE))
  }

  if (!exists('tpl_families') || !is.function(tpl_families)) {
    stop(paste('If providing family names, the function "tpl_families" is required.',
               'See https://gist.github.com/johnbaums/6958504.', sep='\n'),
         call. = FALSE)
  }

  families <- tpl_families()

  # Case-insensitive lookup of each requested family, computed once.
  found <- tolower(family) %in% tolower(families$family)
  if (!any(found)) {
    stop(paste('Requested families not found on TPL.',
               'Use tpl_families() to list plant families indexed by TPL.'),
         call. = FALSE)
  }
  if (!all(found)) {
    # Report only the names that really are missing (the previous code
    # compared against the whole data.frame and so listed every name).
    warning(sprintf('Requested families not found on TPL: %s.\n%s',
                    paste(family[!found], collapse=', '),
                    'Use tpl_families() to list plant families indexed by TPL.'),
            call. = FALSE)
  }

  families <- families[tolower(families$family) %in% tolower(family), ]
  # Browse URL layout: <base>/<first letter of group>/<family>/
  families$urls <- file.path('http://www.theplantlist.org/browse',
                             substring(families$group, 1, 1),
                             families$family, '')

  # Fetch one family page and return the text of each link in its nametree.
  get.genera <- function(u) {
    doc <- getURL(u)
    doc <- htmlParse(doc)
    xpathSApply(doc, "//ul[@id='nametree']//a", xmlValue)
  }

  genera <- llply(families$urls, get.genera, .progress='text')
  # Repeat each family name once per genus found on its page.
  n.genera <- vapply(genera, length, integer(1))
  data.frame(family = factor(rep(families$family, n.genera)),
             genus = unlist(genera), stringsAsFactors = FALSE)
}
# Examples
# Both calls download pages from theplantlist.org, so they need network access.
# With no argument (family=NULL) no family filter is applied.
all.genera <- tpl_genera()
# Restrict the result to the named families; this path requires the
# function tpl_families() to be defined beforehand.
some.genera <- tpl_genera(c("Boweniaceae", "Davalliaceae",
"Myrtaceae", "Poaceae", "Rapateaceae"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment