Skip to content

Instantly share code, notes, and snippets.

@muschellij2
Created October 12, 2017 05:38
Show Gist options
  • Save muschellij2/7a21416b709914f6e4601dfc534647ad to your computer and use it in GitHub Desktop.
Save muschellij2/7a21416b709914f6e4601dfc534647ad to your computer and use it in GitHub Desktop.
Parse SystemRequirements fields from R Packages
# NOTE: the global workspace is deliberately NOT cleared with rm(list = ls()).
# Every object this script reads is assigned by the script itself, and wiping
# the caller's environment is a destructive side effect when the file is
# sourced from an existing session. Run it in a fresh R session instead.
# devtools::install_github("metacran/crandb")
library(crandb)
# pack = package("fftw")
# sysreq = pack$SystemRequirements
library(magrittr)
# Query the crandb web API and return the parsed response body.
#
# Args:
#   api:  endpoint path relative to the API root (e.g. "-/all"). Leading
#         slashes are stripped, so "/-/all" and "-/all" build the same URL
#         instead of producing a double slash after the host name.
#   head, tail: accepted so existing calls keep working; not used here.
#
# Returns:
#   Whatever httr::content() produces for the response.
#
# Raises:
#   An error (via httr::stop_for_status) when the server answers with an
#   HTTP error status, rather than returning the error body as if it were
#   data.
DB <- function(api, head = 1e6, tail = head) {
  endpoint <- sub("^/+", "", api)
  url <- paste0("http://crandb.r-pkg.org", "/", endpoint)
  response <- httr::GET(url)
  httr::stop_for_status(response)
  httr::content(response)
}
# Download the complete package index, timing the request, then count the
# number of entries that came back.
system.time(all_data <- DB("/-/all"))
n_packages <- length(all_data)
# Return the description entry for the latest version of one package record.
#
# Args:
#   x: a list with a `versions` element (a list of per-version entries,
#      normally named by version string) and, optionally, a `latest`
#      element naming one of those versions.
#
# Returns:
#   The entry of `versions` named by `latest` when that name exists;
#   otherwise the last entry of `versions`. NULL when there are no versions.
get_latest_desc <- function(x) {
  # `[[` avoids the partial name matching that `$` performs on lists.
  versions <- x[["versions"]]
  if (length(versions) == 0L) {
    # Nothing to pick from; indexing with [[0]] would raise an error.
    return(NULL)
  }
  latest <- x[["latest"]]
  # A missing `latest` would make the `if` condition zero-length (an error),
  # so validate it before looking it up.
  if (length(latest) == 1L && !is.na(latest)) {
    key <- as.character(latest)
    if (key %in% names(versions)) {
      return(versions[[key]])
    }
  }
  # Fall back to the last listed version.
  versions[[length(versions)]]
}
# Extract the SystemRequirements field from the latest description of a
# package record; yields NULL when the field is absent.
get_sysreqs <- function(x) {
  latest_desc <- get_latest_desc(x)
  latest_desc$SystemRequirements
}
# Collect the SystemRequirements field of every package, flatten the result
# into one vector, and discard entries that are exactly the empty string.
all_sysreqs <- lapply(all_data, get_sysreqs)
real_sysreqs <- unlist(all_sysreqs)
real_sysreqs <- real_sysreqs[is.na(match(real_sysreqs, ""))]
n_with_reqs <- length(real_sysreqs)
##############################
# Split them out
##############################
# Whole-string normalisation: trim, lower-case, collapse whitespace runs and
# drop the ", optional" qualifier so it does not survive the comma split.
reqs <- gsub("\\s+", " ", tolower(trimws(real_sysreqs)))
reqs <- gsub(", optional", "", reqs)

# Split each string on commas, then clean the pieces of each string in a
# single pass. The steps run in a fixed order because later patterns rely on
# earlier removals.
reqs <- lapply(strsplit(reqs, ","), function(parts) {
  parts <- trimws(parts)
  # Semicolons also separate individual requirements.
  parts <- unlist(strsplit(parts, ";"))
  # Parenthesised chunk starting with ">" (e.g. "(>= 1.2)").
  parts <- sub("\\(>.*\\)", "", parts)
  # URLs given as "(http...)", "<http...>" or "- http...", plus what follows.
  parts <- sub("\\(http.*\\).*$", "", parts)
  parts <- sub("<http.*>.*$", "", parts)
  parts <- sub("- http.*$", "", parts)
  # Trailing " >..." text that was not wrapped in parentheses.
  parts <- sub(" (>.*)$", "", parts)
  # The removals above can leave doubled or dangling spaces behind.
  parts <- trimws(gsub("\\s+", " ", parts))
  # Treat "gnu make" and "make" as the same requirement.
  trimws(sub("gnu make", "make", parts))
})

# Flatten to a single vector and tabulate, least frequent first.
reqs <- unlist(reqs)
tab <- sort(table(reqs))
# Break every requirement string into space-separated words.
req_words <- unlist(strsplit(reqs, " "))

# Strip decoration from each word. Order matters: the trailing "." is removed
# before the trailing ")" so that a token such as "foo)." reduces to "foo".
req_words <- gsub("[.]$", "", req_words)
req_words <- gsub(":$", "", req_words)
req_words <- gsub("^:", "", req_words)
req_words <- gsub("'s$", "", req_words)
req_words <- gsub("^\\(", "", req_words)
req_words <- gsub("\\)$", "", req_words)

# Words that carry no information about which system dependency is needed.
stop_words <- c(
  tm::stopwords(),
  "must",
  "headers",
  "package",
  "scientific",
  "version",
  "higher",
  "support",
  # The packaging markers are listed without parentheses: the surrounding
  # "(" and ")" have already been stripped from every word above, so the
  # forms "(rpm)" and "(deb)" could never match.
  "rpm",
  "deb",
  "requires",
  "library",
  "libraries",
  "development",
  "available",
  "can",
  "well", "will",
  "website",
  "readme", "file", "one",
  "via",
  "users",
  "tools",
  "toolkit",
  "source",
  "shared",
  "release",
  "provided",
  "preferred",
  "may",
  "install",
  "installation",
  "instructions",
  "drivers", "downloaded",
  "built", "client",
  "known",
  # Empty tokens are produced when stripping leaves nothing behind.
  ""
)

# Drop the stop words and tabulate what remains, least frequent first.
req_words <- req_words[!(req_words %in% stop_words)]
tab_words <- sort(table(req_words))
# chunk_size = 1000
# from =
# first_pack = list_packages(limit =1,zformat = "latest")
# xx = list_packages(format = "latest")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment