Skip to content

Instantly share code, notes, and snippets.

@noamross
Last active July 16, 2021 06:48
Show Gist options
  • Save noamross/89bc3e8c114b02bec09fd25272601a2c to your computer and use it in GitHub Desktop.
Save noamross/89bc3e8c114b02bec09fd25272601a2c to your computer and use it in GitHub Desktop.
library(desc)
library(httr)
library(purrr)
library(stringi)
library(methods)
library(dplyr)
library(tidyr)
# Names of all packages listed by the configured repositories.
pkgs <- rownames(available.packages())

# Local cache directory: one file per package, named after the package,
# holding the DESCRIPTION fetched from the github.com/cran mirror.
if (!dir.exists("DESCRIPTIONS")) dir.create("DESCRIPTIONS")

walk(pkgs, function(p) {
  dest <- paste0("DESCRIPTIONS/", p)
  # Already cached: skip, so the script can be re-run after an interruption.
  if (file.exists(dest)) {
    return(invisible(NULL))
  }
  url <- paste0("https://raw.githubusercontent.com/cran/", p, "/master/DESCRIPTION")
  # A transport error must not abort the whole crawl; treat it as a failed
  # download for this one package.
  resp <- tryCatch(
    GET(url, write_disk(dest)),
    error = function(e) NULL
  )
  # write_disk() saves the response body whatever the HTTP status, so a 404
  # page would otherwise be cached as if it were a DESCRIPTION file and never
  # be re-fetched. Remove anything that is not a successful download.
  if (is.null(resp) || http_error(resp)) {
    unlink(dest)
    warning("Could not download DESCRIPTION for package ", p, call. = FALSE)
  }
})
# One row per cached DESCRIPTION file: the package name and a list-column
# holding the distinct three-letter role codes found in its Author field.
# tibble() replaces the deprecated data_frame().
pd <- tibble(
  package = list.files("DESCRIPTIONS/"),
  role = map(package, function(p) {
    author <- desc_get("Author", paste0("DESCRIPTIONS/", p))
    # Matches three lowercase letters preceded by "[" or ", " and followed by
    # "," or "]". stri_extract_all_regex() returns a one-element list for the
    # single input string, so unique() has to be applied to that element
    # (not to the list) to actually drop repeated codes.
    unique(stri_extract_all_regex(
      author,
      "(?<=(\\[|,\\s))\\b[a-z]{3}\\b(?=(,|\\]))"
    )[[1]])
  })
)
# Build a grid-style text table of role codes and the number of packages in
# which each code appears, then copy it to the clipboard.
tab <- pd %>%
  # Name the list-column explicitly; calling unnest() with no columns is
  # deprecated.
  unnest(role) %>%
  filter(!is.na(role)) %>%
  # One row per (package, role) pair so each package is counted once per role.
  distinct(package, role) %>%
  # Name the count column directly instead of relying on count() falling back
  # to an auto-generated `nn` when a column `n` already exists.
  count(role, name = "pkgs_with") %>%
  # Attach the MARC relator metadata for each code (internal utils object).
  left_join(utils:::MARC_relator_db, by = c("role" = "code")) %>%
  # Codes that are not in R's list of used relator codes get a "*" suffix.
  mutate(role = if_else(
    role %in% utils:::MARC_relator_db_codes_used_with_R,
    role,
    paste0(role, "*")
  )) %>%
  # Qualified so the result does not depend on dplyr being attached after the
  # desc package, which also exports a function named desc().
  arrange(dplyr::desc(pkgs_with)) %>%
  select(-usage, -description) %>%
  rename(code = role, `CRAN packages with` = pkgs_with) %>%
  pander::pander_return(style = "grid", justify = "left")
clipr::write_clip(tab)
# List every (package, role) pair whose role code is neither in R's list of
# used relator codes nor "rev", render it as a grid table, and copy it to the
# clipboard.
pd %>%
  # Name the list-column explicitly; calling unnest() with no columns is
  # deprecated.
  unnest(role) %>%
  filter(
    !is.na(role),
    !(role %in% utils:::MARC_relator_db_codes_used_with_R),
    role != "rev"
  ) %>%
  pander::pander_return(style = "grid", justify = "left") %>%
  clipr::write_clip()
@maelle
Copy link

maelle commented Jul 16, 2021

Another take on this 🙂

# Fetch the CRAN package database, normalise its column names with
# janitor::clean_names(), and keep only rows whose `authors_r` column is not
# missing.
crandb <- tools::CRAN_package_db() |>
  janitor::clean_names() |>
  dplyr::filter(!is.na(authors_r))

#' Extract per-person role strings from an Authors@R-style field
#'
#' @param people_string A character string containing R code that evaluates
#'   to a `person` object (e.g. the text of an Authors@R field).
#' @return A length-one list whose element is a character vector with one
#'   entry per person: that person's roles joined by ", ", without the
#'   surrounding brackets. Wrapped in a list so it can fill a list-column.
get_roles <- function(people_string) {
  # NOTE(security): this executes arbitrary R code taken from package
  # metadata. Only run it on input from a source you trust.
  # The code is evaluated in a throwaway environment (with utils, and thereby
  # person(), reachable) so that any assignments it performs cannot overwrite
  # this function's own variables.
  people <- eval(
    parse(text = people_string, keep.source = FALSE),
    envir = new.env(parent = asNamespace("utils"))
  )
  # Empty braces strip the default "[" / "]" around the role list.
  list(format(people, include = "role", braces = list(role = c("", ""))))
}

# Parse each package's Authors@R text into a list-column of role strings.
# rowwise() makes mutate() call get_roles() once per package; ungroup() then
# drops the row-wise grouping so the following verbs work on the whole table.
crandb <- crandb |>
  dplyr::rowwise() |>
  dplyr::mutate(role = get_roles(authors_r)) |>
  dplyr::ungroup() |>
  dplyr::select(package, role)

# First unnest: one row per person. Then split each person's "a, b" role
# string and unnest again: one row per (package, person, role code).
crandb <- crandb |>
  tidyr::unnest(role) |>
  dplyr::mutate(role = strsplit(role, split = ", ")) |>
  tidyr::unnest(role)

# Restrict both the relator table and the extracted roles to the codes listed
# in R's internal MARC_relator_db_codes_used_with_R.
marc_db <- utils:::MARC_relator_db
marc_db <- dplyr::filter(marc_db, code %in% utils:::MARC_relator_db_codes_used_with_R)
crandb <- dplyr::filter(crandb, role %in% utils:::MARC_relator_db_codes_used_with_R)

# After the filter above every role is exactly one of the codes, so an exact
# join gives the same matches as a regex join without comparing every pair of
# rows. keep = TRUE retains both key columns (`code` and `role`).
newdb <- dplyr::left_join(marc_db, crandb, by = c(code = "role"), keep = TRUE)

# Count matched rows only: the left join keeps a single all-NA row for a code
# that nobody uses, which a plain row count would report as 1 instead of 0.
dplyr::count(newdb, code, wt = !is.na(package), sort = TRUE) |> knitr::kable()
code n
aut 19608
cre 10173
ctb 9307
cph 3757
ths 344
fnd 323
dtc 221
rev 191
trl 106
ctr 83
com 31

Created on 2021-07-16 by the reprex package (v2.0.0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment