Skip to content

Instantly share code, notes, and snippets.

@olivroy
Last active August 22, 2023 03:27
Show Gist options
  • Save olivroy/d671e54a817f1e6cec31505491462b70 to your computer and use it in GitHub Desktop.
Save olivroy/d671e54a817f1e6cec31505491462b70 to your computer and use it in GitHub Desktop.
Explore function use in `openxlsx2`
# This script counts the frequency of function use in order to create a function index that makes more sense.
# We want to look at what is more user facing.
# for now the data source we use is the examples, vignettes, README etc.
# Maybe some blog posts in the wild may be used eventually.
# TODO go get functions that are used in chaining, and add back `wb_` prefix
# TODO reorder R6 methods so that internal ones are not shown first in documentation
# TODO add_thread, suggest `create_person()` or link to `wb_creators()`, `wb_person()`,
# TODO validate that `wb_dims()`, `wb_color()` are similar to
# TODO validate vignette content. discuss if the long / complicated way to add styling is relevant in the vignette.
# TODO prefixer https://github.com/dreamRs/prefixer
library(stringr)
library(dplyr)
# Exported functions of openxlsx2.
# This has the unforeseen advantage of ignoring functions from other packages.
fns <- readLines(fs::path_expand("~/Documents/rrr-forks/openxlsx2/NAMESPACE"))

# Keep only real `export(...)` directives; an unanchored "export" would also
# pick up `exportPattern(...)` or any other line containing the word.
fns_export <- fns %>%
  str_subset("^export\\(") %>%
  str_extract("\\((.+)\\)", group = 1)

# With no names the alternation below would collapse to "\\(" and match every
# opening parenthesis, so fail loudly instead.
stopifnot("no export() directives found in NAMESPACE" = length(fns_export) > 0)

# Regex fragments for the chained form of the `wb_*()` wrappers:
# `wb_add_data` becomes `\$add_data`, i.e. a literal `$add_data`.
# Names are escaped first so that regex metacharacters in a name stay literal.
wb_wrappers <- fns_export %>%
  str_subset("^wb_") %>%
  str_escape() %>%
  str_replace("^wb_", "\\\\$")

# One alternation matching `name(` or `$method(`.
# The lookbehind stops an exported name from matching as the tail of a longer
# identifier (e.g. `my_wb_save(` must not count as `wb_save(`). It is not
# applied to the `$method` form, which is always preceded by an object name.
fns_regex <- c(
  paste0("(?<![\\w.])", str_escape(fns_export)),
  wb_wrappers
) %>%
  paste0("\\(", collapse = "|")
# Collect every R script and R Markdown file in the package checkout, then drop
# any path that contains "doc/" or "inst/".
# The extension pattern escapes the dot and is anchored at the end of the path,
# so only names that really end in `.R` or `.Rmd` are kept.
files <- fs::dir_ls(
  path = "~/Documents/rrr-forks/openxlsx2",
  recurse = TRUE,
  regexp = "\\.(R|Rmd)$",
  type = "file"
) %>%
  fs::path_filter(regexp = "doc/|inst/", invert = TRUE)
# Read every file into one long tibble with one row per line of text:
#   file  - path of the file relative to the package root
#   value - the text of that line
# Files are read through their full path and labelled relative to the package
# root, so the result no longer depends on the current working directory. The
# classification further down compares the directory part of `file` with "R"
# and "tests/testthat", which only works for root-relative paths.
pkg_root <- fs::path_expand("~/Documents/rrr-forks/openxlsx2")
rel_paths <- as.character(fs::path_rel(fs::path_expand(files), start = pkg_root))
file_content <- files %>%
  purrr::set_names(rel_paths) %>%
  purrr::map(\(x) tibble::tibble(value = readLines(x, encoding = "UTF-8"))) %>%
  dplyr::bind_rows(.id = "file")
# For every line of text, collect all matches of the function-call pattern.
# `fn` is a list-column holding one character vector (possibly empty) per line.
file_content2 <- dplyr::mutate(
  file_content,
  file = fs::path(file),
  fn = str_extract_all(value, fns_regex)
)
# One row per matched function call, labelled with where the call was found.
#   fn   - function name: trailing "(" removed, a "$" turned back into the
#          "wb_" prefix, and "colour" spelled "color"
#   type - "examples" for roxygen lines (starting with #') in files directly
#          under R/, "tests" for files in tests/testthat, otherwise the
#          directory that contains the file
file_content3 <- file_content2 %>%
  dplyr::filter(lengths(fn) > 0) %>%
  tidyr::unnest(fn) %>%
  mutate(
    fn = fn %>%
      str_remove("\\($") %>%
      str_replace("\\$", "wb_") %>% # replace back wrapper functions
      str_replace("colour", "color"), # colour and color are the same
    fn_location = fs::path_dir(file),
    is_in_doc = str_detect(value, "^#'"),
    type = case_when(
      fn_location == "R" & is_in_doc ~ "examples",
      fn_location == "tests/testthat" ~ "tests",
      .default = fn_location
    )
  ) %>%
  # Drop the helper columns and put `type` first.
  select(type, everything(), -c(value, fn_location, is_in_doc))
# Number of calls per function and usage type, leaving out type "R".
# Rows are ordered by each function's total across all remaining types, so the
# rows of the most used functions come first.
file_content4 <- file_content3 %>%
  count(fn, type, sort = TRUE) %>%
  filter(type != "R") %>%
  add_count(fn, wt = n, name = "n2") %>% # per-function total, used for ordering only
  arrange(desc(n2)) %>%
  select(!n2)
# Summary table of workbook functions: leave out usages found in tests, add up
# the remaining counts per function, keep names containing "wb" but none of
# "xml", "create" or "xlsx", and show them as a gt table from most to least used.
file_content4 %>%
  filter(type != "tests") %>%
  summarise(n = sum(n), .by = fn) %>%
  filter(
    str_detect(fn, "wb") & str_detect(fn, "xml|create|xlsx", negate = TRUE)
  ) %>%
  arrange(desc(n)) %>%
  gt::gt(groupname_col = "fn") %>%
  gt::tab_options(row_group.as_column = TRUE)
# I see that xml_functions seem to be used a lot in internal code.
# Ignoring internal code for the sake of the function index,
# the most popular functions are `wb_workbook()`, `wb_color()`
# Full breakdown as a gt table: one row group per function, one row per usage
# type, with the group label shown as a column.
gt::tab_options(
  gt::gt(file_content4, groupname_col = "fn"),
  row_group.as_column = TRUE
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment