Last active
August 22, 2023 03:27
-
-
Save olivroy/d671e54a817f1e6cec31505491462b70 to your computer and use it in GitHub Desktop.
Explore function use in `openxlsx2`
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script counts how often each function is used, in order to build a function index that makes more sense.
# We want to look at what is more user-facing.
# For now, the data sources are the examples, vignettes, README, etc.
# Blog posts in the wild may eventually be used as well.
# TODO go get functions that are used in chaining, and add back the `wb_` prefix
# TODO reorder R6 methods so that internal ones are not shown first in the documentation
# TODO add_thread: suggest `create_person()` or link to `wb_creators()`, `wb_person()`
# TODO validate that `wb_dims()`, `wb_color()` are similar to ... (note left unfinished)
# TODO validate vignette content; discuss whether the long / complicated way to add styling is relevant in the vignette
# TODO look into prefixer: https://github.com/dreamRs/prefixer
library(stringr) | |
library(dplyr) | |
# Exported functions of openxlsx2, read from the package NAMESPACE.
# This has the unforeseen advantage of ignoring functions from other packages.
fns <- readLines(fs::path_expand("~/Documents/rrr-forks/openxlsx2/NAMESPACE"))

# Keep only `export(...)` directives (anchored so that `exportPattern()`,
# `importFrom()`, ... are not picked up) and extract the exported name.
fns_export <- fns %>%
  str_subset("^export\\(") %>%
  str_extract("\\((.+)\\)", group = 1)

# Escape regex metacharacters so every name is matched literally.
fns_escaped <- str_escape(fns_export)

# `wb_*` functions are also searched for in their chained form, i.e. a literal
# `$` followed by the name without its `wb_` prefix (`$add_data(`).
# The prefix is restored after matching.
wb_wrappers <- fns_escaped %>%
  str_subset("^wb_") %>%
  str_replace("^wb_", "\\\\$")

# Single alternation matching a call site: `<name>(` or `$<method>(`.
# The lookbehind stops a plain name from matching inside a longer identifier
# (e.g. `foo_wb_save(` must not count as `wb_save(`). It is not applied to the
# `$` form, which is always preceded by an object or a closing parenthesis.
fns_regex <- c(
  paste0("(?<![\\w.])", fns_escaped),
  wb_wrappers
) %>%
  paste0("\\(", collapse = "|")
# All R scripts and R Markdown files of the package, searched recursively.
# The extension pattern is escaped and anchored so that only paths ending in
# `.R` or `.Rmd` are kept. Anything under a `doc/` or `inst/` directory is
# discarded.
files <- fs::dir_ls(
  path = "~/Documents/rrr-forks/openxlsx2",
  recurse = TRUE,
  regexp = "\\.(R|Rmd)$",
  type = "file"
) %>%
  fs::path_filter(regexp = "/(doc|inst)/", invert = TRUE)
# Read every file into one long tibble with one row per source line:
#   file  - path of the file relative to the package root
#   value - text of the line
# Paths are made relative to the package root explicitly (not to the current
# working directory) so that the directory part is e.g. "R" or
# "tests/testthat" wherever the script is run from. Files are still read
# through their original paths.
pkg_root <- fs::path_expand("~/Documents/rrr-forks/openxlsx2")
rel_paths <- as.character(fs::path_rel(files, start = pkg_root))
file_content <- as.character(files) %>%
  purrr::set_names(rel_paths) %>%
  purrr::map(\(x) tibble::tibble(value = readLines(x, encoding = "UTF-8"))) %>%
  dplyr::bind_rows(.id = "file")
# Annotate each source line with the function calls found in it:
# `fn` is a list-column holding every match of `fns_regex` in `value`
# (empty when the line has no match); `file` becomes an fs path.
file_content2 <- dplyr::mutate(
  file_content,
  file = fs::path(file),
  fn = stringr::str_extract_all(value, fns_regex)
)
# One row per detected function call, labelled with where it was found.
#   type - "examples" for roxygen comment lines in files under R/,
#          "tests" for files under tests/testthat,
#          otherwise the directory of the file.
#   file - file the call was found in
#   fn   - normalised function name
file_content3 <- file_content2 %>%
  # Drop lines without any match, then spread matches over rows.
  dplyr::filter(lengths(fn) != 0) %>%
  tidyr::unnest(fn) %>%
  mutate(
    fn = str_remove(fn, "\\($"), # drop the opening parenthesis of the call
    fn = str_replace(fn, "\\$", "wb_"), # replace back wrapper functions
    fn = str_replace(fn, "colour", "color"), # colour and color are the same
    # Plain character, so it combines cleanly with the string literals below.
    fn_location = as.character(fs::path_dir(file)),
    # Roxygen lines may be indented (e.g. documentation written inside a
    # class definition), so leading whitespace is allowed.
    is_in_doc = str_detect(value, "^\\s*#'"),
    type = case_when(
      fn_location == "R" & is_in_doc ~ "examples",
      fn_location == "tests/testthat" ~ "tests",
      .default = fn_location
    ),
    # Helper columns are no longer needed.
    value = NULL,
    fn_location = NULL,
    is_in_doc = NULL
  ) %>%
  relocate(type)
# Usage counts per function and location type, leaving out rows typed "R".
# Rows are ordered by the total count of their function (largest first); the
# helper total `n2` is removed once the ordering is done.
file_content4 <- file_content3 %>%
  dplyr::filter(type != "R") %>%
  dplyr::count(fn, type, sort = TRUE) %>%
  dplyr::mutate(n2 = sum(n), .by = fn) %>%
  dplyr::arrange(dplyr::desc(n2)) %>%
  dplyr::select(!n2)
# Table of total usage outside of tests, restricted to functions whose name
# contains "wb" and does not contain "xml", "create" or "xlsx".
file_content4 %>%
  dplyr::filter(
    type != "tests",
    str_detect(fn, "wb"),
    !str_detect(fn, "xml|create|xlsx")
  ) %>%
  dplyr::summarise(n = sum(n), .by = fn) %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  gt::gt(groupname_col = "fn") %>%
  gt::tab_options(row_group.as_column = TRUE)
# I see that the xml functions seem to be used a lot in internal code.
# Ignoring internal code,
# the most popular functions are `wb_workbook()` and `wb_color()`.
# Full breakdown: one row group per function, one row per location type.
gt::tab_options(
  gt::gt(file_content4, groupname_col = "fn"),
  row_group.as_column = TRUE
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment