Skip to content

Instantly share code, notes, and snippets.

@sillasgonzaga
Created May 8, 2018 21:18
Show Gist options
  • Save sillasgonzaga/df816be973853e9cd2a8496926524e4b to your computer and use it in GitHub Desktop.
Save sillasgonzaga/df816be973853e9cd2a8496926524e4b to your computer and use it in GitHub Desktop.
library(tidyverse)
library(fs)
# Root folder that holds the blog posts to be scanned.
dir_path <- "projetos/site-master/content/blog/"

# Quick preview of what the directory listing looks like.
# (The original referenced `dir_posts`, which is never defined.)
dir_info(dir_path, recurse = TRUE) %>%
  head() %>%
  knitr::kable()

# One row per .R / .Rmd file found under `dir_path`:
#   file_name - base name of the file
#   path      - path as returned by dir_info()
rmd_or_r_file_paths_tbl <- dir_info(dir_path, recurse = TRUE) %>%
  mutate(file_name = path_file(path)) %>%
  select(file_name, path) %>%
  filter(str_detect(file_name, "(\\.R|\\.Rmd)$"))
#' Build the table of function calls found in one file.
#'
#' Reads the file, extracts the called function names with
#' `parse_function_names()` and labels each one with the package(s) listed in
#' `loaded_functions_tbl`. Names without a match get the package "Unknown".
#' Rows for a function named "filter" are kept only when the matched package
#' is "dplyr".
#'
#' @param path Path of the file to read.
#' @param loaded_functions_tbl Lookup table joined onto the parsed names; the
#'   code below reads its `function_name` and `package` columns.
#' @return The labelled table, or `NA` if any step raises an error.
build_function_names_tbl_from_file_path <- function(path, loaded_functions_tbl) {
  safe_builder <- possibly(
    function(file, known_functions) {
      called <- parse_function_names(readLines(file))
      labelled <- left_join(called, known_functions)
      # Equivalent to !(is "filter" AND not from dplyr); rows where the
      # condition evaluates to NA are dropped by filter() either way.
      kept <- filter(labelled, function_name != "filter" | package == "dplyr")
      mutate(kept, package = ifelse(is.na(package), "Unknown", package))
    },
    otherwise = NA
  )
  safe_builder(path, loaded_functions_tbl)
}
#' Extract the names of the functions called in a piece of source text.
#'
#' The text is collapsed into a single string and split at every "(". For each
#' piece except the last, the final space-separated token is taken as the
#' candidate function name; a `pkg::` / `pkg:::` prefix is stripped and every
#' character other than letters, digits, "_" and "." is removed.
#'
#' @param text Character vector of source lines.
#' @param stop_words Names to drop from the result. The default `""` removes
#'   empty candidates.
#' @return A tibble with a single character column `function_name`, or `NA`
#'   if parsing raises an error.
parse_function_names <- function(text, stop_words = c("")) {
  parser <- function(text, stop_words) {
    pieces <- str_split(str_c(text, collapse = " "), "\\(")[[1]]

    tibble(text = pieces) %>%
      # Whatever follows the final "(" does not precede a call.
      slice(-n()) %>%
      # The token immediately before "(" is the candidate name.
      mutate(function_name = map_chr(str_split(text, " "), last)) %>%
      select(function_name) %>%
      # Split off a namespace prefix; names without one are padded on the left.
      separate(function_name, into = c("discard", "function_name"),
               sep = "(:::|::|\n)", fill = "left") %>%
      select(-discard) %>%
      mutate(function_name = str_replace_all(function_name,
                                             pattern = "[^[:alnum:]_\\.]", "")) %>%
      filter(!(function_name %in% stop_words))
  }
  safe_parser <- possibly(parser, otherwise = NA)
  safe_parser(text, stop_words)
}
#' List the packages currently attached to the search path.
#'
#' Each `search()` entry is split at ":"; entries without a ":" (for example
#' ".GlobalEnv") have no second part and are dropped.
#'
#' @return A tibble with one character column `package`, sorted by name.
find_loaded_packages <- function() {
  tibble(search = search()) %>%
    separate(search, into = c("discard", "keep"), sep = ":", fill = "right") %>%
    filter(!is.na(keep)) %>%
    select(package = keep) %>%
    arrange(package)
}
#' List the objects exposed by an attached package.
#'
#' @param package Package name without the "package:" prefix.
#' @return A tibble with one character column `function_name` (zero rows when
#'   the attached environment is empty), or `NA` when `ls()` fails, e.g.
#'   because no "package:<name>" entry is on the search path.
find_functions_in_package <- function(package) {
  pkg_text <- paste0("package:", package)
  package_functions <- tryCatch(ls(pkg_text), error = function(e) NULL)

  # Failure is signalled with NA so callers can detect it via is.logical().
  if (is.null(package_functions)) {
    return(NA)
  }

  # Building the tibble directly also covers an empty listing, where the
  # previous `package_functions[[1]]` check raised "subscript out of bounds".
  tibble(function_name = package_functions)
}
#' Expand a table of package names into one row per package function.
#'
#' Calls `find_functions_in_package()` for every value of `col`. Packages for
#' which that lookup returned a logical value (its failure marker) are dropped
#' before the per-package tables are unnested.
#'
#' @param data A data frame containing the package-name column.
#' @param col Unquoted name of the column holding the package names.
#' @return `data` with an added `function_name` column, one row per
#'   package/function pair.
map_loaded_package_functions <- function(data, col) {
  col_expr <- enquo(col)

  data %>%
    mutate(function_name = map(!! col_expr, find_functions_in_package)) %>%
    # A logical element means the lookup failed for that package.
    filter(!map_lgl(function_name, is.logical)) %>%
    unnest(cols = function_name)
}
# Lookup table with one row per (package, function_name) for everything that
# is currently attached.
loaded_functions_tbl <- find_loaded_packages() %>%
  map_loaded_package_functions(package)

# Spot checks on individual files before processing the whole directory.
file_path_1 <- rmd_or_r_file_paths_tbl$path[[1]]
file_path_1
build_function_names_tbl_from_file_path(
  rmd_or_r_file_paths_tbl$path[[4]],
  loaded_functions_tbl
) %>%
  glimpse()

# Parse every file; files whose parsing failed come back as a logical NA and
# are dropped before the per-file tables are unnested.
local_function_names_tbl <- rmd_or_r_file_paths_tbl %>%
  mutate(
    function_name = map(path, build_function_names_tbl_from_file_path,
                        loaded_functions_tbl),
    parse_failed = map_lgl(function_name, is.logical)
  ) %>%
  filter(!parse_failed) %>%
  select(file_name, function_name) %>%
  unnest(cols = function_name) %>%
  # Same keys the implicit natural join used, now spelled out.
  left_join(loaded_functions_tbl, by = c("function_name", "package"))

local_function_names_tbl %>% glimpse()
#' Add each row's share of a count column as `pct`.
#'
#' @param data A data frame.
#' @param ... Unquoted grouping columns; shares are computed within each
#'   group (over the whole table when none are given).
#' @param col Unquoted name of the column holding the counts.
#' @return `data`, ungrouped, with an added `pct` column equal to
#'   `col / sum(col)`.
count_to_pct <- function(data, ..., col = n) {
  value_quo <- enquo(col)
  group_quos <- quos(...)

  grouped <- group_by(data, !!! group_quos)
  with_share <- mutate(grouped, pct = (!! value_quo) / sum(!! value_quo))
  ungroup(with_share)
}
# Extract the author(s) of a file from its `author:` line.
# NOTE(review): this top-level `x` is not referenced again in the visible
# code; extrair_autor() below reads the file into its own local `x`.
x <- readLines(file_path_1)
#' Extract the author(s) declared in a file's `author:` line.
#'
#' Takes the first line starting with "author:", strips the key, square
#' brackets and double quotes, and splits the remainder at commas.
#'
#' @param path Path of the file to read.
#' @return A tibble with columns `Autor` (one row per author) and `path`.
#'   Files without an `author:` line yield a zero-row tibble.
extrair_autor <- function(path) {
  linhas <- readLines(path)
  linha_autor <- linhas %>%
    str_subset("^author:") %>%
    str_remove_all("author: ") %>%
    str_remove_all("\\[|\\]") %>%
    str_remove_all('"')

  # Without this guard, a file with no `author:` line (e.g. a plain .R
  # script) made the `[[1]]` below fail with "subscript out of bounds".
  if (length(linha_autor) == 0) {
    return(tibble(Autor = character(0), path = path[0]))
  }

  autores <- str_trim(str_split(linha_autor[[1]], ",")[[1]])
  tibble(Autor = autores, path = path)
}
# Spot check on a single file.
extrair_autor(file_path_1)

# One row per (author, file): extract the authors of every file, then attach
# the file name so the result can be joined to the function-usage table.
df_autores <- rmd_or_r_file_paths_tbl$path %>%
  map_df(extrair_autor)
df_autores <- left_join(df_autores, rmd_or_r_file_paths_tbl, by = "path")
head(df_autores)
head(rmd_or_r_file_paths_tbl)

# Function usage per file, labelled with the file's author(s).
df <- left_join(local_function_names_tbl, df_autores, by = "file_name")
head(df_autores, 1)
head(local_function_names_tbl, 1)

# Usage counts per author/package/function; `pct` is the share of all counted
# calls because count_to_pct() is called without grouping columns.
top_20 <- df %>%
  ungroup() %>%
  count(Autor, package, function_name) %>%
  count_to_pct()

# Keep each author's 20 most used functions. `rank` restarts at 1 for every
# author (rowid_to_column() on the grouped data numbered rows across the
# whole table), and the result is returned ungrouped.
top_20 <- top_20 %>%
  arrange(desc(n)) %>%
  group_by(Autor) %>%
  slice(1:20) %>%
  mutate(rank = row_number()) %>%
  ungroup() %>%
  select(rank, everything())
library(tidyquant)

# Lollipop chart of the most used functions, one facet per author: a segment
# from 0 to the count `n`, capped by a point, coloured by package.
ggplot(
  top_20,
  aes(x = n, y = fct_reorder(function_name, n), color = package)
) +
  geom_segment(aes(xend = 0, yend = function_name), size = 2) +
  geom_point(size = 4) +
  # Optional text labels, currently disabled:
  # geom_label(aes(label = paste0(function_name, "(), ",
  #                               package, ", ",
  #                               scales::percent(pct))),
  #            hjust = "inward", size = 3.5) +
  facet_wrap(~ Autor, scales = "free", drop = TRUE) +
  scale_color_tq() +
  expand_limits(x = 0) +
  theme_tq() +
  theme(legend.position = "none")

# No plot is passed, so ggsave() writes the most recently created plot.
ggsave("grafico-curso-r.png", width = 11, height = 14)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment