Created
May 8, 2018 21:18
-
-
Save sillasgonzaga/df816be973853e9cd2a8496926524e4b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(fs) | |
# Root folder that holds the blog posts to scan.
dir_path <- "projetos/site-master/content/blog/"

# Preview the first entries found under the blog folder.
# (The original referenced `dir_posts`, which is never defined; the folder
# variable is `dir_path`.)
dir_info(dir_path, recursive = TRUE) %>%
  head() %>%
  knitr::kable()

# One row per file whose name ends in ".R" or ".Rmd", keeping the bare file
# name next to the path reported by dir_info().
rmd_or_r_file_paths_tbl <- dir_info(dir_path, recursive = TRUE) %>%
  mutate(file_name = path_file(path)) %>%
  select(file_name, path) %>%
  filter(str_detect(file_name, "(\\.R|\\.Rmd)$"))
# Build a table of the function names used in one source file, each matched
# to the package(s) that provide it.
#
# path:                 file to read with readLines().
# loaded_functions_tbl: lookup table with `package` and `function_name`
#                       columns (as produced by map_loaded_package_functions()).
#
# Returns a tibble with `function_name` and `package` columns, or NA when any
# step fails (the work is wrapped in purrr::possibly()).
build_function_names_tbl_from_file_path <- function(path, loaded_functions_tbl) {
  builder <- function(path, loaded_functions_tbl) {
    parsed <- parse_function_names(readLines(path))

    parsed %>%
      # Explicit key: the only column the two tables are meant to share.
      left_join(loaded_functions_tbl, by = "function_name") %>%
      # Keep "filter" only when it is attributed to dplyr. A "filter" row with
      # no matching package evaluates to NA here and is therefore dropped too.
      filter(function_name != "filter" | package == "dplyr") %>%
      # Names that matched no attached package are labelled explicitly.
      mutate(package = ifelse(is.na(package), "Unknown", package))
  }

  safe_builder <- possibly(builder, otherwise = NA)
  safe_builder(path, loaded_functions_tbl)
}
# Extract candidate function names from source text.
#
# The text is collapsed into one string and split at every "(": the last
# space-separated token before each opening parenthesis is taken as a
# function name. Any `pkg::` / `pkg:::` prefix is discarded and characters
# other than letters, digits, "_" and "." are stripped.
#
# text:       character vector of source lines.
# stop_words: names to exclude from the result (the default drops empty names).
#
# Returns a one-column tibble (`function_name`), or NA if parsing fails.
parse_function_names <- function(text, stop_words = c("")) {
  parser <- function(text, stop_words) {
    # Token immediately preceding an opening parenthesis.
    last_token <- function(fragment) {
      last(str_split(fragment, " ")[[1]])
    }

    fragments <- str_split(str_c(text, collapse = " "), "\\(")[[1]]

    tibble(text = fragments) %>%
      # The final fragment follows the last "(", so no call precedes it.
      slice(-n()) %>%
      mutate(function_name = map_chr(text, last_token)) %>%
      select(function_name) %>%
      separate(function_name, into = c("discard", "function_name"),
               sep = "(:::|::|\n)", fill = "left") %>%
      select(-discard) %>%
      mutate(function_name = str_replace_all(function_name,
                                             pattern = "[^[:alnum:]_\\.]", "")) %>%
      filter(!(function_name %in% stop_words))
  }

  safe_parser <- possibly(parser, otherwise = NA)
  safe_parser(text, stop_words)
}
# List the packages currently attached to the search path.
#
# Only search() entries of the form "package:<name>" are considered, so
# entries such as ".GlobalEnv", "Autoloads" or "tools:rstudio" are ignored.
#
# Returns a tibble with a single `package` column, sorted alphabetically.
find_loaded_packages <- function() {
  attached <- search()
  is_package <- str_detect(attached, "^package:")

  tibble(package = str_remove(attached[is_package], "^package:")) %>%
    arrange(package)
}
# List the objects an attached package exposes on the search path.
#
# package: package name without the "package:" prefix.
#
# Returns a one-column tibble (`function_name`) with one row per object
# reported by ls("package:<name>"), or a logical NA when ls() fails (for
# example because the package is not attached). A package that exposes
# nothing yields a zero-row tibble instead of an indexing error.
find_functions_in_package <- function(package) {
  pkg_text <- paste0("package:", package)
  safe_ls <- possibly(ls, otherwise = NA)
  package_functions <- safe_ls(pkg_text)

  # NA is the sentinel produced by possibly(); callers detect it with
  # is.logical().
  if (identical(package_functions, NA)) {
    return(package_functions)
  }

  tibble(function_name = package_functions)
}
# Expand a table of package names into one row per (package, object) pair.
#
# data: data frame containing the package names.
# col:  unquoted name of the column in `data` that holds the package names.
#
# Returns `data` with a `function_name` column; packages for which
# find_functions_in_package() returned its NA sentinel are dropped.
map_loaded_package_functions <- function(data, col) {
  col_expr <- enquo(col)

  data %>%
    mutate(function_name = map(!! col_expr, find_functions_in_package)) %>%
    # Failed lookups come back as a logical NA rather than a tibble.
    filter(!map_lgl(function_name, is.logical)) %>%
    unnest(function_name)
}
# Lookup table: every object listed for each currently attached package.
loaded_functions_tbl <- map_loaded_package_functions(
  find_loaded_packages(),
  package
)

# First discovered file, echoed as a quick check.
file_path_1 <- rmd_or_r_file_paths_tbl$path[[1]]
file_path_1

# Trial run of the extraction on the fourth discovered file.
glimpse(
  build_function_names_tbl_from_file_path(
    rmd_or_r_file_paths_tbl$path[[4]],
    loaded_functions_tbl
  )
)
# Function usage for every discovered file: one row per
# (file_name, function_name, package) combination.
local_function_names_tbl <- rmd_or_r_file_paths_tbl %>%
  mutate(
    function_name = map(path, build_function_names_tbl_from_file_path,
                        loaded_functions_tbl)
  ) %>%
  # Files whose extraction failed come back as a logical NA, not a tibble.
  filter(!map_lgl(function_name, is.logical)) %>%
  select(file_name, function_name) %>%
  unnest(function_name) %>%
  # Same keys the implicit natural join used, now spelled out.
  left_join(loaded_functions_tbl, by = c("function_name", "package"))

local_function_names_tbl %>% glimpse()
# Add a `pct` column holding each row's share of a count column.
#
# data: data frame that already contains the count column.
# ...:  unquoted grouping columns; shares are computed within each group
#       (over the whole table when none are given).
# col:  unquoted name of the count column (defaults to `n`).
#
# Returns `data`, ungrouped, with the extra `pct` column.
count_to_pct <- function(data, ..., col = n) {
  value_quo <- enquo(col)
  group_quos <- quos(...)

  grouped <- group_by(data, !!! group_quos)
  with_share <- mutate(grouped, pct = (!! value_quo) / sum(!! value_quo))
  ungroup(with_share)
}
# Extract the author(s) declared in a file.
# NOTE(review): `x` is not used again in the visible part of the script; it
# looks like a leftover from interactive exploration.
x <- readLines(file_path_1)

# Read the "author:" line of a file and return one row per author.
#
# path: file to read with readLines().
#
# The first line starting with "author:" is used. Square brackets and double
# quotes are removed and the remainder is split on commas, so both
# `author: "A"` and `author: ["A", "B"]` are handled.
#
# Returns a tibble with columns `Autor` and `path`; a file without an
# "author:" line (for example a plain .R script) yields zero rows instead of
# raising an error.
extrair_autor <- function(path) {
  author_lines <- str_subset(readLines(path), "^author:")

  if (length(author_lines) == 0) {
    # path[0] keeps the class of `path` so that rows bind cleanly later.
    return(tibble(Autor = character(), path = path[0]))
  }

  cleaned <- author_lines[[1]] %>%
    str_remove("^author:\\s*") %>%
    str_remove_all("\\[|\\]") %>%
    str_remove_all('"')

  authors <- str_trim(str_split(cleaned, ",")[[1]])

  tibble(Autor = authors, path = path)
}
# Quick check on the first file.
extrair_autor(file_path_1)

# Authors for every discovered file, with the file name attached via `path`.
df_autores <- rmd_or_r_file_paths_tbl$path %>%
  map_df(extrair_autor) %>%
  left_join(rmd_or_r_file_paths_tbl, by = "path")

df_autores %>% head()
rmd_or_r_file_paths_tbl %>% head()

# Attach authors to the function usage table.
# NOTE(review): the key is the bare file name; files sharing a name in
# different folders would be matched to each other - confirm this is intended.
df <- local_function_names_tbl %>%
  left_join(df_autores, by = "file_name")

df_autores %>% head(1)
local_function_names_tbl %>% head(1)
# Usage counts per (Autor, package, function_name), reduced to the 20 most
# frequent rows for each author. count_to_pct() is called without grouping
# columns, so `pct` is the share over the whole table. The result is left
# grouped by Autor.
top_20 <- df %>%
  ungroup() %>%
  count(Autor, package, function_name) %>%
  count_to_pct() %>%
  arrange(desc(n)) %>%
  group_by(Autor) %>%
  slice(1:20) %>%
  rowid_to_column(var = "rank")
# tidyquant is attached only at this point, i.e. after loaded_functions_tbl
# was built from search(); attaching it earlier would add its functions to
# that lookup table.
library(tidyquant)

# Lollipop chart of the most used functions, one panel per author.
ggplot(
  data = top_20,
  mapping = aes(x = n, y = fct_reorder(function_name, n), color = package)
) +
  geom_segment(aes(xend = 0, yend = function_name), size = 2) +
  geom_point(size = 4) +
  # geom_label(aes(label = paste0(function_name, "(), ",
  #                               package, ", ",
  #                               scales::percent(pct))),
  #            hjust = "inward", size = 3.5) +
  facet_wrap(~ Autor, scales = "free", drop = TRUE) +
  expand_limits(x = 0) +
  scale_color_tq() +
  theme_tq() +
  theme(legend.position = "none")

# With no plot argument, ggsave() writes the most recently created plot.
ggsave("grafico-curso-r.png", height = 14, width = 11)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment