Code for a reproducible example around GitHub functions
library(stringi)
library(tidyr)
library(dplyr)
library(stringr)
# Demo data: 10,000 ids (LP), each with two random lorem-ipsum paragraphs.
# rowwise() makes each mutate() draw one fresh paragraph per row.
example <- data.frame(LP = seq_len(10000)) %>%
  rowwise() %>%
  mutate(lorem1 = stringi::stri_rand_lipsum(1, start_lipsum = FALSE)) %>%
  mutate(lorem2 = stringi::stri_rand_lipsum(1, start_lipsum = FALSE)) %>%
  # The data is still row-wise here, so every row is repeated 10 times.
  dplyr::slice(rep(seq_len(n()), each = 10)) %>%
  # Shuffle lorem1 across all rows.
  transform(lorem1 = sample(lorem1))

# Same data grouped by LP, with all lorem1 texts of a group pasted into one
# string. The grouping is not removed afterwards.
example_txt <- example %>%
  dplyr::group_by(LP) %>%
  dplyr::mutate(colapsed_document_titles = paste(lorem1, collapse = " "))

# Row filter handed to filter_keywords() as text; it refers to the temporary
# `text_mining` column that filter_keywords() creates.
filter_statement <- "grepl('lorem|ipsum', text_mining, ignore.case = TRUE)"
#' Extract keyword matches from selected columns and filter rows
#'
#' Pastes `selected_cols` into a temporary `text_mining` column, stores every
#' case-insensitive match of `keywords` found there (comma-separated) in a new
#' column, keeps the rows for which `filter_statement` evaluates to `TRUE`,
#' and drops the temporary column again. The elapsed time is reported via
#' tictoc.
#'
#' @param data A data frame.
#' @param selected_cols Columns to search, as a tidyselect expression,
#'   e.g. `c(LP, lorem1, lorem2)`.
#' @param filter_statement A string holding an R expression used as the row
#'   filter; it may refer to `text_mining`. The string is parsed and evaluated
#'   as code, so never pass untrusted input.
#' @param narrow_sequence_match Name of the output column that receives the
#'   matches.
#' @param keywords Regular expression to extract (case is ignored).
#' @return `data` plus the match column, restricted to the rows that pass
#'   `filter_statement`, without the temporary `text_mining` column.
filter_keywords <- function(data, selected_cols, filter_statement,
                            narrow_sequence_match, keywords) {
  tictoc::tic() # start measuring time
  # Report the timing on exit so the timer is also stopped if a step fails.
  on.exit(tictoc::toc(), add = TRUE) # finish measuring time

  # Parse up front and inject with `!!` below: the argument can then no longer
  # be shadowed by a data column that happens to be called `filter_statement`.
  predicate <- rlang::parse_expr(filter_statement)

  united <- tidyr::unite(
    data, "text_mining", {{ selected_cols }},
    sep = " ", remove = FALSE
  )
  # vapply() keeps the new column character even for zero-row input, where
  # sapply() would return an empty list.
  matched <- dplyr::mutate(
    united,
    !!narrow_sequence_match := vapply(
      stringr::str_extract_all(
        .data$text_mining,
        stringr::regex({{ keywords }}, ignore_case = TRUE)
      ),
      toString,
      character(1)
    )
  )
  kept <- dplyr::filter(matched, !!predicate)
  # `.data` is not meant for selection contexts; select the column by name.
  dplyr::select(kept, -dplyr::all_of("text_mining"))
}
# Run the same keyword search on `example` and on the LP-grouped `example_txt`;
# filter_keywords() prints the elapsed time of each run.
df1 <- filter_keywords(
  data = example,
  selected_cols = c(LP, lorem1, lorem2),
  filter_statement = filter_statement,
  narrow_sequence_match = "keywords",
  keywords = "lorem|ipsum"
)
df2 <- filter_keywords(
  data = example_txt,
  selected_cols = c(LP, lorem1, lorem2),
  filter_statement = filter_statement,
  narrow_sequence_match = "keywords",
  keywords = "lorem|ipsum"
)
Example from Stack Overflow: "r - How to pass a filter statement as a function parameter in dplyr using quosure"
# Load the PlantGrowth example dataset used by the calls below.
data(PlantGrowth)
#' Filter a data frame with a condition supplied as a string
#'
#' @param df A data frame.
#' @param filter_statement A string holding an R expression that is used as
#'   the row filter and may refer to columns of `df`. The string is parsed and
#'   evaluated as code, so never pass untrusted input.
#' @return The rows of `df` for which the condition is `TRUE`.
myfunc <- function(df, filter_statement) {
  # Parse first, then inject with `!!`: `filter_statement` is resolved here in
  # the function rather than inside the data mask, so a column of `df` with
  # that name cannot shadow the argument.
  predicate <- rlang::parse_expr(filter_statement)
  dplyr::filter(df, !!predicate)
}
# Sanity check: the string-based filter gives the same result as the
# hand-written dplyr filter.
identical(
  myfunc(PlantGrowth, "group %in% c('trt1', 'trt2')"),
  dplyr::filter(PlantGrowth, group %in% c("trt1", "trt2"))
)

# Two further example calls; the second assignment overwrites the first.
my_plant <- myfunc(PlantGrowth, "group %in% c('trt1', 'trt2')")
my_plant <- myfunc(PlantGrowth, "group == 'ctrl'")
⚠️ **Forgetting the `%>%` operator between two steps may lead to cryptic errors, for example:**
Error in `filter_keywords()`:
! Can't subset `.data` outside of a data mask context.