jmclawson · November 4, 2023 16:31
diff --git a/unnest_without_caps.R b/unnest_without_caps.R
 #' Tokenize a text column and drop words that only ever appear capitalized
 #'
 #' The column is tokenized once with case preserved to find words that start
 #' with an uppercase ASCII letter and never occur starting with a lowercase
 #' ASCII letter (for example, names). The column is then tokenized again with
 #' `tidytext::unnest_tokens()`'s default settings, and rows whose `word` is in
 #' that capitalized-only set are removed.
 #'
 #' @param df A data frame containing the text to tokenize.
 #' @param column The column holding the text, forwarded to the `input`
 #'   argument of `tidytext::unnest_tokens()` via `{{ }}`. Defaults to `"text"`.
 #' @return The result of `tidytext::unnest_tokens()` on `df`, with a `word`
 #'   column, minus the rows for capitalized-only words.
 unnest_without_caps <- function(
     df,
     column = "text") {

   # Case-preserving pass: needed to see which words are capitalized.
   full <- df |>
     tidytext::unnest_tokens(word, {{ column }}, to_lower = FALSE)

   # stringr is called with an explicit namespace, like every other package
   # used here, so the function works without library(stringr) being attached.
   big <- full |>
     dplyr::filter(stringr::str_detect(word, "^[A-Z]")) |>
     dplyr::pull(word)

   small <- full |>
     dplyr::filter(stringr::str_detect(word, "^[a-z]")) |>
     dplyr::pull(word)

   # Lowercased forms of capitalized words that never appear in lowercase.
   only_caps <- base::setdiff(tolower(big), small)

   # Second pass with default tokenization; drop the capitalized-only words.
   df |>
     tidytext::unnest_tokens(word, {{ column }}) |>
     dplyr::filter(!word %in% only_caps)
 }
	#' Tokenize a text column and drop words that only ever appear capitalized
	#'
	#' The column is tokenized once with case preserved to find words that start
	#' with an uppercase ASCII letter and never occur starting with a lowercase
	#' ASCII letter (for example, names). The column is then tokenized again with
	#' `tidytext::unnest_tokens()`'s default settings, and rows whose `word` is in
	#' that capitalized-only set are removed.
	#'
	#' @param df A data frame containing the text to tokenize.
	#' @param column The column holding the text, forwarded to the `input`
	#'   argument of `tidytext::unnest_tokens()` via `{{ }}`. Defaults to `"text"`.
	#' @return The result of `tidytext::unnest_tokens()` on `df`, with a `word`
	#'   column, minus the rows for capitalized-only words.
	unnest_without_caps <- function(
	    df,
	    column = "text") {

	  # Case-preserving pass: needed to see which words are capitalized.
	  # The native pipe is written `|>`; a backslash-escaped `\|>` does not parse.
	  full <- df |>
	    tidytext::unnest_tokens(word, {{ column }}, to_lower = FALSE)

	  # stringr is called with an explicit namespace, like every other package
	  # used here, so the function works without library(stringr) being attached.
	  big <- full |>
	    dplyr::filter(stringr::str_detect(word, "^[A-Z]")) |>
	    dplyr::pull(word)

	  small <- full |>
	    dplyr::filter(stringr::str_detect(word, "^[a-z]")) |>
	    dplyr::pull(word)

	  # Lowercased forms of capitalized words that never appear in lowercase.
	  only_caps <- base::setdiff(tolower(big), small)

	  # Second pass with default tokenization; drop the capitalized-only words.
	  df |>
	    tidytext::unnest_tokens(word, {{ column }}) |>
	    dplyr::filter(!word %in% only_caps)
	}
No results found