djnavarro/aggregate_rowwise_by_prefix.R

djnavarro · 2021-05-11T07:40:50Z

library(tidyverse)

# make toy data -----------------------------------------------------------

# column of n randomly sampled responses
likert_col <- function(n = 10) {
  sample(7, size = 10, replace = TRUE)
}

# toy data
dat <- tibble(
  cat_1 = likert_col(),
  cat_2 = likert_col(),
  cat_3 = likert_col(),
  dog_1 = likert_col(),
  dog_2 = likert_col()
)


# an ugly solution --------------------------------------------------------

# sums all columns that begin with prefix,
# returns a tibble with one column
prefix_aggregate <- function(prefix, dat) {
  dat %>%
    rowwise() %>%
    transmute(!!prefix := sum(c_across(starts_with(prefix)))) %>%
    ungroup()
}

# find relevant prefixes
prefixes <- names(dat) %>%
  str_remove_all("_[0-9]*$") %>%
  unique()

# apply the aggregator for each prefix then bind
agg <- prefixes %>%
  map(prefix_aggregate, dat = dat) %>%
  bind_cols()


# print -------------------------------------------------------------------

print(dat)
#> # A tibble: 10 x 5
#>    cat_1 cat_2 cat_3 dog_1 dog_2
#>    <int> <int> <int> <int> <int>
#>  1     3     7     5     7     7
#>  2     1     7     3     2     7
#>  3     6     5     4     7     1
#>  4     2     6     2     7     3
#>  5     3     1     6     7     5
#>  6     2     6     1     1     6
#>  7     4     3     1     2     2
#>  8     3     1     6     1     7
#>  9     5     1     6     1     7
#> 10     5     3     1     5     3
print(agg)
#> # A tibble: 10 x 2
#>      cat   dog
#>    <int> <int>
#>  1    15    14
#>  2    11     9
#>  3    15     8
#>  4    10    10
#>  5    10    12
#>  6     9     7
#>  7     8     4
#>  8    10     8
#>  9    12     8
#> 10     9     8

^{Created on 2021-05-11 by the reprex package (v0.3.0)}

TimTeaFan · 2021-05-11T13:09:59Z

I have a package on Github which handles similar problems. For this specific problem it has no optimal solution:

library(dplyover) # https://github.com/TimTeaFan/dplyover

dat %>% 
  transmute(over(cut_names("_[0-9]*$"),
                 ~ rowSums(select(cur_data(), starts_with(.x)))))

# A tibble: 10 x 2
     cat   dog
   <dbl> <dbl>
 1    11     4
 2    10     9
 3     6     2
 4     4     8
 5    10     9
 6     7     8
 7    12    10
 8    12     8
 9    17     5
10    13     2

I wonder why across is not working correctly:

dat %>% 
  transmute(over(cut_names("_[0-9]*$"),
                 ~ rowSums(across(starts_with(.x)))))

# A tibble: 10 x 2
     cat   dog
   <dbl> <dbl>
 1    11    11
 2    10    10
 3     6     6
 4     4     4
 5    10    10
 6     7     7
 7    12    12
 8    12    12
 9    17    17
10    13    13

I'm thinking about how a function should look like to handles this problem in an optimal way.

TimTeaFan · 2021-05-11T15:33:14Z

I think this would be a nice syntax to solve similar problems. I don't like the name though. Maybe there is something better than fold.

# `fold` does not exist yet
dat %>% 
  transmute(fold(starts_with("cat"),
                 list(sum = ~ rowSums(.x),
                      mean = ~ rowMeans(.x))))

# A tibble: 10 x 2
   cat_sum cat_mean
     <dbl>    <dbl>
 1      11     3.67
 2      10     3.33
 3       6     2   
 4       4     1.33
 5      10     3.33
 6       7     2.33
 7      12     4   
 8      12     4   
 9      17     5.67
10      13     4.33

# `fold_over` does not exist yet
dat %>% 
  transmute(fold_over(cut_names("_[0-9]*$"),
                      ~ starts_with(.x),
                      ~ rowSums(.x)))

# A tibble: 10 x 2
     cat   dog
   <dbl> <dbl>
 1    11    11
 2    10    10
 3     6     6
 4     4     4
 5    10    10
 6     7     7
 7    12    12
 8    12    12
 9    17    17
10    13    13

djnavarro/aggregate_rowwise_by_prefix.R

Select an option

No results found

Select an option

No results found

djnavarro commented May 11, 2021

Uh oh!

TimTeaFan commented May 11, 2021 •

edited

Loading

Uh oh!

TimTeaFan commented May 11, 2021 •

edited

Loading

Uh oh!

	library(tidyverse)

	# make toy data -----------------------------------------------------------

	# column of n randomly sampled responses
	likert_col <- function(n = 10) {
	sample(7, size = 10, replace = TRUE)
	}

	# toy data
	dat <- tibble(
	cat_1 = likert_col(),
	cat_2 = likert_col(),
	cat_3 = likert_col(),
	dog_1 = likert_col(),
	dog_2 = likert_col()
	)


	# an ugly solution --------------------------------------------------------

	# sums all columns that begin with prefix,
	# returns a tibble with one column
	prefix_aggregate <- function(prefix, dat) {
	dat %>%
	rowwise() %>%
	transmute(!!prefix := sum(c_across(starts_with(prefix)))) %>%
	ungroup()
	}

	# find relevant prefixes
	prefixes <- names(dat) %>%
	str_remove_all("_[0-9]*$") %>%
	unique()

	# apply the aggregator for each prefix then bind
	agg <- prefixes %>%
	map(prefix_aggregate, dat = dat) %>%
	bind_cols()


	# print -------------------------------------------------------------------

	print(dat)
	print(agg)

djnavarro/aggregate_rowwise_by_prefix.R

djnavarro commented May 11, 2021

Uh oh!

TimTeaFan commented May 11, 2021 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

TimTeaFan commented May 11, 2021 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

TimTeaFan commented May 11, 2021 •

edited

Loading

TimTeaFan commented May 11, 2021 •

edited

Loading