Skip to content

Instantly share code, notes, and snippets.

@alejandrohagan
Last active March 1, 2025 20:12
Show Gist options
  • Save alejandrohagan/a0e1443c1799c264b6213abbe0c7ca76 to your computer and use it in GitHub Desktop.
Save alejandrohagan/a0e1443c1799c264b6213abbe0c7ca76 to your computer and use it in GitHub Desktop.
How to apply a function to a nested dataset when the nest_by() column is needed inside the function
library(tidyverse)
# Select rows of `Petal.Length` from one slice of data, choosing the end of
# the slice based on the species it contains.
#
# Arguments:
#   x  A data frame with `Species` and `Petal.Length` columns.
#   n  Number of rows to keep (passed to `head()` / `tail()`); defaults to 3.
#
# Returns the last `n` rows of `x` when any row has Species "setosa", and the
# first `n` rows otherwise, with `select(Petal.Length)` applied to the result.
fun <- function(x, n = 3) {
  # `%in%` never returns NA, so a missing Species value cannot make `if` fail
  # the way `any(x$Species == "setosa")` would.
  if ("setosa" %in% x$Species) {
    tail(x, n = n) |> select(Petal.Length)
  } else {
    head(x, n = n) |> select(Petal.Length)
  }
}
# Reference result: group `iris` by Species and run `fun()` once per group
# through `do()`, which hands each group's rows to the expression as `.`.
validation_tbl <- do(group_by(iris, Species), fun(.))
# Method 1: copy the grouping column before nesting. `Species2` is used as the
# `nest_by()` key, so `Species` itself remains a column of each nested `data`
# frame and `fun()` can still inspect it.
method1_tbl <- iris |>
  mutate(Species2 = Species) |>
  nest_by(Species2) |>
  mutate(fn = list(fun(data))) |>
  unnest(fn) |>
  select(Species = Species2, Petal.Length)

# Compare against the `do()`-based reference table.
all.equal(validation_tbl, method1_tbl)
# Variant of the row picker that receives the species separately from the
# data, for use when the species column has been moved out of `data`
# (e.g. as a `nest_by()` key).
#
# Arguments:
#   data     A data frame with a `Petal.Length` column.
#   species  The species value(s) describing `data`.
#   n        Number of rows to keep (passed to `head()` / `tail()`);
#            defaults to 3.
#
# Returns the last `n` rows of `data` when `species` contains "setosa", and
# the first `n` rows otherwise, with `select(Petal.Length)` applied.
fun2 <- function(data, species, n = 3) {
  # `%in%` never returns NA, so a missing species cannot make `if` fail the
  # way `any(species == "setosa")` would.
  if ("setosa" %in% species) {
    tail(data, n = n) |> select(Petal.Length)
  } else {
    head(data, n = n) |> select(Petal.Length)
  }
}
# Method 2: nest directly by Species and pass the key to `fun2()` as a
# separate argument alongside the nested `data` frame.
method2_tbl <- iris |>
  nest_by(Species) |>
  mutate(fn = list(fun2(data, Species))) |>
  unnest(fn) |>
  select(Species, Petal.Length)

# Compare against the `do()`-based reference table.
all.equal(validation_tbl, method2_tbl)
## with map()
# Row picker for list-based iteration: like `fun()`, but keeps the `Species`
# column in the output so the pieces can be row-bound without losing it.
#
# Arguments:
#   x  A data frame with `Species` and `Petal.Length` columns.
#   n  Number of rows to keep (passed to `head()` / `tail()`); defaults to 3.
#
# Returns the last `n` rows of `x` when any row has Species "setosa", and the
# first `n` rows otherwise, with `select(Species, Petal.Length)` applied.
fun3 <- function(x, n = 3) {
  # `%in%` never returns NA, so a missing Species value cannot make `if` fail
  # the way `any(x$Species == "setosa")` would.
  if ("setosa" %in% x$Species) {
    tail(x, n = n) |> select(Species, Petal.Length)
  } else {
    head(x, n = n) |> select(Species, Petal.Length)
  }
}
# Method 3: split `iris` into a list with one data frame per Species, apply
# `fun3()` to every piece, and row-bind the pieces back into one table.
split_tbl <- group_split(iris, Species)
method3_tbl <- purrr::list_rbind(map(split_tbl, fun3))

# The reference table is ungrouped before comparing it with the row-bound
# result.
all.equal(ungroup(validation_tbl), method3_tbl)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment