Skip to content

Instantly share code, notes, and snippets.

@Aariq
Created October 29, 2024 17:09
Show Gist options
  • Save Aariq/b5adeb0af436c3d8a9a5f889bf69921e to your computer and use it in GitHub Desktop.
Save Aariq/b5adeb0af436c3d8a9a5f889bf69921e to your computer and use it in GitHub Desktop.
library(targets)
tar_dir({
  # Demo of dynamic branching over files, then over dplyr groups.
  # Everything in this block runs inside tar_dir(), so the CSVs and the
  # pipeline script are created in tar_dir()'s working directory.

  # Create 10 identical CSVs (6 rows each) as demo input.
  orig <- tibble::tibble(
    group = c("A", "A", "A", "B", "B", "C"),
    value = 1
  )
  # write_csv() must be namespace-qualified: only `targets` is attached in
  # this session. The library(readr) call below is part of _targets.R and is
  # not evaluated here.
  purrr::walk(
    seq_len(10),
    \(x) readr::write_csv(orig, paste0(x, "-data.csv"))
  )

  # _targets.R
  tar_script({
    library(targets)
    library(fs)
    library(readr)
    library(dplyr)
    list(
      # Paths of all CSV files in the working directory.
      tar_target(files, dir_ls(glob = "*.csv")),
      tar_target(
        df,
        read_csv(files),
        pattern = map(files), # = do this for each file
        iteration = "vector" # combine into a single data frame when loaded
      ),
      # Deliberately no `pattern` here: adding `pattern = map(files)` errors
      # (can't use dynamic branching for this target), so the grouping is
      # done once on the combined data frame.
      tar_target(
        df_grouped,
        df |> group_by(group) |> tar_group(),
        iteration = "group" # downstream targets should iterate over groups
      ),
      tar_target(
        summary_per_group,
        summarize(df_grouped, sum = sum(value)),
        pattern = map(df_grouped) # = do this for each group in df_grouped
      )
    )
  })

  tar_make()
  # tar_read(df) # a single data frame with 60 rows (combined because iteration = "vector")
  # tar_read(df_grouped) # a single data frame with 60 rows and a tar_group column
  tar_read(summary_per_group) # a single data frame with 3 rows (one per tar_group) and 1 column
})
# https://gist.github.com/Aariq/b5adeb0af436c3d8a9a5f889bf69921e
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment