Skip to content

Instantly share code, notes, and snippets.

@njtierney
Created November 18, 2024 05:58
Show Gist options
  • Save njtierney/8b165d26fddbfb8793140c9816a699e5 to your computer and use it in GitHub Desktop.
Save njtierney/8b165d26fddbfb8793140c9816a699e5 to your computer and use it in GitHub Desktop.
library(tidyverse)
# Example lagging code: simulate a long-format covariate grid holding one
# uniform-random value for every covariate x year x row combination.
n <- 100

grid_cov <- mutate(
  expand_grid(
    covariates = c("rainfall", "temperature"),
    years = 2000:2022,
    row = seq_len(n)
  ),
  value = runif(n())
)

grid_cov
#> # A tibble: 4,600 × 4
#>    covariates years   row  value
#>    <chr>      <int> <int>  <dbl>
#>  1 rainfall    2000     1 0.585 
#>  2 rainfall    2000     2 0.0466
#>  3 rainfall    2000     3 0.696 
#>  4 rainfall    2000     4 0.762 
#>  5 rainfall    2000     5 0.767 
#>  6 rainfall    2000     6 0.554 
#>  7 rainfall    2000     7 0.723 
#>  8 rainfall    2000     8 0.199 
#>  9 rainfall    2000     9 0.199 
#> 10 rainfall    2000    10 0.672 
#> # ℹ 4,590 more rows

# Reshape the long grid to one line per `row`, spreading the values into one
# `<covariate>_<year>` column for each covariate/year pair.
wider_grid_cov <- pivot_wider(
  grid_cov,
  names_from = c(covariates, years),
  values_from = value
)

wider_grid_cov
#> # A tibble: 100 × 47
#>      row rainfall_2000 rainfall_2001 rainfall_2002 rainfall_2003 rainfall_2004
#>    <int>         <dbl>         <dbl>         <dbl>         <dbl>         <dbl>
#>  1     1        0.585         0.855         0.698         0.133          0.656
#>  2     2        0.0466        0.798         0.277         0.857          0.709
#>  3     3        0.696         0.193         0.423         0.566          0.301
#>  4     4        0.762         0.537         0.179         0.691          0.669
#>  5     5        0.767         0.0191        0.980         0.0526         0.727
#>  6     6        0.554         0.450         0.529         0.800          0.815
#>  7     7        0.723         0.720         0.521         0.673          0.104
#>  8     8        0.199         0.192         0.887         0.106          0.367
#>  9     9        0.199         0.229         0.0940        0.427          0.940
#> 10    10        0.672         0.163         0.0945        0.470          0.425
#> # ℹ 90 more rows
#> # ℹ 41 more variables: rainfall_2005 <dbl>, rainfall_2006 <dbl>,
#> #   rainfall_2007 <dbl>, rainfall_2008 <dbl>, rainfall_2009 <dbl>,
#> #   rainfall_2010 <dbl>, rainfall_2011 <dbl>, rainfall_2012 <dbl>,
#> #   rainfall_2013 <dbl>, rainfall_2014 <dbl>, rainfall_2015 <dbl>,
#> #   rainfall_2016 <dbl>, rainfall_2017 <dbl>, rainfall_2018 <dbl>,
#> #   rainfall_2019 <dbl>, rainfall_2020 <dbl>, rainfall_2021 <dbl>, …

# Simulated observations: one line per `row` id, with a uniform-random `obs`
# and a `start_year` drawn with replacement from 2000-2022.
# `obs` is drawn before `start_year`, so the random-number stream is consumed
# in the same order as when both are built inside a single tibble() call.
dat <- tibble(row = seq_len(n)) |>
  mutate(
    obs = runif(n()),
    start_year = sample(2000:2022, size = n(), replace = TRUE)
  )

dat
#> # A tibble: 100 × 3
#>      row    obs start_year
#>    <int>  <dbl>      <int>
#>  1     1 0.680        2011
#>  2     2 0.793        2011
#>  3     3 0.652        2006
#>  4     4 0.316        2003
#>  5     5 0.656        2009
#>  6     6 0.269        2017
#>  7     7 0.892        2013
#>  8     8 0.962        2018
#>  9     9 0.428        2020
#> 10    10 0.0542       2018
#> # ℹ 90 more rows

# Attach the wide covariate grid to the observations by `row`, then add a
# simulated `coffee` column of uniform-random values.
# The object is created before it is printed: referencing it ahead of this
# assignment fails with "object 'example_covariates' not found".
example_covariates <- left_join(
  dat,
  wider_grid_cov,
  by = "row",
  # each `row` id is expected once in both tables; error on accidental fan-out
  relationship = "one-to-one"
) |>
  mutate(
    coffee = runif(n())
  )

example_covariates
#> # A tibble: 100 × 50
#>      row    obs start_year rainfall_2000 rainfall_2001 rainfall_2002
#>    <int>  <dbl>      <int>         <dbl>         <dbl>         <dbl>
#>  1     1 0.680        2011        0.585         0.855         0.698 
#>  2     2 0.793        2011        0.0466        0.798         0.277 
#>  3     3 0.652        2006        0.696         0.193         0.423 
#>  4     4 0.316        2003        0.762         0.537         0.179 
#>  5     5 0.656        2009        0.767         0.0191        0.980 
#>  6     6 0.269        2017        0.554         0.450         0.529 
#>  7     7 0.892        2013        0.723         0.720         0.521 
#>  8     8 0.962        2018        0.199         0.192         0.887 
#>  9     9 0.428        2020        0.199         0.229         0.0940
#> 10    10 0.0542       2018        0.672         0.163         0.0945
#> # ℹ 90 more rows
#> # ℹ 44 more variables: rainfall_2003 <dbl>, rainfall_2004 <dbl>,
#> #   rainfall_2005 <dbl>, rainfall_2006 <dbl>, rainfall_2007 <dbl>,
#> #   rainfall_2008 <dbl>, rainfall_2009 <dbl>, rainfall_2010 <dbl>,
#> #   rainfall_2011 <dbl>, rainfall_2012 <dbl>, rainfall_2013 <dbl>,
#> #   rainfall_2014 <dbl>, rainfall_2015 <dbl>, rainfall_2016 <dbl>,
#> #   rainfall_2017 <dbl>, rainfall_2018 <dbl>, rainfall_2019 <dbl>, …

# Names of the covariates excluded from lagging and of those that get lagged.
covariates_not_to_lag <- "coffee"
covariates_to_lag <- c("rainfall", "temperature")
covariates_to_lag
#> [1] "rainfall"    "temperature"

# Lags to compute, counted in years back from `start_year`: 0, 1, 2, 3.
vec_lags <- seq.int(0L, 3L)

# Build lagged covariate columns: for each `row`, take the value of every
# covariate in `covariates_to_lag` at year `start_year - lag`, for each lag in
# `vec_lags`. The result has one line per `row` and one `<covariate>_<lag>`
# column per covariate/lag pair; a lag that falls on a year with no data is
# left as NA.
example_lagged_covariates <- example_covariates |>
  # keep only the keys and the `<covariate>_<year>` columns
  select(
    -all_of(c(covariates_not_to_lag, "obs"))
  ) |>
  # One line per `row` x year with one column per covariate. The pattern
  # splits on the LAST underscore before the trailing digits, so covariate
  # names that themselves contain "_" are parsed correctly, and `year` is
  # converted to integer as it is created.
  pivot_longer(
    cols = -c(row, start_year),
    names_to = c(".value", "year"),
    names_pattern = "^(.+)_(\\d+)$",
    names_transform = list(year = as.integer)
  ) |>
  # Each row-year pair corresponds to exactly one lag, so compute it directly
  # rather than crossing every row-year with every lag and filtering.
  mutate(
    lags = start_year - year,
    .after = start_year
  ) |>
  filter(
    lags %in% vec_lags
  ) |>
  select(
    -year
  ) |>
  pivot_wider(
    names_from = lags,
    values_from = all_of(covariates_to_lag)
  )

# Print the wide, un-lagged table (one `<covariate>_<year>` column per pair)
# for comparison with the lagged result printed further down.
example_covariates
#> # A tibble: 100 × 50
#>      row    obs start_year rainfall_2000 rainfall_2001 rainfall_2002
#>    <int>  <dbl>      <int>         <dbl>         <dbl>         <dbl>
#>  1     1 0.680        2011        0.585         0.855         0.698 
#>  2     2 0.793        2011        0.0466        0.798         0.277 
#>  3     3 0.652        2006        0.696         0.193         0.423 
#>  4     4 0.316        2003        0.762         0.537         0.179 
#>  5     5 0.656        2009        0.767         0.0191        0.980 
#>  6     6 0.269        2017        0.554         0.450         0.529 
#>  7     7 0.892        2013        0.723         0.720         0.521 
#>  8     8 0.962        2018        0.199         0.192         0.887 
#>  9     9 0.428        2020        0.199         0.229         0.0940
#> 10    10 0.0542       2018        0.672         0.163         0.0945
#> # ℹ 90 more rows
#> # ℹ 44 more variables: rainfall_2003 <dbl>, rainfall_2004 <dbl>,
#> #   rainfall_2005 <dbl>, rainfall_2006 <dbl>, rainfall_2007 <dbl>,
#> #   rainfall_2008 <dbl>, rainfall_2009 <dbl>, rainfall_2010 <dbl>,
#> #   rainfall_2011 <dbl>, rainfall_2012 <dbl>, rainfall_2013 <dbl>,
#> #   rainfall_2014 <dbl>, rainfall_2015 <dbl>, rainfall_2016 <dbl>,
#> #   rainfall_2017 <dbl>, rainfall_2018 <dbl>, rainfall_2019 <dbl>, …
# Print the lagged table: one `<covariate>_<lag>` column per covariate and lag.
example_lagged_covariates
#> # A tibble: 100 × 10
#>      row start_year rainfall_3 rainfall_2 rainfall_1 rainfall_0 temperature_3
#>    <int>      <int>      <dbl>      <dbl>      <dbl>      <dbl>         <dbl>
#>  1     1       2011     0.170       0.733      0.844      0.604       0.403  
#>  2     2       2011     0.221       0.504      0.525      0.845       0.843  
#>  3     3       2006     0.566       0.301      0.408      0.211       0.650  
#>  4     4       2003     0.762       0.537      0.179      0.691       0.708  
#>  5     5       2009     0.431       0.711      0.744      0.801       0.542  
#>  6     6       2017     0.616       0.896      0.672      0.527       0.725  
#>  7     7       2013     0.636       0.812      0.479      0.132       0.199  
#>  8     8       2018     0.0335      0.802      0.711      0.850       0.0517 
#>  9     9       2020     0.393       0.556      0.320      0.144       0.00640
#> 10    10       2018     0.211       0.147      0.482      0.191       0.0952 
#> # ℹ 90 more rows
#> # ℹ 3 more variables: temperature_2 <dbl>, temperature_1 <dbl>,
#> #   temperature_0 <dbl>

Created on 2024-11-18 with reprex v2.1.1

Session info
# Report the R session settings and attached/loaded package versions.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.4.2 (2024-10-31)
#>  os       macOS Sequoia 15.1
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Australia/Hobart
#>  date     2024-11-18
#>  pandoc   3.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  cli           3.6.3   2024-06-21 [1] CRAN (R 4.4.0)
#>  colorspace    2.1-1   2024-07-26 [1] CRAN (R 4.4.0)
#>  digest        0.6.37  2024-08-19 [1] CRAN (R 4.4.1)
#>  dplyr       * 1.1.4   2023-11-17 [1] CRAN (R 4.4.0)
#>  evaluate      1.0.1   2024-10-10 [1] CRAN (R 4.4.1)
#>  fansi         1.0.6   2023-12-08 [1] CRAN (R 4.4.0)
#>  fastmap       1.2.0   2024-05-15 [1] CRAN (R 4.4.0)
#>  forcats     * 1.0.0   2023-01-29 [1] CRAN (R 4.4.0)
#>  fs            1.6.5   2024-10-30 [1] CRAN (R 4.4.1)
#>  generics      0.1.3   2022-07-05 [1] CRAN (R 4.4.0)
#>  ggplot2     * 3.5.1   2024-04-23 [1] CRAN (R 4.4.0)
#>  glue          1.8.0   2024-09-30 [1] CRAN (R 4.4.1)
#>  gtable        0.3.6   2024-10-25 [1] CRAN (R 4.4.1)
#>  hms           1.1.3   2023-03-21 [1] CRAN (R 4.4.0)
#>  htmltools     0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
#>  knitr         1.49    2024-11-08 [1] CRAN (R 4.4.1)
#>  lifecycle     1.0.4   2023-11-07 [1] CRAN (R 4.4.0)
#>  lubridate   * 1.9.3   2023-09-27 [1] CRAN (R 4.4.0)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.4.0)
#>  munsell       0.5.1   2024-04-01 [1] CRAN (R 4.4.0)
#>  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.4.0)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.4.0)
#>  purrr       * 1.0.2   2023-08-10 [1] CRAN (R 4.4.0)
#>  R6            2.5.1   2021-08-19 [1] CRAN (R 4.4.0)
#>  readr       * 2.1.5   2024-01-10 [1] CRAN (R 4.4.0)
#>  reprex        2.1.1   2024-07-06 [1] CRAN (R 4.4.0)
#>  rlang         1.1.4   2024-06-04 [1] CRAN (R 4.4.0)
#>  rmarkdown     2.29    2024-11-04 [1] CRAN (R 4.4.1)
#>  rstudioapi    0.17.1  2024-10-22 [1] CRAN (R 4.4.1)
#>  scales        1.3.0   2023-11-28 [1] CRAN (R 4.4.0)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.4.0)
#>  stringi       1.8.4   2024-05-06 [1] CRAN (R 4.4.0)
#>  stringr     * 1.5.1   2023-11-14 [1] CRAN (R 4.4.0)
#>  tibble      * 3.2.1   2023-03-20 [1] CRAN (R 4.4.0)
#>  tidyr       * 1.3.1   2024-01-24 [1] CRAN (R 4.4.0)
#>  tidyselect    1.2.1   2024-03-11 [1] CRAN (R 4.4.0)
#>  tidyverse   * 2.0.0   2023-02-22 [1] CRAN (R 4.4.0)
#>  timechange    0.3.0   2024-01-18 [1] CRAN (R 4.4.0)
#>  tzdb          0.4.0   2023-05-12 [1] CRAN (R 4.4.0)
#>  utf8          1.2.4   2023-10-22 [1] CRAN (R 4.4.0)
#>  vctrs         0.6.5   2023-12-01 [1] CRAN (R 4.4.0)
#>  withr         3.0.2   2024-10-28 [1] CRAN (R 4.4.1)
#>  xfun          0.49    2024-10-31 [1] CRAN (R 4.4.1)
#>  yaml          2.3.10  2024-07-26 [1] CRAN (R 4.4.0)
#> 
#>  [1] /Users/nick/Library/R/arm64/4.4/library
#>  [2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment