Last active
November 22, 2021 17:42
-
-
Save jeremy-allen/7eefa5fb019a4b85b04a037843231e6b to your computer and use it in GitHub Desktop.
separate and pivot_longer with multiple obs as column names
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# This fake data matches real-world data I was given. | |
# The level of analysis is the units of peppers (rows), | |
# but we see subunits here, and need to make those the | |
# level of analysis - one subunit per row, i.e., tidy. | |
# Notice the description column is not named like the | |
# other subunit columns. | |
# This data matches the example described on the pivot_longer | |
# help page as "Multiple observations per row." | |
# A complication is that each Description does not | |
# contain the same number of subunits. | |
peppers <- tibble( | |
unit_id = c("000000001", "000000002", "000000003"), | |
date = c("2021-01-01", "2021-01-02", "2021-01-01"), | |
description = c("subunit 1 is great and subunit 2 is cool", "subunit 1 is awesome and subunit 2 is amazing", "subunit 1 is bad and subunit 2 is gross and subunit 3 is rotten"), | |
su1_weight = c(100, 50, 150), | |
su1_length = c(5, 2, 7), | |
su2_weight = c(200, 150, 300), | |
su2_length = c(12, 7, 24) | |
) | |
peppers | |
# The description column can be separated into new columns, | |
# and if we make names of the new columns match the format of | |
# the other subunit column names we can take advantage of a certain | |
# pivot_longer argument. | |
peppers_sep <- peppers %>% | |
separate( | |
description, | |
into = c("su1_description", "su2_description", "su3_description"), | |
sep = "\\sand\\s" | |
) | |
peppers_sep | |
peppers_long <- peppers_sep %>% pivot_longer( | |
cols = starts_with("su"), # now we can do because all subunit cols start this way now | |
names_to = c("subunit", ".value"), # the special argument with .value! | |
names_pattern = "(su\\d)_(\\w+)" # regex that matches existing col names | |
) | |
peppers_long | |
# Because unit 000000003 had three subunits while the others | |
# only two, we get descritpion rows with NA. So we drop those rows. | |
peppers_complete <- peppers_long %>% | |
filter(!is.na(description)) | |
peppers_complete |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment