Last active
December 10, 2019 07:06
-
-
Save jonspring/51aef364073392c2b9a23e1376edf5f9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Some ideas to try out on the data from:
# https://gist.github.com/brooke-watson/ccf3d1b1f4449ab55a72f7835a52e599
# 0. Annotate every raw row with the record it belongs to (`group`) and
#    whether it holds row-level or category-level data (`type`).
sw1_annotated <- starwars_garbage_data1 %>%
  mutate(
    # A "Character Name" header in v1 opens a new record, so the running
    # count of headers seen so far serves as the record id. `%in%` (unlike
    # `==`) never returns NA, so a missing v1 cannot turn every later group
    # id into NA.
    group = cumsum(v1 %in% "Character Name"),
    # A row whose v1 parses as a number is row-level data; everything else
    # (headers, labels, missing values) is category-level. The check is
    # elementwise, so no grouping is required. Coercion warnings are expected
    # for the text rows and are silenced for this one call only.
    # NOTE(review): assumes v1 is a character column -- confirm upstream.
    type = if_else(
      !is.na(suppressWarnings(as.numeric(v1))),
      "row",
      "category"
    )
  ) %>%
  # Hand a plain (ungrouped) tibble to the later steps.
  as_tibble()
# 1. Row-level records need no reshaping: keep only the rows tagged "row".
row_data <- filter(sw1_annotated, type == "row")
# 2. Expected layout of the category-level cells inside each group:
#    the cell at position (row, col) holds the statistic named in `stat`.
location_table <- tibble(
  row  = c(2, 2, 2, 3, 3, 4),
  col  = c(1, 3, 4, 2, 4, 2),
  stat = c("Name", "Eye", "Height", "Person-film", "Film", "Species")
)
# Reshape the category-level rows into one tidy row per group:
#   1. number the category rows inside each group,
#   2. go long so each cell carries its (row, col) coordinates,
#   3. keep only the cells named in `location_table`,
#   4. spread those cells out to one column per statistic.
category_data <- sw1_annotated %>%
  filter(type != "row") %>%
  group_by(group) %>%
  mutate(row = row_number()) %>%
  ungroup() %>%
  pivot_longer(
    v1:v4,
    names_to = "col",
    names_prefix = "v",
    values_to = "val"
  ) %>%
  # Convert the column index explicitly. In tidyr >= 1.1.0 `names_ptypes`
  # only checks the type and errors on the character names instead of
  # converting them; a plain `as.integer()` works on every tidyr version.
  mutate(col = as.integer(col)) %>%
  # An inner join keeps exactly the cells that have an entry in
  # `location_table` (same result as left_join + dropping NA `stat`), and
  # the explicit keys avoid the natural-join message.
  inner_join(location_table, by = c("row", "col")) %>%
  select(group, val, stat) %>%
  pivot_wider(names_from = stat, values_from = val)
# Voila! Attach each group's category-level info to its row-level records.
# The join key is spelled out so the result cannot change silently if the
# two tables ever come to share another column name.
output <- category_data %>%
  right_join(row_data, by = "group")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment