jonspring · December 10, 2019 07:06
diff --git a/starwars_pivot b/starwars_pivot
 # Some ideas to try out on the data from:
 # https://gist.github.com/brooke-watson/ccf3d1b1f4449ab55a72f7835a52e599



 # 0. Tag every row with the group it belongs to and the kind of data it holds.
 sw1_annotated <- starwars_garbage_data1 %>%
   # Work on an ungrouped tibble. The classification below is element-wise, so
   # no group_by()/ungroup() round trip is needed to compute it.
   as_tibble() %>%
   mutate(
     # A new group starts at every row whose v1 is "Character Name". `%in%`
     # never returns NA, so a missing v1 cannot turn the running count into NA.
     group = cumsum(v1 %in% "Character Name"),
     # Rows whose v1 parses as a number are "row" records; everything else is
     # a "category" row. Text cells are expected here, so the "NAs introduced
     # by coercion" warning from as.numeric() is silenced on purpose.
     type = if_else(
       !is.na(suppressWarnings(as.numeric(v1))),
       "row",
       "category"
     )
   )


 # 1. Rows tagged "row" need no reshaping; just separate them from the
 #    category rows.
 row_data <- filter(sw1_annotated, type == "row")

 # 2. Expected layout of the category rows inside each group: the cell at
 #    position (row, col) holds the value for the named stat.
 location_table <- tibble(
   row  = c(2, 2, 2, 3, 3, 4),
   col  = c(1, 3, 4, 2, 4, 2),
   stat = c("Name", "Eye", "Height", "Person-film", "Film", "Species")
 )

 # Convert the category-level data to long form, tagging every cell with its
 # (row, col) coordinates inside its group. Then keep only the cells listed in
 # location_table and spread them into one column per stat.
 category_data <- sw1_annotated %>%
   filter(type != "row") %>%
   # `row` is the position of each category row within its group.
   group_by(group) %>%
   mutate(row = row_number()) %>%
   ungroup() %>%
   # Column names v1..v4 become the column index "1".."4" (still character).
   pivot_longer(v1:v4,
                names_to = "col",
                names_prefix = "v",
                values_to = "val") %>%
   # Cast explicitly: from tidyr 1.1.0 on, `names_ptypes` only checks the type
   # and errors on character -> integer instead of converting.
   mutate(col = as.integer(col)) %>%
   # Explicit keys avoid the implicit natural join; inner_join keeps exactly
   # the cells that have a stat defined in location_table.
   inner_join(location_table, by = c("row", "col")) %>%
   select(group, val, stat) %>%
   pivot_wider(names_from = stat, values_from = val)
 
 # Voila! Attach each group's category info to its row-level records.
 # right_join keeps every record in row_data. The key is spelled out so the
 # join cannot silently pick up other shared column names.
 output <- category_data %>%
   right_join(row_data, by = "group")
	# Some ideas to try out on the data from:
	# https://gist.github.com/brooke-watson/ccf3d1b1f4449ab55a72f7835a52e599



	# 0. Tag every row with the group it belongs to and the kind of data it holds.
	sw1_annotated <- starwars_garbage_data1 %>%
	  # Work on an ungrouped tibble. The classification below is element-wise, so
	  # no group_by()/ungroup() round trip is needed to compute it.
	  as_tibble() %>%
	  mutate(
	    # A new group starts at every row whose v1 is "Character Name". `%in%`
	    # never returns NA, so a missing v1 cannot turn the running count into NA.
	    group = cumsum(v1 %in% "Character Name"),
	    # Rows whose v1 parses as a number are "row" records; everything else is
	    # a "category" row. Text cells are expected here, so the "NAs introduced
	    # by coercion" warning from as.numeric() is silenced on purpose.
	    type = if_else(
	      !is.na(suppressWarnings(as.numeric(v1))),
	      "row",
	      "category"
	    )
	  )


	# 1. Rows tagged "row" need no reshaping; just separate them from the
	#    category rows.
	row_data <- filter(sw1_annotated, type == "row")

	# 2. Expected layout of the category rows inside each group: the cell at
	#    position (row, col) holds the value for the named stat.
	location_table <- tibble(
	  row  = c(2, 2, 2, 3, 3, 4),
	  col  = c(1, 3, 4, 2, 4, 2),
	  stat = c("Name", "Eye", "Height", "Person-film", "Film", "Species")
	)

	# Convert the category-level data to long form, tagging every cell with its
	# (row, col) coordinates inside its group. Then keep only the cells listed in
	# location_table and spread them into one column per stat.
	category_data <- sw1_annotated %>%
	  filter(type != "row") %>%
	  # `row` is the position of each category row within its group.
	  group_by(group) %>%
	  mutate(row = row_number()) %>%
	  ungroup() %>%
	  # Column names v1..v4 become the column index "1".."4" (still character).
	  pivot_longer(v1:v4,
	               names_to = "col",
	               names_prefix = "v",
	               values_to = "val") %>%
	  # Cast explicitly: from tidyr 1.1.0 on, `names_ptypes` only checks the type
	  # and errors on character -> integer instead of converting.
	  mutate(col = as.integer(col)) %>%
	  # Explicit keys avoid the implicit natural join; inner_join keeps exactly
	  # the cells that have a stat defined in location_table.
	  inner_join(location_table, by = c("row", "col")) %>%
	  select(group, val, stat) %>%
	  pivot_wider(names_from = stat, values_from = val)

	# Voila! Attach each group's category info to its row-level records.
	# right_join keeps every record in row_data. The key is spelled out so the
	# join cannot silently pick up other shared column names.
	output <- category_data %>%
	  right_join(row_data, by = "group")