expersso · September 12, 2018 15:11
diff --git a/tidy_excel.R b/tidy_excel.R
 # Inspired by http://www.brodrigues.co/blog/2018-09-11-human_to_machine/

 library(tidyverse)
 library(readxl)

 # Extract all data from one sheet into tidy dataframe
 get_sheet <- function(sheet, path = "time-use.xlsx") {
  
  ranges <- list(
    col  = "C3:BS5",
    row  = "A6:B58",
    data = "C6:BS58"
  )
  
  # Read the three different ranges into a list
  x <- imap(ranges, ~read_excel(path, sheet, ., FALSE))
  
  # Extract column names
  x$col <- x$col %>% 
    discard(~all(is.na(.))) %>% 
    t() %>% 
    as.data.frame() %>% 
    set_names(c("population", "subgroup", "unit")) %>% 
    fill(population, subgroup, unit)
  
  # Extract row names
  x$row <- x$row %>% 
    unite(row, 1:2, sep = "_") %>% 
    pull()
  
  # Extract actual data
  x$data <- x$data %>% 
    discard(~all(is.na(.))) %>% 
    t()
  
  # Combine into tibble
  cbind(x$col, x$data) %>% 
    set_names(c(names(x$col), x$row)) %>% 
    gather(id, value, -c(1:3)) %>% 
    separate(id, c("activity_id", "activity"), sep = "_") %>% 
    tbl_df()
 }

 # Read data from all relevant sheets into one dataframe
 df_full <- excel_sheets("time-use.xlsx")[7:10] %>% 
  set_names() %>% 
  imap_dfr(~get_sheet(.), .id = "day")

 # Extract a subset of the data to match original blog post
 df_sub <- df_full %>% 
  filter(subgroup == "Total") %>% 
  select(day, population, activity, time = value)
	# Inspired by http://www.brodrigues.co/blog/2018-09-11-human_to_machine/

	library(tidyverse)
	library(readxl)

	# Extract all data from one sheet into tidy dataframe
	get_sheet <- function(sheet, path = "time-use.xlsx") {

	ranges <- list(
	col = "C3:BS5",
	row = "A6:B58",
	data = "C6:BS58"
	)

	# Read the three different ranges into a list
	x <- imap(ranges, ~read_excel(path, sheet, ., FALSE))

	# Extract column names
	x$col <- x$col %>%
	discard(~all(is.na(.))) %>%
	t() %>%
	as.data.frame() %>%
	set_names(c("population", "subgroup", "unit")) %>%
	fill(population, subgroup, unit)

	# Extract row names
	x$row <- x$row %>%
	unite(row, 1:2, sep = "_") %>%
	pull()

	# Extract actual data
	x$data <- x$data %>%
	discard(~all(is.na(.))) %>%
	t()

	# Combine into tibble
	cbind(x$col, x$data) %>%
	set_names(c(names(x$col), x$row)) %>%
	gather(id, value, -c(1:3)) %>%
	separate(id, c("activity_id", "activity"), sep = "_") %>%
	tbl_df()
	}

	# Read data from all relevant sheets into one dataframe
	df_full <- excel_sheets("time-use.xlsx")[7:10] %>%
	set_names() %>%
	imap_dfr(~get_sheet(.), .id = "day")

	# Extract a subset of the data to match original blog post
	df_sub <- df_full %>%
	filter(subgroup == "Total") %>%
	select(day, population, activity, time = value)