nacnudus · September 15, 2018 10:02
diff --git a/luxembourg-time-use-unpivotr.R b/luxembourg-time-use-unpivotr.R
 # Inspired by http://www.brodrigues.co/blog/2018-09-11-human_to_machine/
 # https://twitter.com/brodriguesco/status/1039604517287931904
 # "You can find the data I will use here. Click on the “Time use” folder and you can download the workbook."
 # http://statistiques.public.lu/stat/ReportFolders/ReportFolder.aspx?IF_Language=eng&MainTheme=3&FldrName=1&RFPath=14306

 library(tidyverse)
 library(tidyxl)
 library(unpivotr)
 library(lubridate)

 path <- "./download.xlsx"
 formats <- xlsx_formats(path)

 cells <-
  path %>%
  xlsx_cells() %>%
  # Drop French and Index sheets
  dplyr::filter(str_detect(sheet, "day$")) %>%
  # Clean character values
  mutate(character = str_trim(character)) %>%
  # Drop empty cells
  dplyr::filter(data_type != "blank",
                !(data_type == "character" && character == "")) %>%
  # Drop total rows
  dplyr::filter(row <= 58L) %>%
  # Separate out the bold categories in the first two columns by budging them to
  # the left, then budging everything to the right so that all column numbers
  # are positive.
  mutate(col = if_else(col %in% 1:2 & formats$local$font$bold[local_format_id],
                       col - 2L,
                       col),
         col = col + 2L) %>%
  # Fix time values expressed as dates rather than character
  mutate(character = if_else(data_type == "date", "00:00", character),
         data_type = if_else(data_type == "date", "character", data_type))

 # Tidy every sheet
 tidy_sheet <- function(cells) {
  series <- dplyr::filter(cells, row == 1L, col == 1L)$character
  cells %>%
    dplyr::filter(row >= 2L) %>%
    behead("WNW", "activity_category_id") %>%
    behead("WNW", "activity_category") %>%
    behead("W", "activity_subcategory_id") %>%
    behead("W", "activity_subcategory") %>%
    behead("NNW", "grouping") %>%
    behead("NNW", "group") %>%
    behead("NNW", "metric") %>%
    behead("N", "unit") %>%
    select(-row, -col) %>%
    spatter(unit) %>% # like tidyr::spread() by handles mixed data types
    mutate(Time = as.integer(as.duration(lubridate::hm(Time))))
 }

 tidy_data <-
  cells %>%
  select(sheet, row, col, data_type, character, numeric) %>%
  nest(-sheet) %>%
  mutate(data = map(data, tidy_sheet)) %>%
  unnest()
 tidy_data
	# Inspired by http://www.brodrigues.co/blog/2018-09-11-human_to_machine/
	# https://twitter.com/brodriguesco/status/1039604517287931904
	# "You can find the data I will use here. Click on the “Time use” folder and you can download the workbook."
	# http://statistiques.public.lu/stat/ReportFolders/ReportFolder.aspx?IF_Language=eng&MainTheme=3&FldrName=1&RFPath=14306

	library(tidyverse)
	library(tidyxl)
	library(unpivotr)
	library(lubridate)

	path <- "./download.xlsx"
	formats <- xlsx_formats(path)

	cells <-
	path %>%
	xlsx_cells() %>%
	# Drop French and Index sheets
	dplyr::filter(str_detect(sheet, "day$")) %>%
	# Clean character values
	mutate(character = str_trim(character)) %>%
	# Drop empty cells
	dplyr::filter(data_type != "blank",
	!(data_type == "character" && character == "")) %>%
	# Drop total rows
	dplyr::filter(row <= 58L) %>%
	# Separate out the bold categories in the first two columns by budging them to
	# the left, then budging everything to the right so that all column numbers
	# are positive.
	mutate(col = if_else(col %in% 1:2 & formats$local$font$bold[local_format_id],
	col - 2L,
	col),
	col = col + 2L) %>%
	# Fix time values expressed as dates rather than character
	mutate(character = if_else(data_type == "date", "00:00", character),
	data_type = if_else(data_type == "date", "character", data_type))

	# Tidy every sheet
	tidy_sheet <- function(cells) {
	series <- dplyr::filter(cells, row == 1L, col == 1L)$character
	cells %>%
	dplyr::filter(row >= 2L) %>%
	behead("WNW", "activity_category_id") %>%
	behead("WNW", "activity_category") %>%
	behead("W", "activity_subcategory_id") %>%
	behead("W", "activity_subcategory") %>%
	behead("NNW", "grouping") %>%
	behead("NNW", "group") %>%
	behead("NNW", "metric") %>%
	behead("N", "unit") %>%
	select(-row, -col) %>%
	spatter(unit) %>% # like tidyr::spread() by handles mixed data types
	mutate(Time = as.integer(as.duration(lubridate::hm(Time))))
	}

	tidy_data <-
	cells %>%
	select(sheet, row, col, data_type, character, numeric) %>%
	nest(-sheet) %>%
	mutate(data = map(data, tidy_sheet)) %>%
	unnest()
	tidy_data