royseto · August 29, 2015 14:19
diff --git a/utilization_by_hour_prototype.R b/utilization_by_hour_prototype.R
 # Prototype for transforming a set of time intervals with arrival and departure
 # timestamps into utilization by hour.
 #
 # So far the script computes, for every interval, the hourly bucket it starts
 # and ends in and the fraction of the first and last bucket it occupies, and
 # allocates an all-zero (observation x hour) matrix. Filling that matrix is
 # still to be done (see the notes at the bottom).

 library(lubridate)

 # Period start and end.
 #
 # Pass tz explicitly so ymd() returns POSIXct. Without tz, current lubridate
 # releases return a Date, and subtracting a Date from the POSIXct values
 # produced by ymd_hms() does not yield a proper difftime, so the hour offsets
 # computed below would be wrong.

 period_start <- ymd("2015-04-01", tz = "UTC")
 period_end <- ymd("2015-05-01", tz = "UTC")

 # Vectors of start and end timestamps (UTC, the ymd_hms() default).

 s_times <- ymd_hms(c(
   "2015-04-08 11:20:03",
   "2015-04-18 19:55:32",
   "2015-04-03 01:01:35"
 ))

 e_times <- ymd_hms(c(
   "2015-04-08 14:25:03",
   "2015-04-18 20:11:32",
   "2015-04-03 01:03:48"
 ))

 # Every interval needs both endpoints and must not end before it starts.

 stopifnot(
   length(s_times) == length(e_times),
   all(e_times >= s_times)
 )

 # Offsets from period_start, in fractional hours.

 s_offsets <- as.double(s_times - period_start, units = "hours")
 e_offsets <- as.double(e_times - period_start, units = "hours")

 # s_buckets <- floor(s_offsets)
 # s_frac <- s_offsets - s_buckets
 # e_buckets <- floor(e_offsets)
 # e_frac <- e_offsets - e_buckets

 # Or more concisely, work on arrivals and departures at once using a matrix
 # or data frame. Column 1 holds arrivals, column 2 holds departures.

 offsets <- cbind(s_offsets, e_offsets)
 # Zero-based index of the hourly bucket containing each timestamp.
 buckets <- floor(offsets)
 # Position of each timestamp within its bucket, in [0, 1).
 frac <- offsets - buckets

 # TRUE where an interval starts and ends inside the same hourly bucket. The
 # bucket values come from floor(), so exact comparison is safe here.
 same_bucket <- buckets[, 2] == buckets[, 1]

 # Share of the first / last bucket occupied by each interval.
 # NOTE: for a single-bucket interval both values equal the full interval
 # length and refer to the same bucket, so the fill step must assign them
 # rather than add them.
 frac_first <- ifelse(same_bucket, frac[, 2] - frac[, 1], 1.0 - frac[, 1])
 frac_last <- ifelse(same_bucket, frac[, 2] - frac[, 1], frac[, 2])

 num_observations <- length(s_times)
 num_periods <- ceiling(as.double(period_end - period_start, units = "hours"))

 # One row per observation, one column per hourly bucket; matrix() recycles the
 # scalar 0 to fill every cell.
 tmp_util <- matrix(0.0, nrow = num_observations, ncol = num_periods)

 # Now assign the fractions for the first and last buckets and fill in 1.0 for
 # all the buckets in between (vectorized over the observation rows). Run apply
 # on the updated matrix to add up the columns to get total utilization by hour.
 # Run more functions on a vector of bucket start times to get the day of week
 # and hour of day for each bucket, and find a way to reduce the results on a
 # (dow, hour) key. Finally, convert the result to a data frame with columns
 # (dow, hour, utilization).

 # Check these out:
 # http://www.inside-r.org/r-doc/base/Map
 # http://www.johnmyleswhite.com/notebook/2010/09/23/higher-order-functions-in-r/
 # http://adv-r.had.co.nz/Functionals.html
	# Prototype for transforming a set of time intervals with arrival and departure
	# timestamps into utilization by hour.
	#
	# So far the script computes, for every interval, the hourly bucket it starts
	# and ends in and the fraction of the first and last bucket it occupies, and
	# allocates an all-zero (observation x hour) matrix. Filling that matrix is
	# still to be done (see the notes at the bottom).

	library(lubridate)

	# Period start and end.
	#
	# Pass tz explicitly so ymd() returns POSIXct. Without tz, current lubridate
	# releases return a Date, and subtracting a Date from the POSIXct values
	# produced by ymd_hms() does not yield a proper difftime, so the hour offsets
	# computed below would be wrong.

	period_start <- ymd("2015-04-01", tz = "UTC")
	period_end <- ymd("2015-05-01", tz = "UTC")

	# Vectors of start and end timestamps (UTC, the ymd_hms() default).

	s_times <- ymd_hms(c(
	  "2015-04-08 11:20:03",
	  "2015-04-18 19:55:32",
	  "2015-04-03 01:01:35"
	))

	e_times <- ymd_hms(c(
	  "2015-04-08 14:25:03",
	  "2015-04-18 20:11:32",
	  "2015-04-03 01:03:48"
	))

	# Every interval needs both endpoints and must not end before it starts.

	stopifnot(
	  length(s_times) == length(e_times),
	  all(e_times >= s_times)
	)

	# Offsets from period_start, in fractional hours.

	s_offsets <- as.double(s_times - period_start, units = "hours")
	e_offsets <- as.double(e_times - period_start, units = "hours")

	# s_buckets <- floor(s_offsets)
	# s_frac <- s_offsets - s_buckets
	# e_buckets <- floor(e_offsets)
	# e_frac <- e_offsets - e_buckets

	# Or more concisely, work on arrivals and departures at once using a matrix
	# or data frame. Column 1 holds arrivals, column 2 holds departures.

	offsets <- cbind(s_offsets, e_offsets)
	# Zero-based index of the hourly bucket containing each timestamp.
	buckets <- floor(offsets)
	# Position of each timestamp within its bucket, in [0, 1).
	frac <- offsets - buckets

	# TRUE where an interval starts and ends inside the same hourly bucket. The
	# bucket values come from floor(), so exact comparison is safe here.
	same_bucket <- buckets[, 2] == buckets[, 1]

	# Share of the first / last bucket occupied by each interval.
	# NOTE: for a single-bucket interval both values equal the full interval
	# length and refer to the same bucket, so the fill step must assign them
	# rather than add them.
	frac_first <- ifelse(same_bucket, frac[, 2] - frac[, 1], 1.0 - frac[, 1])
	frac_last <- ifelse(same_bucket, frac[, 2] - frac[, 1], frac[, 2])

	num_observations <- length(s_times)
	num_periods <- ceiling(as.double(period_end - period_start, units = "hours"))

	# One row per observation, one column per hourly bucket; matrix() recycles the
	# scalar 0 to fill every cell.
	tmp_util <- matrix(0.0, nrow = num_observations, ncol = num_periods)

	# Now assign the fractions for the first and last buckets and fill in 1.0 for
	# all the buckets in between (vectorized over the observation rows). Run apply
	# on the updated matrix to add up the columns to get total utilization by hour.
	# Run more functions on a vector of bucket start times to get the day of week
	# and hour of day for each bucket, and find a way to reduce the results on a
	# (dow, hour) key. Finally, convert the result to a data frame with columns
	# (dow, hour, utilization).

	# Check these out:
	# http://www.inside-r.org/r-doc/base/Map
	# http://www.johnmyleswhite.com/notebook/2010/09/23/higher-order-functions-in-r/
	# http://adv-r.had.co.nz/Functionals.html