Skip to content

Instantly share code, notes, and snippets.

@ateucher
Last active August 29, 2015 14:13
Show Gist options
  • Save ateucher/81209abc90ad2fcbe3c8 to your computer and use it in GitHub Desktop.
Save ateucher/81209abc90ad2fcbe3c8 to your computer and use it in GitHub Desktop.
Fill dates
#' Fill gaps in a date sequence
#'
#' Given a data frame with one column holding a date (or date-time) sequence,
#' fill the gaps in that sequence. The data are sorted by \code{date_col}, a
#' complete sequence from the earliest to the latest date is generated with
#' \code{seq()}, and the two are merged, so rows are added for every missing
#' step. In the added rows, every column other than \code{date_col} and
#' \code{fill_cols} is \code{NA}.
#'
#' @param df Dataframe
#' @param date_col Name (character string) of the column containing dates
#' @param interval The interval in the date sequence: either a number or a
#'   string containing one of the units understood by \code{seq.Date} /
#'   \code{seq.POSIXt} (e.g. \code{"day"}, \code{"1 month"}). If \code{NULL},
#'   \code{"day"} is used for \code{Date} columns and the interval is
#'   estimated with \code{find_time_int} otherwise.
#' @param fill_cols Columns to fill with the value found in the first row of
#'   that column (should be columns where the value is the same in every row,
#'   such as an ID.)
#' @export
#' @return If the date sequence is already regular, \code{df} sorted by
#'   \code{date_col}; otherwise a data frame with the missing dates filled in
#'   (as produced by \code{merge()}, so \code{date_col} is the first column).
#' @examples \dontrun{
#' foo <- data.frame(Date = seq(as.Date("2008-01-01"), as.Date("2008-12-01"), by = "month"),
#'                   val = round(rnorm(12, 5, 2), 1), label = rep("a", 12))
#' bar <- foo[-c(2,5,6,7,10),]
#' date_fill(bar, "Date", interval = "1 month", fill_cols = "label")
#' }
date_fill <- function (df, date_col, interval = NULL, fill_cols = NULL) {
  valid_units <- "sec|min|hour|day|DSTday|week|month|quarter|year"

  # grepl() (not grep()) so that a non-matching interval yields FALSE rather
  # than a zero-length value; isTRUE() also rejects non-scalar input.
  if (!is.null(interval) &&
      !is.numeric(interval) &&
      !isTRUE(grepl(valid_units, interval))) {
    stop("Specified interval is not valid. See ?seq.Date and ",
         "?seq.POSIXt for help, or let the function find it for you")
  }

  # drop = FALSE keeps a single-column data frame from collapsing to a vector.
  df <- df[order(df[[date_col]]), , drop = FALSE]
  dates <- as.data.frame(df)[[date_col]]
  start_date <- min(dates, na.rm = TRUE)
  end_date <- max(dates, na.rm = TRUE)

  if (is.null(interval)) {
    if (inherits(dates, "Date")) {
      interval <- "day"
    } else {
      interval <- find_time_int(dates)
    }
  }

  # Exactly one distinct step between consecutive dates means the sequence is
  # already regular and there is nothing to fill.
  if (length(unique(diff(dates))) == 1L) {
    return(df)
  }

  all_dates <- data.frame(date = seq(start_date, end_date, by = interval))
  out <- merge(df, all_dates, by.x = date_col, by.y = "date", all = TRUE)

  # Propagate the first row's value down each requested column; iterating
  # over NULL is a no-op, so no explicit is.null() guard is needed.
  for (col in fill_cols) {
    out[, col] <- out[1, col]
  }

  out
}
#' Find time interval in a date sequence
#'
#' Taken from package openair. Looks at (up to) the first 100 unique values of
#' \code{dates}, orders them, and picks the most frequent gap between
#' consecutive values.
#'
#' @param dates vector of dates
#' @export
#' @return For \code{POSIXt} input, a string of the form \code{"<n> sec"}
#'   where \code{<n>} is the most common gap in seconds. For \code{Date}
#'   input, always the string for one day in seconds (\code{3600 * 24}
#'   followed by \code{"sec"}). For any other input, the most common gap as a
#'   plain number.
#' @examples \dontrun{
#' x <- as.POSIXct("2020-01-01", tz = "UTC") + c(0, 3600, 7200, 14400)
#' find_time_int(x)
#' }
find_time_int <- function (dates) {
  distinct <- unique(dates)

  # Only the first 100 unique values are inspected.
  n_use <- min(c(100, length(distinct)))
  ordering <- order(distinct[seq_len(n_use)])
  gaps <- diff(as.numeric(distinct[ordering]))

  # The names of the tabulated gaps are the gap sizes; take the modal one.
  modal_gap <- which.max(table(gaps))
  step <- as.numeric(names(modal_gap))

  # Date input ignores the computed gap and reports one day in seconds.
  if (inherits(distinct, "Date")) {
    return(paste(3600 * 24, "sec"))
  }
  if (inherits(distinct, "POSIXt")) {
    return(paste(step, "sec"))
  }
  step
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment