Last active
August 29, 2015 14:13
-
-
Save ateucher/81209abc90ad2fcbe3c8 to your computer and use it in GitHub Desktop.
Fill dates
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Fill gaps in a date sequence
#'
#' Given a data frame with one column holding a date sequence, fill gaps in
#' the date sequence by adding one row for every missing date. Columns other
#' than \code{date_col} and \code{fill_cols} are \code{NA} in the added rows.
#'
#' The data are always sorted by \code{date_col}. If the sorted dates are
#' already evenly spaced, or there are fewer than two distinct non-missing
#' dates, the sorted data are returned without further changes. Otherwise the
#' result is built with \code{merge()}, which places \code{date_col} first.
#'
#' @param df Data frame.
#' @param date_col The column containing dates (\code{Date} or \code{POSIXt}).
#' @param interval The interval in the date sequence: a number, or a string
#'   understood by the \code{by} argument of \code{seq.Date()} /
#'   \code{seq.POSIXt()} such as \code{"1 month"}. If \code{NULL}, calculated
#'   automatically: \code{"day"} for \code{Date} columns, otherwise the result
#'   of \code{find_time_int()}.
#' @param fill_cols Columns to fill with the value in the column (should be
#'   columns where the value is the same in every row, such as an ID). The
#'   first non-missing value of each column is copied to every row.
#' @export
#' @return A data frame with the missing dates filled in.
#' @examples \dontrun{
#' foo <- data.frame(
#'   Date = seq(as.Date("2008-01-01"), as.Date("2008-12-01"), by = "month"),
#'   val = round(rnorm(12, 5, 2), 1),
#'   label = rep("a", 12)
#' )
#' bar <- foo[-c(2, 5, 6, 7, 10), ]
#' date_fill(bar, "Date", interval = "1 month", fill_cols = "label")
#' }
date_fill <- function(df, date_col, interval = NULL, fill_cols = NULL) {
  valid_units <- "sec|min|hour|day|DSTday|week|month|quarter|year"
  # grepl() (not grep()) is required here: grep() returns integer(0) when
  # nothing matches, which makes the condition zero-length instead of TRUE.
  if (!is.null(interval) && !is.numeric(interval) &&
        !isTRUE(grepl(valid_units, interval))) {
    stop("Specified interval is not valid. See ?seq.Date and ",
         "?seq.POSIXt for help, or let the function find it for you")
  }

  dates <- df[[date_col]]
  if (is.null(dates)) {
    stop("Column '", date_col, "' not found in df")
  }

  # drop = FALSE keeps a one-column data frame from collapsing to a vector.
  row_order <- order(dates)
  df <- df[row_order, , drop = FALSE]
  dates <- dates[row_order]

  # With fewer than two distinct dates there is no gap to fill and no
  # interval to infer.
  if (length(unique(dates[!is.na(dates)])) < 2L) {
    return(df)
  }

  # An evenly spaced sequence is treated as already complete.
  # NOTE(review): this does not compare the spacing with `interval`, so a
  # regular series coarser than `interval` is returned unfilled -- confirm
  # that this is the intended behaviour.
  if (length(unique(diff(dates))) == 1L) {
    return(df)
  }

  if (is.null(interval)) {
    interval <- if (inherits(dates, "Date")) "day" else find_time_int(dates)
  }

  all_dates <- data.frame(
    date = seq(min(dates, na.rm = TRUE), max(dates, na.rm = TRUE),
               by = interval)
  )
  out <- merge(df, all_dates, by.x = date_col, by.y = "date", all = TRUE)

  for (col in fill_cols) {
    values <- out[, col]
    # Use the first non-missing value so an NA in the first row is not
    # propagated to the whole column.
    out[, col] <- values[!is.na(values)][1L]
  }
  out
}
#' Find time interval in a date sequence
#'
#' Finds the most common gap between consecutive values among (at most) the
#' first 100 unique values of \code{dates}. Taken from package openair.
#'
#' @param dates Vector of dates (\code{POSIXt}, \code{Date} or numeric).
#' @export
#' @return For \code{POSIXt} and \code{Date} input, a string of the form
#'   \code{"<n> sec"} giving the most common interval in seconds, suitable
#'   for the \code{by} argument of \code{seq.POSIXt()}. For any other input,
#'   the most common gap as a number in the units of \code{dates}.
#' @examples
#' hourly <- as.POSIXct("2020-01-01", tz = "UTC") + 3600 * c(0, 1, 2, 4, 5)
#' find_time_int(hourly)
find_time_int <- function(dates) {
  dates <- unique(dates)
  # Only the first 100 unique values are inspected.
  n_used <- min(100L, length(dates))
  # sort() drops NAs; they carry no information about the spacing.
  gaps <- diff(as.numeric(sort(dates[seq_len(n_used)])))
  if (length(gaps) == 0L) {
    stop("At least two distinct, non-missing dates are needed to find ",
         "the time interval")
  }

  # table() names are the gap values as strings; take the most frequent.
  modal_gap <- as.numeric(names(which.max(table(gaps))))

  if (inherits(dates, "POSIXt")) {
    # POSIXt values are in seconds already.
    return(paste(modal_gap, "sec"))
  }
  if (inherits(dates, "Date")) {
    # Date values are in days: convert the modal gap to seconds rather than
    # assuming the interval is always exactly one day.
    return(paste(modal_gap * 3600 * 24, "sec"))
  }
  modal_gap
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment