Last active
November 5, 2022 02:47
-
-
Save jmbarbone/144eaf1fb16d5bb32d5a57f41441c293 to your computer and use it in GitHub Desktop.
rle() with some enhancements. Repurposed from a separate script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# function has been repurposed | |
# a few edits from the original have been made without testing | |
#' Run length encode | |
#' | |
#' Encodes a run length and returns the start and stop | |
#' | |
#' @param x A vector of values for compute the length of the run | |
#' @param times If `NULL` will use the position of the start and stop runs, | |
#' otherwise will return the values returned; if not `NULL`, must be equal | |
#' length as `x` | |
#' @param overlap Logical, if `TRUE` will move the `end` position (or value) of | |
#' single length `x` sets to the start of the next set. If `FALSE`, the _middle_ | |
#' distance between the start and end is used (good for preventing strict | |
#' cutoffs and _filling_). | |
#' | |
#' @returns A `data.frame` with the run value, the length, and the start and | |
#' stop positions/`times` as columns | |
#' | |
#' @seealso [base::rle] | |
#' @author [Jordan Mark Barbone](https://github.com/jmbarbone) | |
#' | |
#' @examples | |
#' # Demonstrations of rle: | |
#' x <- rev(rep(6:10, 1:5)) | |
#' | |
#' times <- c(0.96, 1.85, 2.49, 3.46, 4.08, 4.41, 4.76, | |
#' 5.15, 5.94, 5.98, 6.73, 7.40, 7.57, 7.84, 8.35) | |
#' | |
#' rle(x) | |
#' run_length_encode(x) | |
#' run_length_encode(x, times) | |
#' run_length_encode(x, times, overlap = TRUE) | |
#' | |
#' if (requireNamespace("dplyr", quietly = TRUE)) { | |
#' library(dplyr) | |
#' # Can be used with dplyr to summarise runs | |
#' df <- data.frame(x, times, group = rep(c("a", "a", "b", "c", "c"), 1:5)) | |
#' df %>% | |
#' dplyr::group_by(group) %>% | |
#' dplyr::summarise(run_length_encode(x, times)) | |
#' } | |
#' | |
#' @export | |
run_length_encode <- function(x, times = NULL, overlap = FALSE) { | |
ls <- unclass(rle(x)) | |
end <- cumsum(ls$lengths) | |
start <- end - ls$lengths + 1L | |
n <- length(end) | |
if (!is.null(times)) { | |
if (length(x) != length(times)) { | |
stop("Length of x and times must be the same", call. = FALSE) | |
} | |
end <- times[end] | |
start <- times[start] | |
} | |
if (isTRUE(overlap) & n >= 3) { | |
new <- (start[2:n] + end[1:(n - 1)]) / 2 | |
start[2:n] <- new | |
end[1:(n - 1)] <- new | |
} | |
names(ls) <- c("run_length", deparse1(substitute(x))) | |
ls <- c(ls[2:1], list(start = start, end = end)) | |
as.data.frame(ls) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demonstrations of rle: | |
x <- rev(rep(6:10, 1:5)) | |
times <- c(0.96, 1.85, 2.49, 3.46, 4.08, 4.41, 4.76, | |
5.15, 5.94, 5.98, 6.73, 7.40, 7.57, 7.84, 8.35) | |
rle(x) | |
#> Run Length Encoding | |
#> lengths: int [1:5] 5 4 3 2 1 | |
#> values : int [1:5] 10 9 8 7 6 | |
run_length_encode(x) | |
#> x run_length start end | |
#> 1 10 5 1 5 | |
#> 2 9 4 6 9 | |
#> 3 8 3 10 12 | |
#> 4 7 2 13 14 | |
#> 5 6 1 15 15 | |
run_length_encode(x, times) | |
#> x run_length start end | |
#> 1 10 5 0.96 4.08 | |
#> 2 9 4 4.41 5.94 | |
#> 3 8 3 5.98 7.40 | |
#> 4 7 2 7.57 7.84 | |
#> 5 6 1 8.35 8.35 | |
run_length_encode(x, times, overlap = TRUE) | |
#> x run_length start end | |
#> 1 10 5 0.960 4.245 | |
#> 2 9 4 4.245 5.960 | |
#> 3 8 3 5.960 7.485 | |
#> 4 7 2 7.485 8.095 | |
#> 5 6 1 8.095 8.350 | |
if (requireNamespace("dplyr", quietly = TRUE)) { | |
library(dplyr) | |
# Can be used with dplyr to summarise runs | |
df <- data.frame(x, times, group = rep(c("a", "a", "b", "c", "c"), 1:5)) | |
df %>% | |
dplyr::group_by(group) %>% | |
dplyr::summarise(run_length_encode(x, times)) | |
} | |
#> | |
#> Attaching package: 'dplyr' | |
#> The following objects are masked from 'package:stats': | |
#> | |
#> filter, lag | |
#> The following objects are masked from 'package:base': | |
#> | |
#> intersect, setdiff, setequal, union | |
#> `summarise()` has grouped output by 'group'. You can override using the | |
#> `.groups` argument. | |
#> # A tibble: 7 × 5 | |
#> # Groups: group [3] | |
#> group x run_length start end | |
#> <chr> <int> <int> <dbl> <dbl> | |
#> 1 a 10 3 0.96 2.49 | |
#> 2 b 10 2 3.46 4.08 | |
#> 3 b 9 1 4.41 4.41 | |
#> 4 c 9 3 4.76 5.94 | |
#> 5 c 8 3 5.98 7.4 | |
#> 6 c 7 2 7.57 7.84 | |
#> 7 c 6 1 8.35 8.35 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment