jmbarbone · November 5, 2022 02:47
diff --git a/run-length-encoding.R b/run-length-encoding.R
 # function has been repurposed
 # a few edits from the original have been made without testing

 #' Run length encode
 #'
 #' Encodes a run length and returns the start and stop
 #'
 #' @param x A vector of values for compute the length of the run
 #' @param times If `NULL` will use the position of the start and stop runs,
 #'   otherwise will return the values returned; if not `NULL`, must be equal
 #'   length as `x`
 #' @param overlap Logical, if `TRUE` will move the `end` position (or value) of
 #'   single length `x` sets to the start of the next set. If `FALSE`, the _middle_
 #'   distance between the start and end is used (good for preventing strict
 #'   cutoffs and _filling_).
 #'
 #' @returns A `data.frame` with the run value, the length, and the start and
 #'   stop positions/`times` as columns
 #'
 #' @seealso [base::rle]
 #' @author [Jordan Mark Barbone](https://github.com/jmbarbone)
 #'
 #' @examples
 #' # Demonstrations of rle:
 #' x <- rev(rep(6:10, 1:5))
 #'
 #' times <- c(0.96, 1.85, 2.49, 3.46, 4.08, 4.41, 4.76,
 #'            5.15, 5.94, 5.98, 6.73, 7.40, 7.57, 7.84, 8.35)
 #'
 #' rle(x)
 #' run_length_encode(x)
 #' run_length_encode(x, times)
 #' run_length_encode(x, times, overlap = TRUE)
 #'
 #' if (requireNamespace("dplyr", quietly = TRUE)) {
 #'   library(dplyr)
 #'   # Can be used with dplyr to summarise runs
 #'   df <- data.frame(x, times, group = rep(c("a", "a", "b", "c", "c"), 1:5))
 #'   df %>%
 #'     dplyr::group_by(group) %>%
 #'     dplyr::summarise(run_length_encode(x, times))
 #' }
 #'
 #' @export

 run_length_encode <- function(x, times = NULL, overlap = FALSE) {
  ls <- unclass(rle(x))
  end <- cumsum(ls$lengths)
  start <- end - ls$lengths + 1L
  n <- length(end)
  
  if (!is.null(times)) {
    if (length(x) != length(times)) {
      stop("Length of x and times must be the same", call. = FALSE)
    }
    
    end <- times[end]
    start <- times[start]
  }
  
  if (isTRUE(overlap) & n >= 3) {
    new <- (start[2:n] + end[1:(n - 1)]) / 2
    start[2:n] <- new
    end[1:(n - 1)] <- new
  }
  
  names(ls) <- c("run_length", deparse1(substitute(x)))
  ls <- c(ls[2:1], list(start = start, end = end))
  as.data.frame(ls)
 }
diff --git a/zzz-run-length-encoding.R b/zzz-run-length-encoding.R
 # Demonstrations of rle:
 x <- rev(rep(6:10, 1:5))

 times <- c(0.96, 1.85, 2.49, 3.46, 4.08, 4.41, 4.76,
           5.15, 5.94, 5.98, 6.73, 7.40, 7.57, 7.84, 8.35)

 rle(x)
 #> Run Length Encoding
 #>   lengths: int [1:5] 5 4 3 2 1
 #>   values : int [1:5] 10 9 8 7 6
 run_length_encode(x)
 #>    x run_length start end
 #> 1 10          5     1   5
 #> 2  9          4     6   9
 #> 3  8          3    10  12
 #> 4  7          2    13  14
 #> 5  6          1    15  15
 run_length_encode(x, times)
 #>    x run_length start  end
 #> 1 10          5  0.96 4.08
 #> 2  9          4  4.41 5.94
 #> 3  8          3  5.98 7.40
 #> 4  7          2  7.57 7.84
 #> 5  6          1  8.35 8.35
 run_length_encode(x, times, overlap = TRUE)
 #>    x run_length start   end
 #> 1 10          5 0.960 4.245
 #> 2  9          4 4.245 5.960
 #> 3  8          3 5.960 7.485
 #> 4  7          2 7.485 8.095
 #> 5  6          1 8.095 8.350

 if (requireNamespace("dplyr", quietly = TRUE)) {
  library(dplyr)
  # Can be used with dplyr to summarise runs
  df <- data.frame(x, times, group = rep(c("a", "a", "b", "c", "c"), 1:5))
  df %>%
    dplyr::group_by(group) %>%
    dplyr::summarise(run_length_encode(x, times))
 }
 #> 
 #> Attaching package: 'dplyr'
 #> The following objects are masked from 'package:stats':
 #> 
 #>     filter, lag
 #> The following objects are masked from 'package:base':
 #> 
 #>     intersect, setdiff, setequal, union
 #> `summarise()` has grouped output by 'group'. You can override using the
 #> `.groups` argument.
 #> # A tibble: 7 × 5
 #> # Groups:   group [3]
 #>   group     x run_length start   end
 #>   <chr> <int>      <int> <dbl> <dbl>
 #> 1 a        10          3  0.96  2.49
 #> 2 b        10          2  3.46  4.08
 #> 3 b         9          1  4.41  4.41
 #> 4 c         9          3  4.76  5.94
 #> 5 c         8          3  5.98  7.4 
 #> 6 c         7          2  7.57  7.84
 #> 7 c         6          1  8.35  8.35
	# function has been repurposed
	# a few edits from the original have been made without testing

	#' Run length encode
	#'
	#' Encodes a run length and returns the start and stop
	#'
	#' @param x A vector of values for compute the length of the run
	#' @param times If `NULL` will use the position of the start and stop runs,
	#' otherwise will return the values returned; if not `NULL`, must be equal
	#' length as `x`
	#' @param overlap Logical, if `TRUE` will move the `end` position (or value) of
	#' single length `x` sets to the start of the next set. If `FALSE`, the _middle_
	#' distance between the start and end is used (good for preventing strict
	#' cutoffs and _filling_).
	#'
	#' @returns A `data.frame` with the run value, the length, and the start and
	#' stop positions/`times` as columns
	#'
	#' @seealso [base::rle]
	#' @author [Jordan Mark Barbone](https://github.com/jmbarbone)
	#'
	#' @examples
	#' # Demonstrations of rle:
	#' x <- rev(rep(6:10, 1:5))
	#'
	#' times <- c(0.96, 1.85, 2.49, 3.46, 4.08, 4.41, 4.76,
	#' 5.15, 5.94, 5.98, 6.73, 7.40, 7.57, 7.84, 8.35)
	#'
	#' rle(x)
	#' run_length_encode(x)
	#' run_length_encode(x, times)
	#' run_length_encode(x, times, overlap = TRUE)
	#'
	#' if (requireNamespace("dplyr", quietly = TRUE)) {
	#' library(dplyr)
	#' # Can be used with dplyr to summarise runs
	#' df <- data.frame(x, times, group = rep(c("a", "a", "b", "c", "c"), 1:5))
	#' df %>%
	#' dplyr::group_by(group) %>%
	#' dplyr::summarise(run_length_encode(x, times))
	#' }
	#'
	#' @export

	run_length_encode <- function(x, times = NULL, overlap = FALSE) {
	ls <- unclass(rle(x))
	end <- cumsum(ls$lengths)
	start <- end - ls$lengths + 1L
	n <- length(end)

	if (!is.null(times)) {
	if (length(x) != length(times)) {
	stop("Length of x and times must be the same", call. = FALSE)
	}

	end <- times[end]
	start <- times[start]
	}

	if (isTRUE(overlap) & n >= 3) {
	new <- (start[2:n] + end[1:(n - 1)]) / 2
	start[2:n] <- new
	end[1:(n - 1)] <- new
	}

	names(ls) <- c("run_length", deparse1(substitute(x)))
	ls <- c(ls[2:1], list(start = start, end = end))
	as.data.frame(ls)
	}
	# Demonstrations of rle:
	x <- rev(rep(6:10, 1:5))

	times <- c(0.96, 1.85, 2.49, 3.46, 4.08, 4.41, 4.76,
	5.15, 5.94, 5.98, 6.73, 7.40, 7.57, 7.84, 8.35)

	rle(x)
	#> Run Length Encoding
	#> lengths: int [1:5] 5 4 3 2 1
	#> values : int [1:5] 10 9 8 7 6
	run_length_encode(x)
	#> x run_length start end
	#> 1 10 5 1 5
	#> 2 9 4 6 9
	#> 3 8 3 10 12
	#> 4 7 2 13 14
	#> 5 6 1 15 15
	run_length_encode(x, times)
	#> x run_length start end
	#> 1 10 5 0.96 4.08
	#> 2 9 4 4.41 5.94
	#> 3 8 3 5.98 7.40
	#> 4 7 2 7.57 7.84
	#> 5 6 1 8.35 8.35
	run_length_encode(x, times, overlap = TRUE)
	#> x run_length start end
	#> 1 10 5 0.960 4.245
	#> 2 9 4 4.245 5.960
	#> 3 8 3 5.960 7.485
	#> 4 7 2 7.485 8.095
	#> 5 6 1 8.095 8.350

	if (requireNamespace("dplyr", quietly = TRUE)) {
	library(dplyr)
	# Can be used with dplyr to summarise runs
	df <- data.frame(x, times, group = rep(c("a", "a", "b", "c", "c"), 1:5))
	df %>%
	dplyr::group_by(group) %>%
	dplyr::summarise(run_length_encode(x, times))
	}
	#>
	#> Attaching package: 'dplyr'
	#> The following objects are masked from 'package:stats':
	#>
	#> filter, lag
	#> The following objects are masked from 'package:base':
	#>
	#> intersect, setdiff, setequal, union
	#> `summarise()` has grouped output by 'group'. You can override using the
	#> `.groups` argument.
	#> # A tibble: 7 × 5
	#> # Groups: group [3]
	#> group x run_length start end
	#> <chr> <int> <int> <dbl> <dbl>
	#> 1 a 10 3 0.96 2.49
	#> 2 b 10 2 3.46 4.08
	#> 3 b 9 1 4.41 4.41
	#> 4 c 9 3 4.76 5.94
	#> 5 c 8 3 5.98 7.4
	#> 6 c 7 2 7.57 7.84
	#> 7 c 6 1 8.35 8.35