Created
November 27, 2019 21:41
-
-
Save Weiming-Hu/97b075463efaf28fb53c36ad01fda22f to your computer and use it in GitHub Desktop.
A function to find continuous sequences from a vector.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# "`-''-/").___..--''"`-._ | |
# (`6_ 6 ) `-. ( ).`-.__.`) WE ARE ... | |
# (_Y_.)' ._ ) `._ `. ``-..-' PENN STATE! | |
# _ ..`--'_..-_/ /--'_.' ,' | |
# (il),-'' (li),' ((!.-' | |
# | |
# | |
# Author: Weiming Hu ([email protected]) and Martina Calovi ([email protected]) | |
# Geoinformatics and Earth Observation Laboratory (http://geolab.psu.edu) | |
# Department of Geography and Institute for CyberScience | |
# The Pennsylvania State University | |
# | |
#' find.sequences | |
#' | |
#' find.sequences finds the start and end indices of | |
#' continuous sequences whose length is at least | |
#' `min.len`. Every value in a sequence must be strictly | |
#' larger than the threshold; an `NA` ends a sequence. | |
#' | |
#' @param v A numeric vector | |
#' @param min.len The minimum length of a sequence to | |
#' be extracted. | |
#' @param threshold The value that every element of a sequence must strictly exceed. | |
#' | |
#' @return A data frame with the start and end indices. | |
#' | |
#' @examples | |
#' v <- sin(seq(1, 20, length.out = 200)) | |
#' v[sample(length(v), floor(length(v) * 0.2))] <- NA | |
#' | |
#' plot(v, type = 'b') | |
#' min.len <- 5 | |
#' threshold <- 0.5 | |
#' df <- find.sequences(v, min.len, threshold) | |
#' abline(v = df$Start.ID, col = 'green') | |
#' abline(v = df$End.ID, col = 'red') | |
#' | |
#' @md | |
#' @export | |
find.sequences <- function(v, min.len, threshold) { | |
start.id <- c() | |
end.id <- c() | |
continous.day.count <- 0 | |
record <- F | |
for (i in 1:length(v)) { | |
if (is.na(v[i])) { | |
if (record) { | |
record <- F | |
if (continous.day.count >= min.len) { | |
end.id <- c(end.id, i - 1) | |
} else { | |
start.id <- start.id[-length(start.id)] | |
} | |
} | |
continous.day.count <- 0 | |
} else { | |
if (v[i] > threshold) { | |
continous.day.count <- continous.day.count + 1 | |
if (!record) { | |
record <- T | |
start.id <- c(start.id, i) | |
} | |
} else { | |
if (record) { | |
record <- F | |
if (continous.day.count >= min.len) { | |
end.id <- c(end.id, i - 1) | |
} else { | |
start.id <- start.id[-length(start.id)] | |
} | |
} | |
continous.day.count <- 0 | |
} | |
} | |
} | |
if (record) { | |
if (continous.day.count >= min.len) { | |
end.id <- c(end.id, i) | |
} else { | |
start.id <- start.id[-length(start.id)] | |
} | |
} | |
return(data.frame(Start.ID = start.id, | |
End.ID = end.id)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment