Created
October 8, 2022 19:44
-
-
Save dvgodoy/63941ce7a7eab845dc990e7bdda2bff8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def bounds(data, max_contiguous_na=5):
    """Return the bounds of the longest run of data with only tolerable gaps.

    A run is a sequence of valid (non-NaN) points in which every stretch of
    consecutive missing points is at most ``max_contiguous_na`` long. Runs
    are ranked by the number of valid points they contain; on a tie the
    earliest run wins.

    Args:
        data: One-dimensional sequence of numbers accepted by ``np.isnan``.
        max_contiguous_na: Largest number of consecutive missing points
            that may appear inside a run.

    Returns:
        A ``(ini, end)`` pair such that ``data[ini:end]`` is the longest
        run. ``ini`` is the position of its first valid point and ``end``
        is one past the position of its last valid point, so the slice
        never starts or ends on a missing value. If ``data`` holds no valid
        point at all, ``(0, 0)`` (an empty slice) is returned.
    """
    # Positions of the non-null points
    idxs = np.arange(len(data))[~np.isnan(data)]
    if idxs.size == 0:
        # Nothing valid to report: hand back an empty slice instead of
        # failing while indexing an empty array
        return 0, 0

    # gaps[i] is the number of missing points between valid points
    # i and i + 1
    gaps = np.diff(idxs) - 1

    # ini / max_ini are positions within idxs, not within data. A single
    # valid point is already a run of size 1.
    max_size, max_ini = 1, 0
    size, ini = 1, 0
    for i, gap in enumerate(gaps):
        if gap > max_contiguous_na:
            # The gap is larger than tolerable: a new run starts at the
            # valid point right after it
            ini, size = i + 1, 1
            continue
        # No gap, or a tolerable one: the next valid point joins the run
        size += 1
        # Record the best run after every extension, so a point reached
        # across a tolerable gap is not lost
        if size > max_size:
            max_size, max_ini = size, ini

    # One past the last valid point (in idxs) of the longest run
    max_end = max_ini + max_size
    return idxs[max_ini], idxs[max_end - 1] + 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment