Created
April 4, 2018 15:22
-
-
Save gmbecker/23f5db998d782a172e83717fa9956568 to your computer and use it in GitHub Desktop.
Finding vector in larger vector #rstats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
findneedle = function(need, hay) { | |
inds = which(hay == need[1L]) | |
i = 2L | |
while(i <= length(need) && length(inds) > 0L) { | |
nval = need[i] | |
## adding the scalars together first reduces the allocs | |
## by 1 saves time if inds is really long | |
inds = inds[hay[(i - 1L) + inds] == nval] | |
i = i + 1L | |
} | |
inds | |
} | |
findneedle2 = function(need, hay) { | |
inds = which(hay == need[1L]) | |
i = 2L | |
while(i < length(need) && length(inds) > 0L) { | |
nval = need[i] | |
## adding the scalars together first reduces the allocs | |
## by 1 saves time if inds is really long | |
inds = inds[hay[(i - 1L) + inds] == nval] | |
i = i + 1L | |
} | |
## if need is reasonably long, this doesn't make much | |
## difference but if need is very short, avoiding | |
## allocating a vector for all the matching indices helps | |
match(tail(need, 1L), hay[inds]) | |
} | |
library(microbenchmark) | |
hay = sample(1:10, 5000000, replace=TRUE) | |
microbenchmark(findneedle(c(2L, 5L), hay), findneedle2(c(2L, 5L), hay)) | |
## microbenchmark(findneedle(c(2L, 5L), hay), findneedle2(c(2L, 5L), hay)) | |
## Unit: milliseconds | |
## expr min lq mean median uq | |
## findneedle(c(2L, 5L), hay) 56.28046 63.55553 69.52761 66.23459 69.87862 | |
## findneedle2(c(2L, 5L), hay) 50.17408 55.78857 64.52079 58.59603 63.12750 | |
## max neval | |
## 119.8740 100 | |
## 117.0698 100 | |
microbenchmark(findneedle(c(2L, 5L, 3L, 6L, 4L), hay), findneedle2(c(2L, 5L, 3L, 6L, 4L), hay)) | |
## microbenchmark(findneedle(c(2L, 5L, 3L, 6L, 4L), hay), findneedle2(c(2L, 5L, 3L, 6L, 4L), hay)) | |
## Unit: milliseconds | |
## expr min lq mean median | |
## findneedle(c(2L, 5L, 3L, 6L, 4L), hay) 59.55968 65.23221 71.73937 67.40074 | |
## findneedle2(c(2L, 5L, 3L, 6L, 4L), hay) 63.03239 64.71153 69.61624 66.92804 | |
## uq max neval | |
## 69.43795 111.5696 100 | |
## 69.08548 119.1687 100 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment