Created
January 6, 2015 12:11
-
-
Save donarus/1956a3326c2872e1e1eb to your computer and use it in GitHub Desktop.
get indexes of N max items in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(microbenchmark) | |
library(testthat) | |
# PARTIAL SORT | |
# using sort partial | |
pasort <- function(data, count) { | |
partIdxs <- which(data >= -sort(-data, partial=count)[count]) | |
partData <- data[partIdxs] | |
dataOrder <- order(partData, decreasing = TRUE) | |
partIdxs[dataOrder][1:count] | |
} | |
# SORT | |
# using sort and selecting N last decreasing | |
bysort <- function(data, count) { order(data, decreasing = T)[1:count] } | |
# WHICH MAX | |
# iterate count-times and select max index | |
whimax <- function(data, count) { | |
rv <- c() | |
for(i in 1:count) { | |
idx <- which.max(data) | |
rv <- c(rv, idx) | |
data[idx] <- -1 | |
} | |
rv | |
} | |
# benchmark | |
data = sample(0:1000,1000000,replace=T) | |
op <- microbenchmark( | |
SORT=bysort(data, 5), | |
SMAX=whimax(data, 5), | |
PASO=pasort(data, 5), | |
times=1000L | |
) | |
print(op) #standard data frame of the output | |
boxplot(op) #boxplot of output | |
library(ggplot2) #nice log plot of the output | |
qplot(y=time, data=op, colour=expr) + scale_y_log10() | |
test_that('same results', { | |
for (i in 0:10) { | |
for (j in 1:100) { | |
data <- sample(0:700,500,replace=T) | |
bs <- bysort(data, j) | |
wm <- whimax(data, j) | |
ps <- pasort(data, j) | |
expect_equal(bs, wm) | |
expect_equal(bs, ps) | |
} | |
} | |
}) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
microbenchmark results:
Unit: milliseconds
expr min lq mean median uq max neval
SORT 279.922795 282.54416 289.11805 283.93788 286.93312 328.62647 100
SMAX 21.961694 22.95995 53.17893 63.20664 64.36891 107.70355 100
PASO 9.500376 10.34805 25.08266 11.07473 51.86092 54.00938 100
to see the plot run in R studio