Last active
June 10, 2020 23:54
-
-
Save jangorecki/cf2ad1a01e7f1493a4bd3ef4444e1cbc to your computer and use it in GitHub Desktop.
sort merge benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Sample `size` integers from 1..unq_n, optionally returned in ascending order.
##
## unq_n  number of distinct values available (the values are 1..unq_n)
## size   length of the returned vector
## sort   if TRUE the result is sorted before being returned
##
## When unq_n > size the result is `size` distinct values drawn without
## replacement. Otherwise every value of 1..unq_n appears at least once, the
## remaining size-unq_n slots are filled by drawing with replacement, and the
## combined vector is shuffled.
ssa = function(unq_n, size, sort=FALSE) {
  if (unq_n > size) {
    ## fewer slots than distinct values: plain draw without replacement
    ans = sample.int(unq_n, size)
  } else {
    unq_sub = seq_len(unq_n)
    ## unq_n <= size in this branch, so the number of extra draws is never negative
    extra = sample(unq_sub, size=size-unq_n, replace=TRUE)
    ans = sample(c(unq_sub, extra))
  }
  ## applied after both branches so that sort=TRUE is honoured when unq_n > size too
  if (sort) sort(ans) else ans
}
## Benchmark set-up: fixed seed, wide console, data.table with auto-indexing off.
set.seed(108)
library(data.table)
options(width=200)
## not needed, but keeps the benchmark future proof in case forder starts setting an index
options(datatable.auto.index=FALSE, datatable.verbose=FALSE)
N = 1e9L

## unsorted no duplicates
d1 = data.table(x=ssa(N, N))[, "v1":=seq_len(.N)]
d2 = data.table(y=ssa(N, N))[, "v2":=seq_len(.N)]
## unsorted single duplicate (use these two lines instead of the two above)
#d1 = data.table(x=ssa(N-1L, N))[, "v1":=seq_len(.N)]
#d2 = data.table(y=ssa(N-1L, N))[, "v2":=seq_len(.N)]

## Each scenario below times the same join twice, with datatable.smerge off
## (result kept in `b`) and on (result kept in `s`), then compares the two.
## Results are wrapped in print() so they are shown under source() as well,
## not only when the script is run at top level.

## single thread no index
setDTthreads(1L)
options(datatable.smerge=FALSE)
print(system.time(b <- d1[d2, on="x==y"]))
options(datatable.smerge=TRUE)
print(system.time(s <- d1[d2, on="x==y"]))
print(all.equal(b, s))

## all threads no index
## NOTE(review): 40L is presumably the core count of the benchmark machine - confirm before reuse
setDTthreads(40L)
options(datatable.smerge=FALSE)
print(system.time(b <- d1[d2, on="x==y"]))
options(datatable.smerge=TRUE)
print(system.time(s <- d1[d2, on="x==y"]))
print(all.equal(b, s))

## all threads index
setDTthreads(40L)
## Attach an index on `cols` to data.table `x` by reference, storing the result
## of forderv(..., retGrp=TRUE) as the index, i.e. pretend we are after #4386.
##
## x     a data.table; its "index" attribute is modified by reference
## cols  character vector of column names to index on
##
## Returns `x` invisibly.
setindexv2 = function(x, cols) {
  stopifnot(is.data.table(x), is.character(cols))
  ## create the container attribute that holds one sub-attribute per index
  if (is.null(attr(x, "index", exact=TRUE))) setattr(x, "index", integer())
  ## data.table names an index "__col1__col2": each column contributes exactly
  ## one "__" prefix, so the pieces are joined with an empty separator
  ## (collapse="__" would yield "__col1____col2" for a multi-column index)
  idx_name = paste0("__", cols, collapse="")
  setattr(attr(x, "index", exact=TRUE), idx_name, data.table:::forderv(x, cols, retGrp=TRUE))
  invisible(x)
}
## all threads index (continued): index both join columns, then time the join
## with datatable.smerge off (`b`) and on (`s`) and compare the results.
## Results are wrapped in print() so they are shown under source() as well,
## not only when the script is run at top level.
setindexv2(d1, "x"); setindexv2(d2, "y")
options(datatable.smerge=FALSE)
print(system.time(b <- d1[d2, on="x==y"]))
options(datatable.smerge=TRUE)
print(system.time(s <- d1[d2, on="x==y"]))
print(all.equal(b, s))

## all threads sorted index: key both tables on the join column first, then index again
setkeyv(d1, "x"); setkeyv(d2, "y")
setindexv2(d1, "x"); setindexv2(d2, "y")
options(datatable.smerge=FALSE)
print(system.time(b <- d1[d2, on="x==y"]))
options(datatable.smerge=TRUE)
print(system.time(s <- d1[d2, on="x==y"]))
print(all.equal(b, s))

## uncomment to re-run the last scenario with verbose timing output
#options(datatable.smerge=FALSE, datatable.verbose=TRUE)
#system.time(b <- d1[d2, on="x==y"])
#options(datatable.smerge=TRUE, datatable.verbose=TRUE)
#system.time(s <- d1[d2, on="x==y"])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# no duplicates | |
## single thread no index | |
user system elapsed | |
bmerge 394.931 32.802 427.750 | |
smerge 336.447 73.674 410.139 | |
## all threads no index | |
user system elapsed | |
bmerge 819.928 143.668 368.546 | |
smerge 1136.770 166.111 100.471 | |
## all threads index | |
user system elapsed | |
bmerge 658.381 103.473 377.290 | |
smerge 579.559 109.165 78.886 | |
## all threads sorted index | |
user system elapsed | |
bmerge 68.579 47.075 69.485 | |
smerge 37.985 42.468 20.842 | |
# single duplicate | |
## single thread no index | |
user system elapsed | |
bmerge 429.513 34.142 463.676 | |
smerge 367.919 80.007 447.952 | |
## all threads no index | |
user system elapsed | |
bmerge 819.215 149.786 368.750 | |
smerge 1191.925 212.859 137.033 | |
## all threads index | |
user system elapsed | |
bmerge 654.823 98.173 379.881 | |
smerge 623.015 160.435 115.979 | |
## all threads sorted index | |
user system elapsed | |
bmerge 87.594 47.124 94.729 | |
smerge 71.851 64.715 42.599 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# no duplicates | |
> options(datatable.smerge=FALSE, datatable.verbose=TRUE) | |
> system.time(b <- d1[d2, on="x==y"]) | |
i.y has same type (integer) as x.x. No coercion needed. | |
on= matches existing key, using key | |
Starting bmerge ... | |
bmerge done in 45.6s elapsed (42.7s cpu) | |
Constructing irows for '!byjoin || nqbyjoin' ... 0.000s elapsed (0.000s cpu) | |
user system elapsed | |
69.703 34.536 73.928 | |
> options(datatable.smerge=TRUE, datatable.verbose=TRUE) | |
> system.time(s <- d1[d2, on="x==y"]) | |
Starting smerge ... | |
smergeR: index (already indexed) took 0.000s | |
smergeR: sort (already sorted) took 0.000s | |
smergeR: alloc of size 1000000000 took 0.492s | |
smergeC: grpLens (already unique) took 0.000s | |
batching: input 1000000000 into balanced 80 batches (batchSize=12500000, lastBatchSize=12500000) of sorted x y: x[1]<=y[1] && x[nx]>=y[ny]:
... | |
smergeC: preparing 80 batches took 0.003s | |
smergeC: 80 calls to smerge using 40/40 threads took 0.339s | |
smergeR: smergeC of 1000000000 x 1000000000 = 1000000000; took 0.342s | |
smergeR: outSmerge (was sorted) took 0.000s | |
smergeR: all took 0.835s | |
smerge done in 0.836s elapsed (14.8s cpu) | |
Constructing irows for '!byjoin || nqbyjoin' ... 0.001s elapsed (0.037s cpu) | |
user system elapsed | |
38.082 42.926 20.368 | |
# single duplicate | |
> options(datatable.smerge=FALSE, datatable.verbose=TRUE) | |
> system.time(b <- d1[d2, on="x==y"]) | |
i.y has same type (integer) as x.x. No coercion needed. | |
on= matches existing key, using key | |
Starting bmerge ... | |
bmerge done in 46.6s elapsed (42.9s cpu) | |
Constructing irows for '!byjoin || nqbyjoin' ... 20.0s elapsed (15.6s cpu) | |
user system elapsed | |
85.789 53.102 91.941 | |
> options(datatable.smerge=TRUE, datatable.verbose=TRUE) | |
> system.time(s <- d1[d2, on="x==y"]) | |
Starting smerge ... | |
smergeR: index (already indexed) took 0.000s | |
smergeR: sort (already sorted) took 0.000s | |
smergeR: alloc of size 1000000000 took 0.472s | |
smergeC: grpLens (x, y) took 0.487s | |
batching: input 999999999 into balanced 80 batches (batchSize=12500000, lastBatchSize=12499999) of sorted x y: x[1]<=y[1] && x[nx]>=y[ny]:
... | |
smergeC: preparing 80 batches took 0.002s | |
smergeC: 80 calls to smerge using 40/40 threads took 0.797s | |
smergeR: smergeC of 1000000000 x 1000000000 = 1000000001; took 1.286s | |
smergeR: outSmerge (was sorted) took 0.000s | |
smergeR: all took 1.759s | |
smerge done in 1.760s elapsed (34.0s cpu) | |
Constructing irows for '!byjoin || nqbyjoin' ... 20.1s elapsed (15.9s cpu) | |
user system elapsed | |
74.858 59.953 43.362 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment