Skip to content

Instantly share code, notes, and snippets.

@den-crane
Last active May 29, 2024 23:22
Show Gist options
  • Save den-crane/fb1108e4843250a71bac7528a79700c2 to your computer and use it in GitHub Desktop.
Save den-crane/fb1108e4843250a71bac7528a79700c2 to your computer and use it in GitHub Desktop.
Clickhouse fast not exists
ClickHouse server version 18.14.12 revision 54409.
create table data(K Int64, V String) engine=MergeTree order by K;
insert into data select number, toString(number) from numbers(100,100000000);
optimize table data final;
create table buffer(K Int64, V String) engine=Memory;
insert into buffer select number, toString(number) from numbers(0,1000);
# select count() from buffer where K not in (select K from data);
┌─count()─┐
│ 100 │
└─────────┘
1 rows in set. Elapsed: 7.136 sec. Processed 100.00 million rows, 800.01 MB (14.01 million rows/s., 112.10 MB/s.)
# select count() from buffer b any left outer join data d on (d.K = b.K) where data.K = 0;
┌─count()─┐
│ 100 │
└─────────┘
1 rows in set. Elapsed: 10.258 sec. Processed 100.00 million rows, 800.01 MB (9.75 million rows/s., 77.99 MB/s.)
# select count() from buffer where K not in (select K from data where K in (select K from buffer));
┌─count()─┐
│ 100 │
└─────────┘
1 rows in set. Elapsed: 0.002 sec. Processed 9.19 thousand rows, 73.54 KB (3.96 million rows/s., 31.66 MB/s.)
drop table data;
create table data(K Int64, V String) engine=MergeTree order by K settings index_granularity = 512;
insert into data select number, toString(number) from numbers(100,100000000);
optimize table data final;
# select count() from buffer where K not in (select K from data where K in (select K from buffer));
┌─count()─┐
│ 100 │
└─────────┘
1 rows in set. Elapsed: 0.002 sec. Processed 2.02 thousand rows, 16.19 KB (913.48 thousand rows/s., 7.31 MB/s.)
# insert into data select * from buffer where K not in (select K from data where K in (select K from buffer));
0 rows in set. Elapsed: 0.003 sec. Processed 2.02 thousand rows, 28.08 KB (687.10 thousand rows/s., 9.53 MB/s.)
# select count() from buffer where K not in (select K from data where K in (select K from buffer));
0 rows in set. Elapsed: 0.002 sec. Processed 2.12 thousand rows, 16.99 KB (934.36 thousand rows/s., 7.47 MB/s.)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment