Skip to content

Instantly share code, notes, and snippets.

@den-crane
Last active March 18, 2021 21:31
Show Gist options
  • Select an option

  • Save den-crane/0c65ca11ae2d0c6f069f33c951becd5b to your computer and use it in GitHub Desktop.

Select an option

Save den-crane/0c65ca11ae2d0c6f069f33c951becd5b to your computer and use it in GitHub Desktop.
rocksDB_direct_dictionary
rocksDB is faster than MergeTree on Key/Value queries because MergeTree primary key index is sparse.
Probably it's possible to speedup MergeTree by reducing index_granularity.
The main feature of rocksDB is instant updates.
You can update row instantly (microseconds):
select * from rocksDB where A=15645646;
┌────────A─┬─B────────────────────┐
│ 15645646 │ 12517841379565221195 │
└──────────┴──────────────────────┘
1 rows in set. Elapsed: 0.001 sec.
insert into rocksDB values (15645646, 'xxxx');
1 rows in set. Elapsed: 0.001 sec.
select * from rocksDB where A=15645646;
┌────────A─┬─B────┐
│ 15645646 │ xxxx │
└──────────┴──────┘
1 rows in set. Elapsed: 0.001 sec.
------
create table rocksDB(A UInt64, B String, primary key A) Engine=EmbeddedRocksDB();
insert into rocksDB select number, toString(cityHash64(number)) from numbers(100000000);
select count() from (
select * from rocksDB where A in (select toUInt64(rand64()%100000000) from numbers(10000))
)
Elapsed: 0.076 sec. Processed 10.00 thousand rows
CREATE DICTIONARY test_rocksDB(A UInt64,B String)
PRIMARY KEY A
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 TABLE rocksDB DB 'default' USER 'default'))
LAYOUT(DIRECT());
select count() from (
select dictGet( 'default.test_rocksDB', 'B', toUInt64(rand64()%100000000) ) from numbers_mt(100000)
)
Elapsed: 0.786 sec. Processed 100.00 thousand rows
create table mergeTreeDB(A UInt64, B String) Engine=MergeTree() order by A;
insert into mergeTreeDB select number, toString(cityHash64(number)) from numbers(100000000);
CREATE DICTIONARY test_mergeTreeDB(A UInt64,B String)
PRIMARY KEY A
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 TABLE mergeTreeDB DB 'default' USER 'default'))
LAYOUT(DIRECT());
select count() from (
select * from mergeTreeDB where A in (select toUInt64(rand64()%100000000) from numbers(10000))
)
Elapsed: 0.202 sec. Processed 55.95 million rows
select count() from (
select dictGet( 'default.test_mergeTreeDB', 'B', toUInt64(rand64()%100000000) ) from numbers_mt(100000)
)
Elapsed: 3.160 sec. Processed 100.00 thousand rows
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment