Skip to content

Instantly share code, notes, and snippets.

View den-crane's full-sized avatar
🙀
What's happening?

Denny [DBA at Innervate] den-crane

🙀
What's happening?
View GitHub Profile
@den-crane
den-crane / gin.md
Last active February 9, 2023 08:33
Clickhouse GIN

Plain Table VS Table with Projection VS Table with GIN

try to insert 100 000 000 rows fails with OOM

set allow_experimental_inverted_index=1;

CREATE TABLE bench(c_int Int64, c_str varchar(255), c_float Float64) 
Engine=ReplacingMergeTree partition by c_int%10 order by c_int as
select toInt64(cityHash64(number)), 
@den-crane
den-crane / UniqueMergeTree.md
Last active January 20, 2023 21:30
UniqueMergeTree vs ReplacingMergeTree

Test1

UniqueMergeTree

set optimize_on_insert=0;
CREATE TABLE test_unique_mergetree(n1 UInt32,n2 UInt32,s String) ENGINE= UniqueMergeTree ORDER BY n1;

INSERT INTO test_unique_mergetree SELECT number, 1, 'hello' FROM numbers(1e8);
0 rows in set. Elapsed: 65.947 sec. Processed 100.65 million rows, 805.21 MB (1.53 million rows/s., 12.21 MB/s.)
@den-crane
den-crane / AggregatingMergeTree_populate.md
Last active January 10, 2023 15:11
How to populate some column in AggregatingMergeTree

How to populate some column in AggregatingMergeTree if you use AggregateFunctions for all measurements? You can use simple insert and omit columns which should not be populated. AggregateFunction States do everything else magically

CREATE TABLE values_minute
(
    id LowCardinality(String),
    ts DateTime,
    count_state AggregateFunction(count),
    max_state AggregateFunction(max, Float64),
WITH table AS ( SELECT cityHash64(number) AS value from numbers(100000000)),
(SELECT quantilesExactInclusive(0.25, 0.5, 1)(value) FROM table) AS dist
select upper, lower, count() from (
SELECT arrayLastIndex(i -> (i < value), dist) AS index,
dist[least(index, length(dist)-1)] lower,
dist[least(index+1, length(dist))] upper
FROM table)
group by upper, lower
order by upper, lower
@den-crane
den-crane / ch_vs_starrocks_random_data.md
Last active January 20, 2023 16:43
ch_vs_starrocks test with random data

dataset 100 mil rows

clickhouse-client -q 'select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers_mt(100000000) format CSV' > bench.csv

ls -lh bench.csv
7.4G Dec 29 17:58 bench.csv

create table & load data

@den-crane
den-crane / ch_join_algorithm.sql
Last active April 18, 2024 16:25
CH 22.12 join_algorithm
create table A (A Int64, B Int64, S String) Engine=MergeTree order by A
as select number,number, toString(arrayMap(i->cityHash64(i*number), range(100))) from numbers(1e7);
select * from A a join A as b on a.A = b.A format Null;
SET join_algorithm = 'hash';
Peak memory usage (for query): 32.62 GiB.
0 rows in set. Elapsed: 20.700 sec. Processed 20.00 million rows, 41.31 GB (966.18 thousand rows/s., 2.00 GB/s.)
@den-crane
den-crane / projections.sql
Last active August 29, 2022 21:08
projections.sql
create table z(Browser String, Country UInt8, D Date, F Float64) Engine=MergeTree order by (Browser, Country, D);
insert into z select toString(number%9999), number%11111, today()-7, 1 from numbers(100000000);
insert into z select toString(number%9999), number%11111, today()-6, 1 from numbers(100000000);
insert into z select toString(number%9999), number%11111, today()-5, 1 from numbers(100000000);
insert into z select toString(number%9999), number%11111, today()-4, 1 from numbers(100000000);
insert into z select toString(number%9999), number%11111, today()-3, 1 from numbers(100000000);
insert into z select toString(number%9999), number%11111, today()-2, 1 from numbers(100000000);
insert into z select toString(number%9999), number%11111, today()-1, 1 from numbers(100000000);
<?xml version="1.0" ?>
<clickhouse>
<profiles>
<default>
<async_insert>true</async_insert>
<wait_for_async_insert>0</wait_for_async_insert>
<async_insert_threads>32</async_insert_threads>
<async_insert_max_data_size>20000000</async_insert_max_data_size>
<async_insert_busy_timeout_ms>30000</async_insert_busy_timeout_ms>
</default>
@den-crane
den-crane / go_test_long_sql.md
Last active June 29, 2022 17:31
go driver / test very long SQL

CH settings

cat /etc/clickhouse-server/users.d/max_query_size.xml
<?xml version="1.0" ?>
<yandex>
    <profiles>
        <default>
            <max_query_size>5242880</max_query_size>
        </default>
@den-crane
den-crane / get_max_order_date.md
Last active November 2, 2022 05:37
get max order_date

No optimizations

create table my_tbl ( order_id int , order_date date ) 
engine = MergeTree 
order by order_id; 

insert into my_tbl select number, today() + intDiv(number, 1000000) from numbers(1e9);

select max(order_date) from my_tbl;