Skip to content

Instantly share code, notes, and snippets.

View den-crane's full-sized avatar
🙀
What's happening?

Denny [DBA at Innervate] den-crane

🙀
What's happening?
View GitHub Profile
@den-crane
den-crane / AggregatingMergeTree_projection.sql
Last active February 10, 2023 04:54
AggregatingMergeTree + projection
create table test (
A Int64,
B String,
SomeID AggregateFunction(uniq, Int64),
projection p1 (select B, uniqMergeState(SomeID) group by B)
)
Engine=AggregatingMergeTree order by (A, B);
insert into test select number A, number%3 B, uniqState(toInt64(rand64())) from numbers(1e7) group by A,B;
@den-crane
den-crane / UInt32_VS_DateTime.sql
Last active February 9, 2023 15:57
UInt32 VS DateTime
create table test_d( a Int64, i UInt32, d DateTime) Engine=MergeTree order by a;
insert into test_d
select number+rand()%50, toUInt32(toDateTime('2020-01-01 00:00:00')+number) i , toDateTime(i)
from numbers(1e8);
SELECT
database,
table,
column,
@den-crane
den-crane / orphan_parts_issue_repro.md
Last active January 31, 2023 22:29
orphan_parts_issue_repro
create table test_bug(A Int64, D Date, S String) 
Engine = ReplicatedMergeTree('/clickhouse/tables/test_bug_orphans', '{replica}')
partition by D order by A;

insert into test_bug select number, today(), '' from numbers(1000);
insert into test_bug select number, today()-1, '' from numbers(1000);
insert into test_bug select number, today()-1, '' from numbers(1001);
insert into test_bug select number, today()-1, '' from numbers(1002);
select sleep(.5);
@den-crane
den-crane / mv.md
Created January 22, 2023 14:55
Using Materialized Views in ClickHouse
(
    `date` Date,
    `project` LowCardinality(String),
    `hits` UInt32
)
ENGINE = SummingMergeTree
ORDER BY (date, project, hits);
@den-crane
den-crane / gin.md
Last active February 9, 2023 08:33
Clickhouse GIN

Plain Table VS Table with Projection VS Table with GIN

try to insert 100 000 000 rows fails with OOM

set allow_experimental_inverted_index=1;

CREATE TABLE bench(c_int Int64, c_str varchar(255), c_float Float64) 
Engine=ReplacingMergeTree partition by c_int%10 order by c_int as
select toInt64(cityHash64(number)), 
@den-crane
den-crane / UniqueMergeTree.md
Last active January 20, 2023 21:30
UniqueMergeTree vs ReplacingMergeTree

Test1

UniqueMergeTree

set optimize_on_insert=0;
CREATE TABLE test_unique_mergetree(n1 UInt32,n2 UInt32,s String) ENGINE= UniqueMergeTree ORDER BY n1;

INSERT INTO test_unique_mergetree SELECT number, 1, 'hello' FROM numbers(1e8);
0 rows in set. Elapsed: 65.947 sec. Processed 100.65 million rows, 805.21 MB (1.53 million rows/s., 12.21 MB/s.)
@den-crane
den-crane / AggregatingMergeTree_populate.md
Last active January 10, 2023 15:11
How to populate some column in AggregatingMergeTree

How to populate some column in AggregatingMergeTree if you use AggregateFunctions for all measurements? You can use simple insert and omit columns which should not be populated. AggregateFunction States do everything else magically

CREATE TABLE values_minute
(
    id LowCardinality(String),
    ts DateTime,
    count_state AggregateFunction(count),
    max_state AggregateFunction(max, Float64),
WITH table AS ( SELECT cityHash64(number) AS value from numbers(100000000)),
(SELECT quantilesExactInclusive(0.25, 0.5, 1)(value) FROM table) AS dist
select upper, lower, count() from (
SELECT arrayLastIndex(i -> (i < value), dist) AS index,
dist[least(index, length(dist)-1)] lower,
dist[least(index+1, length(dist))] upper
FROM table)
group by upper, lower
order by upper, lower
@den-crane
den-crane / ch_vs_starrocks_random_data.md
Last active January 20, 2023 16:43
ch_vs_starrocks test with random data

dataset 100 mil rows

clickhouse-client -q 'select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers_mt(100000000) format CSV' > bench.csv

ls -lh bench.csv
7.4G Dec 29 17:58 bench.csv

create table & load data

@den-crane
den-crane / ch_join_algorithm.sql
Last active June 6, 2025 14:07
CH 22.12 join_algorithm
create table A (A Int64, B Int64, S String) Engine=MergeTree order by A
as select number,number, toString(arrayMap(i->cityHash64(i*number), range(100))) from numbers(1e7);
select * from A a join A as b on a.A = b.A format Null;
SET join_algorithm = 'hash';
Peak memory usage (for query): 32.62 GiB.
0 rows in set. Elapsed: 20.700 sec. Processed 20.00 million rows, 41.31 GB (966.18 thousand rows/s., 2.00 GB/s.)