Skip to content

Instantly share code, notes, and snippets.

@den-crane
Last active August 7, 2024 04:53
Show Gist options
  • Save den-crane/55f4b8d7caf9b644a347a62729be3848 to your computer and use it in GitHub Desktop.
Save den-crane/55f4b8d7caf9b644a347a62729be3848 to your computer and use it in GitHub Desktop.
djoin
-- in different versions of CH it works differently, and does not work at all with circle replication
drop table t1 on cluster segmented;
drop table t2 on cluster segmented;
create table t1 on cluster segmented (A Int64) Engine=MergeTree order by tuple();
create table t2 on cluster segmented (A Int64) Engine=MergeTree order by tuple();
create table t1d on cluster segmented as t1 Engine=Distributed(segmented, currentDatabase(), t1, A);
create table t2d on cluster segmented as t2 Engine=Distributed(segmented, currentDatabase(), t2, A);
insert into t1d select number+10 from numbers(3);
insert into t2d select number+10 from numbers(2);
select * from t1d;
┌──A─┐
│ 10 │
│ 12 │
└────┘
┌──A─┐
│ 11 │
└────┘
select * from t2d;
┌──A─┐
│ 10 │
└────┘
┌──A─┐
│ 11 │
└────┘
# distributed join!!! join locally, on the right we use a non-sharded table t2
select t1d.*, t2.* from t1d left join t2 using A
┌──A─┬─t2.A─┐
│ 10 │ 10 │
│ 12 │ 0 │
└────┴──────┘
┌──A─┬─t2.A─┐
│ 11 │ 11 │
└────┴──────┘
## SAME but through Distributed t2d
set distributed_product_mode = 'local'
select t1d.*, t2d.* from t1d t1d left join t2d as t2d using A
┌──A─┬─t2d.A─┐
│ 10 │ 10 │
│ 12 │ 0 │
└────┴───────┘
┌──A─┬─t2d.A─┐
│ 11 │ 11 │
└────┴───────┘
## select * from t2d
set distributed_product_mode = 'local'
select t1d.*, t2d.* from t1d t1d left join (select * from t2d) as t2d using A
┌──A─┬─t2d.A─┐
│ 11 │ 11 │
└────┴───────┘
┌──A─┬─t2d.A─┐
│ 10 │ 10 │
│ 12 │ 0 │
└────┴───────┘
# tables are not sharded by the same key (i.e. rand())
# all shards download the entire right table from everyone else, i.e. 25 shards will select * from td2 at the same time!
set distributed_product_mode = 'allow'
select t1d.*, t2d.* from t1d left join t2d using A
┌──A─┬─t2d.A─┐
│ 10 │ 10 │
│ 12 │ 0 │
└────┴───────┘
┌──A─┬─t2d.A─┐
│ 11 │ 11 │
└────┴───────┘
## Global -- the initiator downloads the right table for himself, puts it in temporary and sends it to shards, they will join with temporary
set distributed_product_mode = 'deny'
select t1d.*, t2d.* from t1d global left join t2d using A
┌──A─┬─t2d.A─┐
│ 10 │ 10 │
│ 12 │ 0 │
└────┴───────┘
┌──A─┬─t2d.A─┐
│ 11 │ 11 │
└────┴───────┘
set distributed_product_mode = 'global'
select t1d.*, t2d.* from t1d left join t2d using A
┌──A─┬─t2d.A─┐
│ 10 │ 10 │
│ 12 │ 0 │
└────┴───────┘
┌──A─┬─t2d.A─┐
│ 11 │ 11 │
└────┴───────┘
# join on the initiator, the initiator grabs the whole t2d, goes along t1d and joins
select t1d.*, t2d.* from (select * from t1d) as t1d left join t2d using A
┌──A─┬─t2d.A─┐
│ 10 │ 10 │
│ 12 │ 0 │
└────┴───────┘
┌──A─┬─t2d.A─┐
│ 11 │ 11 │
└────┴───────┘
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment