Last active
August 7, 2024 04:53
-
-
Save den-crane/55f4b8d7caf9b644a347a62729be3848 to your computer and use it in GitHub Desktop.
djoin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- in different versions of CH it works differently, and does not work at all with circle replication | |
drop table t1 on cluster segmented; | |
drop table t2 on cluster segmented; | |
create table t1 on cluster segmented (A Int64) Engine=MergeTree order by tuple(); | |
create table t2 on cluster segmented (A Int64) Engine=MergeTree order by tuple(); | |
create table t1d on cluster segmented as t1 Engine=Distributed(segmented, currentDatabase(), t1, A); | |
create table t2d on cluster segmented as t2 Engine=Distributed(segmented, currentDatabase(), t2, A); | |
insert into t1d select number+10 from numbers(3); | |
insert into t2d select number+10 from numbers(2); | |
select * from t1d; | |
┌──A─┐ | |
│ 10 │ | |
│ 12 │ | |
└────┘ | |
┌──A─┐ | |
│ 11 │ | |
└────┘ | |
select * from t2d; | |
┌──A─┐ | |
│ 10 │ | |
└────┘ | |
┌──A─┐ | |
│ 11 │ | |
└────┘ | |
# distributed join!!! join locally, on the right we use a non-sharded table t2 | |
select t1d.*, t2.* from t1d left join t2 using A | |
┌──A─┬─t2.A─┐ | |
│ 10 │ 10 │ | |
│ 12 │ 0 │ | |
└────┴──────┘ | |
┌──A─┬─t2.A─┐ | |
│ 11 │ 11 │ | |
└────┴──────┘ | |
## SAME but through Distributed t2d | |
set distributed_product_mode = 'local' | |
select t1d.*, t2d.* from t1d t1d left join t2d as t2d using A | |
┌──A─┬─t2d.A─┐ | |
│ 10 │ 10 │ | |
│ 12 │ 0 │ | |
└────┴───────┘ | |
┌──A─┬─t2d.A─┐ | |
│ 11 │ 11 │ | |
└────┴───────┘ | |
## select * from t2d | |
set distributed_product_mode = 'local' | |
select t1d.*, t2d.* from t1d t1d left join (select * from t2d) as t2d using A | |
┌──A─┬─t2d.A─┐ | |
│ 11 │ 11 │ | |
└────┴───────┘ | |
┌──A─┬─t2d.A─┐ | |
│ 10 │ 10 │ | |
│ 12 │ 0 │ | |
└────┴───────┘ | |
# tables are not sharded by the same key (i.e. rand()) | |
# all shards download the entire right table from everyone else, i.e. 25 shards will select * from td2 at the same time! | |
set distributed_product_mode = 'allow' | |
select t1d.*, t2d.* from t1d left join t2d using A | |
┌──A─┬─t2d.A─┐ | |
│ 10 │ 10 │ | |
│ 12 │ 0 │ | |
└────┴───────┘ | |
┌──A─┬─t2d.A─┐ | |
│ 11 │ 11 │ | |
└────┴───────┘ | |
## Global -- the initiator downloads the right table for himself, puts it in temporary and sends it to shards, they will join with temporary | |
set distributed_product_mode = 'deny' | |
select t1d.*, t2d.* from t1d global left join t2d using A | |
┌──A─┬─t2d.A─┐ | |
│ 10 │ 10 │ | |
│ 12 │ 0 │ | |
└────┴───────┘ | |
┌──A─┬─t2d.A─┐ | |
│ 11 │ 11 │ | |
└────┴───────┘ | |
set distributed_product_mode = 'global' | |
select t1d.*, t2d.* from t1d left join t2d using A | |
┌──A─┬─t2d.A─┐ | |
│ 10 │ 10 │ | |
│ 12 │ 0 │ | |
└────┴───────┘ | |
┌──A─┬─t2d.A─┐ | |
│ 11 │ 11 │ | |
└────┴───────┘ | |
# join on the initiator, the initiator grabs the whole t2d, goes along t1d and joins | |
select t1d.*, t2d.* from (select * from t1d) as t1d left join t2d using A | |
┌──A─┬─t2d.A─┐ | |
│ 10 │ 10 │ | |
│ 12 │ 0 │ | |
└────┴───────┘ | |
┌──A─┬─t2d.A─┐ | |
│ 11 │ 11 │ | |
└────┴───────┘ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment