Last active
May 8, 2024 10:57
-
-
Save den-crane/d03524eadbbce0bafa528101afa8f794 to your computer and use it in GitHub Desktop.
AggregatingMergeTree-event-enrichment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE states_raw( | |
d date, | |
uid UInt64, | |
first_name String, | |
last_name String, | |
modification_timestamp_mcs DateTime64(3) default now64(3) | |
) ENGINE = Null; | |
CREATE TABLE final_states_by_month( | |
d date, | |
uid UInt64, | |
final_first_name AggregateFunction(argMax, String, DateTime64(3)), | |
final_last_name AggregateFunction(argMax, String, DateTime64(3))) | |
ENGINE = AggregatingMergeTree | |
PARTITION BY toYYYYMM(d) | |
ORDER BY (uid, d); | |
CREATE MATERIALIZED VIEW final_states_by_month_mv TO final_states_by_month AS | |
SELECT | |
d, uid, | |
argMaxState(first_name, if(first_name<>'', modification_timestamp_mcs, toDateTime64(0,3))) AS final_first_name, | |
argMaxState(last_name, if(last_name<>'', modification_timestamp_mcs, toDateTime64(0,3))) AS final_last_name | |
FROM states_raw | |
GROUP BY d, uid; | |
insert into states_raw(d,uid,first_name) values (today(), 1, 'Tom'); | |
insert into states_raw(d,uid,last_name) values (today(), 1, 'Jones'); | |
insert into states_raw(d,uid,first_name,last_name) values (today(), 2, 'XXX', ''); | |
insert into states_raw(d,uid,first_name,last_name) values (today(), 2, 'YYY', 'YYY'); | |
select uid, argMaxMerge(final_first_name) first_name, argMaxMerge(final_last_name) last_name | |
from final_states_by_month group by uid | |
┌─uid─┬─first_name─┬─last_name─┐ | |
│ 2 │ YYY │ YYY │ | |
│ 1 │ Tom │ Jones │ | |
└─────┴────────────┴───────────┘ | |
optimize table final_states_by_month final; | |
select uid, finalizeAggregation(final_first_name) first_name, finalizeAggregation(final_last_name) last_name | |
from final_states_by_month | |
┌─uid─┬─first_name─┬─last_name─┐ | |
│ 1 │ Tom │ Jones │ | |
│ 2 │ YYY │ YYY │ | |
└─────┴────────────┴───────────┘ |
@den-crane Shouldn't columns on line 16 and line 24 match?
ORDER BY (uid)
vsGROUP BY d, uid
they should, my bad
the same with Nullable https://fiddle.clickhouse.com/f0f3f99c-eaf6-43d9-9851-f5be8511ee8b
Thank you @den-crane. I wonder what would happen if we have billion's of records and we try to filter by say first_name
. Kindly correct me if I am wrong, I believe it would have to do read all the rows to finally return.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@den-crane Shouldn't columns on line 16 and line 24 match?
ORDER BY (uid)
vsGROUP BY d, uid