Skip to content

Instantly share code, notes, and snippets.

@den-crane
Last active January 26, 2022 22:19
Show Gist options
  • Select an option

  • Save den-crane/bbaa6cc3c715e5729536e203cecf1620 to your computer and use it in GitHub Desktop.

Select an option

Save den-crane/bbaa6cc3c715e5729536e203cecf1620 to your computer and use it in GitHub Desktop.
quantilesTDigestMergeState
CREATE TABLE data
( timestamp DateTime,
host String,
repsonse_time Int32
) ENGINE MergeTree() ORDER BY (host, timestamp);
CREATE table data_aggregated_to_five_min
( timestamp DateTime,
host String,
quantiles_tdigest AggregateFunction(quantilesTDigest(0.75, 0.9, 0.95, 0.99), Int32)
) ENGINE = MergeTree ORDER BY (host, timestamp) ;
CREATE MATERIALIZED VIEW data_aggregated_to_five_min_mv to data_aggregated_to_five_min
AS SELECT
toStartOfFiveMinute(timestamp) AS timestamp,
host,
quantilesTDigestState(0.75, 0.9, 0.95, 0.99)(repsonse_time) AS quantiles_tdigest
FROM data
GROUP BY host,timestamp;
CREATE table data_aggregated_to_hour
( timestamp DateTime ,
host String,
quantiles_tdigest AggregateFunction(quantilesTDigest(0.75, 0.9, 0.95, 0.99), Int32) CODEC (ZSTD(2)))
ENGINE = SummingMergeTree()
Order by (host, timestamp);
CREATE MATERIALIZED VIEW data_aggregated_to_hour_mv to data_aggregated_to_hour
AS SELECT
toStartOfHour(timestamp) AS timestamp,
host ,
quantilesTDigestMergeState(0.75, 0.9, 0.95, 0.99)(quantiles_tdigest) AS quantiles_tdigest
FROM data_aggregated_to_five_min
GROUP BY host, timestamp;
insert into data select now() + number/10, toString(number%3), number%999 from numbers(100000000);
select host, quantilesTDigest(0.75, 0.9, 0.95, 0.99)(repsonse_time) from data group by host;
┌─host─┬─quantilesTDigest(0.75, 0.9, 0.95, 0.99)(repsonse_time)─┐
│ 1 │ [748.64435,898.59247,948.60986,988] │
│ 0 │ [747.6881,897.59235,947.4999,987] │
│ 2 │ [749.6969,899.59283,949.6927,989] │
└──────┴────────────────────────────────────────────────────────┘
3 rows in set. Elapsed: 1.484 sec. Processed 100.10 million rows, 1.40 GB (67.44 million rows/s., 944.09 MB/s.)
select host, quantilesTDigestMerge(0.75, 0.9, 0.95, 0.99)(quantiles_tdigest) from data_aggregated_to_five_min group by host;
┌─host─┬─quantilesTDigestMerge(0.75, 0.9, 0.95, 0.99)(quantiles_tdigest)─┐
│ 1 │ [748.7839,898.5568,949.00055,988] │
│ 0 │ [747.78394,897.55676,948.00055,987] │
│ 2 │ [749.7839,899.5568,950.00055,989] │
└──────┴─────────────────────────────────────────────────────────────────┘
3 rows in set. Elapsed: 0.991 sec. Processed 101.29 thousand rows, 27.46 MB (102.23 thousand rows/s., 27.71 MB/s.)
select host, quantilesTDigestMerge(0.75, 0.9, 0.95, 0.99)(quantiles_tdigest) from data_aggregated_to_hour group by host;
┌─host─┬─quantilesTDigestMerge(0.75, 0.9, 0.95, 0.99)(quantiles_tdigest)─┐
│ 1 │ [749.12115,898.5587,948.8921,988] │
│ 0 │ [748.00464,897.53467,947.77734,987] │
│ 2 │ [749.949,899.509,949.8807,989] │
└──────┴─────────────────────────────────────────────────────────────────┘
3 rows in set. Elapsed: 0.156 sec. Processed 8.42 thousand rows, 2.26 MB (54.06 thousand rows/s., 14.49 MB/s.)
SELECT
database,
table,
formatReadableSize(sum(data_compressed_bytes) AS size) AS compressed,
formatReadableSize(sum(data_uncompressed_bytes) AS usize) AS uncompressed,
round(usize / size, 2) AS compr_rate,
sum(rows) AS rows,
count() AS part_count
FROM system.parts
WHERE (active = 1) AND (table LIKE '%') AND (database LIKE 't')
GROUP BY
database,
table
ORDER BY size DESC;
┌─database─┬─table───────────────────────┬─compressed─┬─uncompressed─┬─compr_rate─┬──────rows─┬─part_count─┐
│ t │ data │ 125.41 MiB │ 954.63 MiB │ 7.61 │ 100100000 │ 9 │
│ t │ data_aggregated_to_five_min │ 5.88 MiB │ 206.34 MiB │ 35.07 │ 101289 │ 2 │
│ t │ data_aggregated_to_hour │ 621.80 KiB │ 27.27 MiB │ 44.91 │ 8418 │ 2 │
└──────────┴─────────────────────────────┴────────────┴──────────────┴────────────┴───────────┴────────────┘
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment