Skip to content

Instantly share code, notes, and snippets.

@den-crane
Last active January 20, 2023 16:43
Show Gist options
  • Save den-crane/c9e6eff2611d52196e491dc522e654d4 to your computer and use it in GitHub Desktop.
Save den-crane/c9e6eff2611d52196e491dc522e654d4 to your computer and use it in GitHub Desktop.
ch_vs_starrocks test with random data

dataset 100 mil rows

clickhouse-client -q 'select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers_mt(100000000) format CSV' > bench.csv

ls -lh bench.csv
7.4G Dec 29 17:58 bench.csv

create table & load data

set max_bytes_before_external_group_by=0;

drop table if exists bench;
CREATE TABLE bench(c_int Int64, c_str varchar(255), c_float Float64) 
Engine=MergeTree order by c_int;

time clickhouse-client  -q 'insert into bench format CSV' < bench.csv
real	0m26.273s

┌─database─┬─table─┬─default_compression_codec─┬─compressed─┬─uncompressed─┬─compr_rate─┬──────rows─┬─part_count─┐
│ default  │ bench │ LZ4                       │ 4.58 GiB   │ 4.56 GiB     │          11000000008 │
└──────────┴───────┴───────────────────────────┴────────────┴──────────────┴────────────┴───────────┴────────────┘
curl -X POST 'http://10.253.215.3:8040/api/update_config?streaming_load_max_mb=102400'

mysql -h 10.253.215.3 -P 9030 -uroot
 
CREATE DATABASE IF NOT EXISTS dw;
use dw;
drop table if exists bench;

CREATE TABLE bench(c_int BIGINT, c_str STRING, c_float DOUBLE) 
ENGINE=OLAP PRIMARY KEY(c_int) 
DISTRIBUTED BY HASH(c_int) PROPERTIES( "replication_num" = "1" );

time curl --location-trusted -u root: -H "column_separator:," -H "columns: c_int, c_str, c_float" -T bench.csv -XPUT  http://10.253.215.3:8030/api/dw/bench/_stream_load
real	0m50.686s

show data;
+-----------+----------------+---------------------+
| TableName | Size           | ReplicaCount        |
+-----------+----------------+---------------------+
| bench     | 4.765 GB       | 10                  |

avg float

select avg(c_float) from bench;
┌──────────avg(c_float)─┐
│ 0.0006192485500413683 │
└───────────────────────┘
1 row in set. Elapsed: 0.109 sec. Processed 100.00 million rows, 800.00 MB (918.10 million rows/s., 7.34 GB/s.)
select avg(c_float) from bench;
+-----------------------+
| avg(c_float)          |
+-----------------------+
| 0.0006192485500413731 |
+-----------------------+
1 row in set (0.129 sec)

sum int

select sum(c_int) from bench;
┌──────────sum(c_int)─┐
│ 3142669981367830385 │
└─────────────────────┘
1 row in set. Elapsed: 0.095 sec. Processed 100.00 million rows, 800.00 MB (1.05 billion rows/s., 8.41 GB/s.)
select sum(c_int) from bench;
+---------------------+
| sum(c_int)          |
+---------------------+
| 3142669981367830385 |
+---------------------+
1 row in set (0.115 sec)

max

select max(c_int), max(c_str), max(c_float) from bench;
┌──────────max(c_int)─┬─max(c_str)───────────────────────┬─────max(c_float)─┐
│ 9223371898726285859 │ ffffffdecb529477aba61dd312b6a93f │ 6257.01964106231 │
└─────────────────────┴──────────────────────────────────┴──────────────────┘
1 row in set. Elapsed: 1.176 sec. Processed 100.00 million rows, 5.70 GB (85.06 million rows/s., 4.85 GB/s.)
select max(c_int), max(c_str), max(c_float) from bench;
+---------------------+------------------------------------+------------------+
| max(c_int)          | max(c_str)                         | max(c_float)     |
+---------------------+------------------------------------+------------------+
| 9223371898726285859 | "ffffffdecb529477aba61dd312b6a93f" | 6257.01964106231 |
+---------------------+------------------------------------+------------------+
1 row in set (0.831 sec)

groupby string col ( not in PK )

select c_str from bench group by c_str order by c_str limit 1;
┌─c_str────────────────────────────┐
│ 0000002760a7f6313eb52ef22f47137a │
└──────────────────────────────────┘
1 row in set. Elapsed: 9.789 sec. Processed 100.00 million rows, 4.10 GB (10.22 million rows/s., 418.82 MB/s.)
select c_str from bench group by c_str order by c_str limit 1;
+------------------------------------+
| c_str                              |
+------------------------------------+
| "0000002760a7f6313eb52ef22f47137a" |
+------------------------------------+
1 row in set (5.984 sec)
select c_str from bench group by c_str limit 1;
┌─c_str────────────────────────────┐
│ b20d2d29ee46f9b13ccbc0cc269a3dbe │
└──────────────────────────────────┘
1 row in set. Elapsed: 4.764 sec. Processed 100.00 million rows, 4.10 GB (20.99 million rows/s., 860.68 MB/s.)
 select c_str from bench group by c_str limit 1;
+------------------------------------+
| c_str                              |
+------------------------------------+
| "23f51db65775984a133f0c0017685637" |
+------------------------------------+
1 row in set (4.727 sec)

groupby int col ( in PK )

select c_int from bench group by c_int order by c_int limit 1;
┌────────────────c_int─┐
│ -9223371953165115584 │
└──────────────────────┘
1 row in set. Elapsed: 3.712 sec. Processed 100.00 million rows, 800.00 MB (26.94 million rows/s., 215.50 MB/s.)
select c_int from bench group by c_int order by c_int limit 1;
+----------------------+
| c_int                |
+----------------------+
| -9223371953165115584 |
+----------------------+
1 row in set (2.698 sec)

order by string

select c_str from bench order by c_str limit 1;
┌─c_str────────────────────────────┐
│ 0000002760a7f6313eb52ef22f47137a │
└──────────────────────────────────┘
1 row in set. Elapsed: 0.728 sec. Processed 100.00 million rows, 4.10 GB (137.29 million rows/s., 5.63 GB/s.)
select c_str from bench order by c_str limit 1;
+------------------------------------+
| c_str                              |
+------------------------------------+
| "0000002760a7f6313eb52ef22f47137a" |
+------------------------------------+
1 row in set (0.702 sec)

like

select count() from bench where c_str like '%00000%';
┌─count()─┐
│    2469 │
└─────────┘
1 row in set. Elapsed: 0.671 sec. Processed 100.00 million rows, 4.10 GB (148.94 million rows/s., 6.11 GB/s.)
select count() from bench where c_str like '%00000%';
+---------+
| count() |
+---------+
|    2469 |
+---------+
1 row in set (0.217 sec)

filter by float and order

select c_str from bench where  c_float <= 1000 order by c_int limit 1;
┌─c_str────────────────────────────┐
│ 00dce811cb725e98d64ea581c0db5536 │
└──────────────────────────────────┘
1 row in set. Elapsed: 0.015 sec. Processed 376.83 thousand rows, 21.48 MB (25.76 million rows/s., 1.47 GB/s.)


select c_str from bench where  c_float <= 1000 order by c_int limit 1 settings optimize_read_in_order=0;
┌─c_str────────────────────────────┐
│ 00dce811cb725e98d64ea581c0db5536 │
└──────────────────────────────────┘
1 row in set. Elapsed: 1.078 sec. Processed 100.00 million rows, 5.70 GB (92.77 million rows/s., 5.29 GB/s.)
select c_str from bench where  c_float <= 1000 order by c_int limit 1;
+------------------------------------+
| c_str                              |
+------------------------------------+
| "00dce811cb725e98d64ea581c0db5536" |
+------------------------------------+
1 row in set (0.644 sec)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment