Created
May 9, 2026 04:05
-
-
Save jmcarp/eeef6f8b3852fbb7ba8c32d0e22f0a61 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # | |
| # Empirical test: which settings actually make ALTER TABLE ... MODIFY TTL | |
| # return without waiting for the materialization mutation, on the v23.8 fork | |
| # of ClickHouse used by oximeter. | |
| # | |
| # Predictions from reading work/src/src/Storages/StorageMergeTree.cpp: | |
| # - defaults: ALTER blocks until mutation finishes | |
| # - SETTINGS mutations_sync = 0: identical to defaults (no-op) | |
| # - SETTINGS alter_sync = 0: identical to defaults (no-op in v23.8) | |
| # - SETTINGS materialize_ttl_after_modify = 0: | |
| # ALTER returns instantly; no mutation; | |
| # old rows survive past the new horizon | |
| # | |
| # Each variant runs against a freshly-populated table so we don't get the | |
| # "second run is fast because parts already materialized" artifact. | |
| set -euo pipefail | |
| CH_BIN=${CH_BIN:-/root/code/garbage-compactor/clickhouse/work/src/build/programs/clickhouse} | |
| WORK_DIR=${WORK_DIR:-/var/tmp/ch-ttl-test} | |
| PORT_TCP=${PORT_TCP:-9300} | |
| PORT_HTTP=${PORT_HTTP:-8323} | |
| PORT_INTER=${PORT_INTER:-9309} | |
| ROWS=${ROWS:-100000000} | |
| SPAN_DAYS=${SPAN_DAYS:-60} | |
| OLD_TTL_DAYS=${OLD_TTL_DAYS:-30} | |
| NEW_TTL_DAYS=${NEW_TTL_DAYS:-7} | |
| ch_q() { | |
| "$CH_BIN" client --port "$PORT_TCP" -q "$1" | |
| } | |
| ch_q_silent() { | |
| "$CH_BIN" client --port "$PORT_TCP" -q "$1" >/dev/null | |
| } | |
| start_server() { | |
| rm -rf "$WORK_DIR" | |
| mkdir -p "$WORK_DIR/data" "$WORK_DIR/logs" "$WORK_DIR/tmp" \ | |
| "$WORK_DIR/user_files" "$WORK_DIR/format_schemas" | |
| cat > "$WORK_DIR/config.xml" <<EOF | |
| <clickhouse> | |
| <logger> | |
| <level>information</level> | |
| <log>$WORK_DIR/logs/server.log</log> | |
| <errorlog>$WORK_DIR/logs/server.err.log</errorlog> | |
| <size>100M</size> | |
| <count>3</count> | |
| </logger> | |
| <tcp_port>$PORT_TCP</tcp_port> | |
| <http_port>$PORT_HTTP</http_port> | |
| <interserver_http_port>$PORT_INTER</interserver_http_port> | |
| <listen_host>127.0.0.1</listen_host> | |
| <path>$WORK_DIR/data/</path> | |
| <tmp_path>$WORK_DIR/tmp/</tmp_path> | |
| <user_files_path>$WORK_DIR/user_files/</user_files_path> | |
| <format_schema_path>$WORK_DIR/format_schemas/</format_schema_path> | |
| <default_profile>default</default_profile> | |
| <default_database>default</default_database> | |
| <timezone>UTC</timezone> | |
| <mark_cache_size>5368709120</mark_cache_size> | |
| <max_concurrent_queries>100</max_concurrent_queries> | |
| <max_connections>100</max_connections> | |
| <users> | |
| <default> | |
| <password></password> | |
| <networks><ip>::/0</ip></networks> | |
| <profile>default</profile> | |
| <quota>default</quota> | |
| <access_management>1</access_management> | |
| </default> | |
| </users> | |
| <profiles> | |
| <default/> | |
| </profiles> | |
| <quotas> | |
| <default/> | |
| </quotas> | |
| </clickhouse> | |
| EOF | |
| "$CH_BIN" server --config-file="$WORK_DIR/config.xml" \ | |
| --pid-file="$WORK_DIR/server.pid" \ | |
| --daemon | |
| for _ in $(seq 1 30); do | |
| if ch_q "SELECT 1" >/dev/null 2>&1; then | |
| echo "server up on port $PORT_TCP (pid $(cat "$WORK_DIR/server.pid"))" | |
| return 0 | |
| fi | |
| sleep 1 | |
| done | |
| echo "server failed to start; see $WORK_DIR/logs/" >&2 | |
| return 1 | |
| } | |
| stop_server() { | |
| if [[ -f "$WORK_DIR/server.pid" ]]; then | |
| local pid | |
| pid=$(cat "$WORK_DIR/server.pid") | |
| kill "$pid" 2>/dev/null || true | |
| for _ in $(seq 1 15); do | |
| kill -0 "$pid" 2>/dev/null || break | |
| sleep 1 | |
| done | |
| kill -9 "$pid" 2>/dev/null || true | |
| fi | |
| } | |
| populate_table() { | |
| local table=$1 | |
| ch_q_silent "DROP TABLE IF EXISTS $table" | |
| ch_q_silent " | |
| CREATE TABLE $table ( | |
| ts DateTime, | |
| id UInt64, | |
| value Float64, | |
| payload String | |
| ) | |
| ENGINE = MergeTree | |
| PARTITION BY toYYYYMMDD(ts) | |
| ORDER BY (id, ts) | |
| TTL ts + INTERVAL $OLD_TTL_DAYS DAY | |
| " | |
| ch_q_silent " | |
| INSERT INTO $table | |
| SELECT | |
| now() - toIntervalSecond(rand() % ($SPAN_DAYS * 86400)) AS ts, | |
| number AS id, | |
| rand() / 0xFFFFFFFF AS value, | |
| toString(number) AS payload | |
| FROM numbers_mt($ROWS) | |
| " | |
| ch_q_silent "OPTIMIZE TABLE $table FINAL" | |
| } | |
| run_variant() { | |
| local name=$1 | |
| local settings=$2 | |
| local table="t_${name}" | |
| echo | |
| echo "============================================================" | |
| echo "variant: $name" | |
| echo " settings clause: ${settings:-<none>}" | |
| echo "============================================================" | |
| echo "[$(date +%T)] populating $table with $ROWS rows..." | |
| populate_table "$table" | |
| local rows_before parts_before | |
| rows_before=$(ch_q "SELECT count() FROM $table") | |
| parts_before=$(ch_q "SELECT count() FROM system.parts WHERE table = '$table' AND active") | |
| echo " rows before: $rows_before parts: $parts_before" | |
| local alter_sql="ALTER TABLE $table MODIFY TTL ts + INTERVAL $NEW_TTL_DAYS DAY ${settings}" | |
| echo "[$(date +%T)] running ALTER..." | |
| # --time makes clickhouse client print server-side query elapsed seconds | |
| # (e.g. "12.345") to stderr at the end of the query. Capture stderr to | |
| # a file so we can extract the timing line without losing real errors. | |
| local stderr_file | |
| stderr_file=$(mktemp -t ch-ttl-test.XXXXXX) | |
| local t0_ns t1_ns | |
| t0_ns=$(date +%s%N) | |
| if ! "$CH_BIN" client --port "$PORT_TCP" --time -q "$alter_sql" 2>"$stderr_file"; then | |
| echo "ALTER failed; clickhouse client stderr:" >&2 | |
| cat "$stderr_file" >&2 | |
| rm -f "$stderr_file" | |
| return 1 | |
| fi | |
| t1_ns=$(date +%s%N) | |
| local wall_ms=$(( (t1_ns - t0_ns) / 1000000 )) | |
| local server_seconds | |
| server_seconds=$(grep -E '^[0-9]+(\.[0-9]+)?$' "$stderr_file" | tail -1) | |
| rm -f "$stderr_file" | |
| echo "[$(date +%T)] ALTER server-side: ${server_seconds:-?}s wall-clock: ${wall_ms}ms" | |
| # Mutation state immediately after ALTER returns. If the wait was honored, | |
| # is_done = 1 here. If the ALTER bypassed the wait, is_done is likely 0. | |
| # If no mutation was created at all, the row count is 0. | |
| echo " system.mutations immediately after ALTER:" | |
| ch_q " | |
| SELECT mutation_id, is_done, parts_to_do, | |
| formatDateTime(create_time, '%T') AS created, | |
| formatDateTime(latest_fail_time, '%T') AS last_fail | |
| FROM system.mutations | |
| WHERE table = '$table' | |
| ORDER BY create_time DESC | |
| LIMIT 3 | |
| FORMAT PrettyCompactMonoBlock | |
| " | |
| # Wait for any in-flight mutation to settle so post-state checks are valid. | |
| echo " waiting for any in-flight mutation to finish..." | |
| for _ in $(seq 1 600); do | |
| local pending | |
| pending=$(ch_q "SELECT count() FROM system.mutations WHERE table = '$table' AND is_done = 0") | |
| [[ "$pending" == "0" ]] && break | |
| sleep 1 | |
| done | |
| local rows_after old_rows | |
| rows_after=$(ch_q "SELECT count() FROM $table") | |
| old_rows=$(ch_q "SELECT count() FROM $table WHERE ts < now() - toIntervalDay($NEW_TTL_DAYS)") | |
| echo " rows after mutation settled: $rows_after" | |
| echo " rows older than ${NEW_TTL_DAYS}d (0 means new TTL applied to old parts): $old_rows" | |
| } | |
| trap stop_server EXIT | |
| echo "starting clickhouse $($CH_BIN client --version 2>/dev/null || true)" | |
| start_server | |
| # Each variant runs on its own fresh table. | |
| run_variant "defaults" "" | |
| run_variant "mutations_sync_0" "SETTINGS mutations_sync = 0" | |
| run_variant "alter_sync_0" "SETTINGS alter_sync = 0" | |
| run_variant "materialize_off" "SETTINGS materialize_ttl_after_modify = 0" | |
| echo | |
| echo "============================================================" | |
| echo "done." | |
| echo " data/logs preserved at $WORK_DIR (server stopped on exit)" | |
| echo "============================================================" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment