Skip to content

Instantly share code, notes, and snippets.

@Slach
Last active February 25, 2025 07:56
Show Gist options
  • Save Slach/78d8105b3008b81f4c63f7aba305300e to your computer and use it in GitHub Desktop.
Save Slach/78d8105b3008b81f4c63f7aba305300e to your computer and use it in GitHub Desktop.
how clickhouse-operator shall manage clickhouse-keeper scaling
  • clickhouse-keeper has its configuration in XML format,
  • NuRaft (the quorum library) has internal state which is stored in /var/lib/clickhouse/coordination/state

clickhouse-keeper version 24.8 allows rescaling via declarative changes to the configs

# docker-compose.yaml — three keeper replicas, each extending the shared
# `keeper` service template from keeper-service.yaml; only the NuRaft
# server id (KEEPER_MY_ID) differs per replica.
services:
  keeper1:
    extends:
      service: keeper
      file: keeper-service.yaml
    environment:
      - KEEPER_MY_ID=1
  keeper2:
    extends:
      service: keeper
      file: keeper-service.yaml
    environment:
      - KEEPER_MY_ID=2
  keeper3:
    extends:
      service: keeper
      file: keeper-service.yaml
    environment:
      - KEEPER_MY_ID=3
<?xml version="1.0"?>
<!-- keeper-config.xml: per-node clickhouse-keeper settings shared by all
     replicas. Peer membership lives separately in keeper-peers.xml so it
     can be rewritten during scale up/down without touching this file. -->
<clickhouse>
    <logger>
        <console>1</console>
        <level>trace</level>
        <!--
        <log>/var/log/clickhouse-keeper/clickhouse-keeper.log</log>
        <errorlog>/var/log/clickhouse-keeper/clickhouse-keeper.err.log</errorlog>
        -->
        <size>1000M</size>
        <count>10</count>
    </logger>
    <max_connections>4096</max_connections>
    <listen_host>0.0.0.0</listen_host>
    <!-- <listen_host>::</listen_host> -->
    <listen_try>1</listen_try>
    <keeper_server>
        <tcp_port>9181</tcp_port>
        <!-- Must be unique among all keeper servers; injected per container -->
        <server_id from_env="KEEPER_MY_ID"/>
        <!--
        <log_storage_path>/var/lib/clickhouse/coordination/logs</log_storage_path>
        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
        -->
        <coordination_settings>
            <raft_logs_level>information</raft_logs_level>
        </coordination_settings>
        <!-- HTTP endpoint used by container/k8s readiness probes -->
        <http_control>
            <port>9182</port>
            <readiness>
                <endpoint>/ready</endpoint>
            </readiness>
        </http_control>
    </keeper_server>
    <prometheus>
        <endpoint>/metrics</endpoint>
        <port>7000</port>
        <metrics>true</metrics>
        <events>true</events>
        <asynchronous_metrics>true</asynchronous_metrics>
        <status_info>true</status_info>
    </prometheus>
</clickhouse>
<?xml version="1.0"?>
<!-- keeper-peers.xml: live raft membership; the scale scripts rewrite this
     file with xmlstarlet, adding/removing <server> entries one at a time. -->
<clickhouse>
    <keeper_server>
        <raft_configuration>
            <server>
                <id>1</id>
                <hostname>keeper1</hostname>
                <port>9234</port>
            </server>
        </raft_configuration>
    </keeper_server>
</clickhouse>
<?xml version="1.0"?>
<!-- keeper-peers.original.xml: pristine single-node membership, copied over
     keeper-peers.xml at the start of the upscale script to reset state. -->
<clickhouse>
    <keeper_server>
        <raft_configuration>
            <server>
                <id>1</id>
                <hostname>keeper1</hostname>
                <port>9234</port>
            </server>
        </raft_configuration>
    </keeper_server>
</clickhouse>
# keeper-service.yaml — shared service template extended by keeper1..3.
services:
  keeper:
    image: ${KEEPER_IMAGE:-clickhouse/clickhouse-keeper}:${KEEPER_VERSION:-latest-alpine}
    volumes:
      # membership (rewritten during rescale) and static per-node config
      - ./keeper-peers.xml:/etc/clickhouse-keeper/keeper_config.d/keeper-peers.xml
      - ./keeper-config.xml:/etc/clickhouse-keeper/keeper_config.d/keeper-config.xml
    healthcheck:
      # four-letter-word "ruok" over the keeper client port
      test: echo ruok | nc localhost 9181
      interval: 3s
      timeout: 2s
      retries: 5
      start_period: 2s
    security_opt:
      - label:disable
  • if echo srvr | nc 127.0.0.1 9181 | grep -i -c "leader" != "0" then go to another node and request it to become the leader: echo rqld | nc 127.0.0.1 9181
    • poll echo srvr | nc 127.0.0.1 9181 | grep -i -c "follower" in node which we will drop
  • change XML /etc/clickhouse-keeper/keeper_config.d/ configmap <raft_configuration> remove <server><id>XX
  • poll /keeper/config in other nodes (node which we delete will stop answer to queries)
    • zk protocol directly or execution clickhouse-keeper client -q "get '/keeper/config'"
    • wait until server.X=service-name-{shard}-{replica}:9234;participant;1 is gone
  • delete statefulset
#!/usr/bin/env bash
# Downscale clickhouse-keeper from 3 nodes to 1, removing the highest id first.
set -xe

for i in {3..2}; do
  # If the node we are about to drop is the leader, ask a surviving node
  # to take leadership (four-letter-word "rqld") before removing it.
  set +e
  is_leader=$(docker compose exec "keeper${i}" bash -c 'echo srvr | nc 127.0.0.1 9181 | grep -i -c "leader"')
  set -e
  if [[ "0" != "${is_leader}" ]]; then
    # BUG FIX: plain `j=$i-1` assigns the literal string "3-1" (no shell
    # arithmetic), which would target a nonexistent container "keeper3-1".
    j=$((i - 1))
    docker compose exec "keeper${j}" bash -c 'echo rqld | nc 127.0.0.1 9181'
  fi

  # Wait until the node to be dropped reports itself as a follower.
  is_follower="0"
  while [[ "0" == "${is_follower}" ]]; do
    set +e
    is_follower=$(docker compose exec "keeper${i}" bash -c 'echo srvr | nc 127.0.0.1 9181 | grep -i -c "follower"')
    set -e
    sleep 1
  done

  # Remove this node's <server> entry from the raft membership config.
  xmlstarlet ed -L -d "/clickhouse/keeper_server/raft_configuration/server[id='${i}']" keeper-peers.xml

  # Poll /keeper/config on every remaining node until the dropped server
  # has disappeared from the quorum view (the dropped node itself stops
  # answering queries, so only poll the survivors).
  for j in $(seq 1 $((i - 1))); do
    is_exists="1"
    while [ "0" != "${is_exists}" ]; do
      set +e
      is_exists=$(docker compose exec "keeper${j}" clickhouse-keeper client -q "get '/keeper/config'" | grep -c "^server\.${i}=keeper${i}")
      echo $?
      set -e
      sleep 1
    done
  done

  # Kubernetes analogue: delete the statefulset / pod for this replica.
  docker compose stop "keeper${i}"
  docker compose rm -f "keeper${i}"
done
  • change XML /etc/clickhouse-keeper/keeper_config.d/ configmap <clickhouse><keeper_server><raft_configuration>...
    • add <server><id>X</id> <hostname>service-name-X-X</hostname> <port>9234</port></server> in raft_configuration section
  • create statefulset
  • poll /keeper/config in all nodes
    • zk protocol directly or execution clickhouse-keeper client -q "get '/keeper/config'"
    • wait until server.X=service-name-{shard}-{replica}:9234;participant;1 has spread to all servers
#!/usr/bin/env bash
# Upscale clickhouse-keeper from 1 node to 3, adding one node at a time.
set -xe

# Start from a clean single-node cluster and pristine membership file.
docker compose down
docker compose up -d keeper1
cp -v keeper-peers.original.xml keeper-peers.xml

for i in {2..3}; do
  # Append <server><id>i</id><hostname>keeperI</hostname><port>9234</port></server>
  # to the raft membership config. Requires xmlstarlet (apt install -y xmlstarlet).
  # The first -s creates an empty <server>; the following ones fill the
  # just-created element (matched via server[last()]).
  xmlstarlet ed -L \
    -s "/clickhouse/keeper_server/raft_configuration" -t elem -n "server" \
    -s "/clickhouse/keeper_server/raft_configuration/server[last()]" -t elem -n "id" -v "$i" \
    -s "/clickhouse/keeper_server/raft_configuration/server[last()]" -t elem -n "hostname" -v "keeper${i}" \
    -s "/clickhouse/keeper_server/raft_configuration/server[last()]" -t elem -n "port" -v "9234" \
    keeper-peers.xml

  # Kubernetes analogue: create the statefulset / pod for this replica.
  docker compose up -d "keeper${i}"

  # Poll /keeper/config (via `clickhouse-keeper client`) on every node,
  # including the new one, until the new server's membership line
  # `server.<i>=keeper<i>...` is visible everywhere.
  for j in $(seq 1 "${i}"); do
    is_exists="0"
    while [ "0" == "${is_exists}" ]; do
      set +e
      is_exists=$(docker compose exec "keeper${j}" clickhouse-keeper client -q "get '/keeper/config'" | grep -c "^server\.${i}=keeper${i}")
      echo $?
      set -e
      sleep 1
    done
  done
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment