streaming experiment with tansu

.env.example
COMPOSE_FILE=compose.yaml
GH_PAGER=""
RUST_BACKTRACE=1
RUST_LOG=warn,tansu_server=debug,tansu_storage=debug,tansu_cli=debug,tansu_topic=debug,tansu_cat=debug,tansu_schema_registry=debug
## postgres
POSTGRES_PASSWORD=postgres
## minio
MINIO_DEFAULT_BUCKETS=tansu:public,warehouse:public
## ClickHouse
CLICKHOUSE_USER=default
CLICKHOUSE_PASSWORD=password
CLICKHOUSE_DB=meteroid
## Tansu
S3_ACCESS_KEY_ID=minioadmin
S3_SECRET_ACCESS_KEY=minioadmin
S3_REGION=us-west-1
S3_ENDPOINT=http://minio:9000
S3_ALLOW_HTTP=true
# minio storage engine
# create a bucket "tansu" at: http://localhost:9001/buckets
# or: mc mb local/tansu
STORAGE_ENGINE=s3://tansu/
# memory storage engine
# STORAGE_ENGINE="memory://tansu/"
# use this endpoint when tansu and PostgreSQL are both running in docker compose:
# STORAGE_ENGINE="postgres://postgres:postgres@db"
#STORAGE_ENGINE="postgres://postgres:postgres@localhost"


Setup

Copy .env.example to .env so that you have a local working copy, then install the Python workspace dependencies with uv:

cp .env.example .env
uv sync --all-packages

Set up an rpk profile pointing at the local broker:

rpk profile create local \
  -s brokers=localhost:9092
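
Once the broker is up, the profile can be sanity-checked with rpk itself (Tansu speaks the Kafka wire protocol, so standard Kafka tooling works):

rpk profile use local
rpk cluster info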

Run

Start

# start the stack in the foreground
docker compose up
# check service status
docker compose ps
# stop the stack
docker compose down
# stop and also remove data volumes
docker compose down -v
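
To follow an individual service while it comes up, tail its logs:

docker compose logs -f tansu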

Create the tansu and warehouse buckets with a public policy at: http://localhost:9001/buckets (the bitnami minio image also creates them automatically via MINIO_DEFAULT_BUCKETS).
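
Alternatively, script the bucket setup with mc (assuming the default minioadmin credentials from .env; the alias name local is arbitrary):

mc alias set local http://localhost:9000 minioadmin minioadmin
mc mb local/tansu
mc mb local/warehouse
mc anonymous set public local/tansu
mc anonymous set public local/warehouse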

Verify

docker compose exec 'tansu' '/tansu' --help
docker compose exec 'tansu' '/tansu' topic delete 'employee'
# create employee topic with schema/employee.proto
docker compose exec 'tansu' '/tansu' topic create 'employee'
# produce data/employees.json against the employee topic's schema
docker compose exec 'tansu' '/tansu' cat produce 'employee' 'data/employees.json'
# consume employee topic
docker compose exec 'tansu' '/tansu' cat consume 'employee' --max-wait-time-ms=5000
# list topics
rpk topic list
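
Because Tansu speaks the Kafka protocol, the same topic can also be read back with rpk via the profile created earlier (-n 1 exits after a single record):

rpk topic consume employee --offset start -n 1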

compose.yaml

services:
  ###########################################################################
  # postgres database
  ###########################################################################
  postgres:
    image: postgres:17
    hostname: postgres
    container_name: postgres
    restart: unless-stopped
    command: >
      -c ssl=on
      -c ssl_cert_file=/etc/ssl/certs/ssl-cert-snakeoil.pem
      -c ssl_key_file=/etc/ssl/private/ssl-cert-snakeoil.key
      -c wal_level=logical
      -c max_wal_senders=1
      -c max_replication_slots=1
    ports:
      - '5432:5432'
    environment:
      POSTGRES_DB: postgres
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
    volumes:
      - pg_data:/var/lib/postgresql/data
      # - ./config/postgres/initdb.d/:/docker-entrypoint-initdb.d/
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U $$POSTGRES_USER -d $$POSTGRES_DB -q']
      interval: 60s
      start_period: 1m
      start_interval: 10s
      timeout: 15s
      retries: 5
  ###########################################################################
  # minio
  # create the `tansu` and `warehouse` buckets
  # with "public" policy at: http://localhost:9001/buckets
  # or: mc mb local/tansu && mc mb local/warehouse
  ###########################################################################
  minio:
    image: bitnami/minio:2025.4.22
    hostname: minio
    container_name: minio
    restart: unless-stopped
    volumes:
      - minio:/bitnami/minio/data
    environment:
      MINIO_ROOT_USER: ${S3_ACCESS_KEY_ID}
      MINIO_ROOT_PASSWORD: ${S3_SECRET_ACCESS_KEY}
      MINIO_DEFAULT_BUCKETS: ${MINIO_DEFAULT_BUCKETS:-tansu:public,warehouse:public}
      # MINIO_DOMAIN: minio
      MINIO_SKIP_CLIENT: "yes"
      MINIO_PROMETHEUS_AUTH_TYPE: public
    ports:
      - '9000:9000'
      - '9001:9001'
    healthcheck:
      test: ['CMD', 'mc', 'ready', 'local']
      interval: 60s
      start_period: 1m
      start_interval: 10s
      timeout: 15s
      retries: 5
  ###########################################################################
  # iceberg catalog
  # http://localhost:8181/v1/config?warehouse=warehouse
  # http://localhost:8181/v1/namespaces/warehouse
  # http://localhost:8181/v1/namespaces/warehouse/tables/employee
  ###########################################################################
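  # once running, a quick smoke test against the REST catalog API:
  #   curl -s http://localhost:8181/v1/namespaces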
  iceberg:
    image: apache/iceberg-rest-fixture:1.9.0
    hostname: iceberg
    container_name: iceberg
    restart: unless-stopped
    environment:
      AWS_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
      AWS_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
      AWS_REGION: ${S3_REGION}
      CATALOG_CATALOG__IMPL: org.apache.iceberg.jdbc.JdbcCatalog
      CATALOG_IO__IMPL: org.apache.iceberg.aws.s3.S3FileIO
      CATALOG_S3_ENDPOINT: ${S3_ENDPOINT:-http://minio:9000}
      CATALOG_S3_PATH__STYLE__ACCESS: 'true'
      # CATALOG_URI: jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory
      CATALOG_URI: jdbc:sqlite:/home/iceberg/iceberg.db
      CATALOG_WAREHOUSE: s3://warehouse/
    volumes:
      - iceberg:/home/iceberg
    ports:
      - '8181:8181'
    depends_on:
      minio:
        condition: service_healthy
  ###########################################################################
  # tansu - kafka alternative
  ###########################################################################
  tansu:
    image: ghcr.io/tansu-io/tansu:main
    hostname: tansu
    container_name: tansu
    restart: unless-stopped
    environment:
      RUST_BACKTRACE: ${RUST_BACKTRACE}
      RUST_LOG: ${RUST_LOG}
      CLUSTER_ID: tansu_cluster
      LISTENER_URL: tcp://[::]:9092
      ADVERTISED_LISTENER_URL: tcp://tansu:9092
      AWS_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
      AWS_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
      AWS_ALLOW_HTTP: ${S3_ALLOW_HTTP}
      AWS_DEFAULT_REGION: ${S3_REGION}
      AWS_ENDPOINT: ${S3_ENDPOINT:-http://minio:9000}
      SCHEMA_REGISTRY: file://schema
      STORAGE_ENGINE: ${STORAGE_ENGINE}
      DATA_LAKE: s3://warehouse/
      ICEBERG_CATALOG: http://iceberg:8181/
      ICEBERG_NAMESPACE: tansu
      PROMETHEUS_LISTENER_URL: tcp://[::]:9100
    volumes:
      - ./schema/:/schema/
      - ./data/:/data/
    ports:
      - '9092:9092'
      - '9100:9100'
    depends_on:
      iceberg:
        condition: service_healthy
  ###########################################################################
  # Redpanda Console
  # http://localhost:8080/admin/health
  # http://localhost:8080/admin/startup
  # http://localhost:8080/admin/metrics
  # Docs: https://github.com/redpanda-data/console/blob/master/docs/features/protobuf.md
  ###########################################################################
  console:
    image: redpandadata/console:v3.1.0
    hostname: console
    container_name: console
    environment:
      KAFKA_BROKERS: 'tansu:9092'
      KAFKA_CLIENTID: 'rpconsole;broker_count=1;host_override=tansu'
      SCHEMAREGISTRY_ENABLED: 'true'
      SCHEMAREGISTRY_URLS: https://demo.buf.dev/integrations/confluent/bufstream-demo
      #KAFKACONNECT_ENABLED: true
      #KAFKACONNECT_CLUSTERS_NAME: Connectors
      #KAFKACONNECT_CLUSTERS_URLS: http://connect:8083
      ## Enables Protobuf deserialization
      KAFKA_PROTOBUF_ENABLED: 'true'
      ## Instructs the console to use the Schema Registry rather than the local filesystem for deserialization
      KAFKA_PROTOBUF_SCHEMAREGISTRY_ENABLED: 'true'
      ## Provides an interval to refresh schemas from the schema registry.
      KAFKA_PROTOBUF_REFRESHINTERVAL: 5m
    ports:
      - '8080:8080'
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/admin/health"]
      interval: 15s
      timeout: 5s
      retries: 5
    # depends_on:
    #   tansu:
    #     condition: service_healthy
  ###########################################################################
  # arroyo - streaming analytics
  ###########################################################################
  arroyo:
    image: ghcr.io/arroyosystems/arroyo:latest
    # image: ghcr.io/arroyosystems/arroyo-single:latest
    hostname: arroyo
    container_name: arroyo
    profiles: [optional]
    environment:
      - DATABASE_HOST=postgres
      - DATABASE_NAME=postgres
      - DATABASE_USER=postgres
      - DATABASE_PASSWORD=${POSTGRES_PASSWORD:-postgres}
    ports:
      - '5115:5115'
    entrypoint: >
      bash -c "/app/arroyo migrate && /app/arroyo cluster"
    volumes:
      - ./config/arroyo/arroyo.toml:/app/arroyo.toml:ro
    depends_on:
      postgres:
        condition: service_healthy
      # minio:
      #   condition: service_healthy
      tansu:
        condition: service_healthy
    healthcheck:
      test: ['CMD', 'curl', '-f', 'localhost:5114/status']
      interval: 10s
      start_period: 5s
      timeout: 10s
      retries: 5
  ###########################################################################
  # ClickHouse - batch analytics
  # ClickHouse Playground: http://localhost:8123/play
  ###########################################################################
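  # HTTP interface example (credentials from .env):
  #   echo 'SELECT 1' | curl 'http://localhost:8123/?user=default&password=password' --data-binary @-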
  clickhouse:
    image: clickhouse/clickhouse-server:25.4-alpine
    hostname: clickhouse
    container_name: clickhouse
    profiles: [optional]
    ports:
      - '8123:8123'
      - '9010:9000'
      - '9009:9009'
    environment:
      CLICKHOUSE_USER: ${CLICKHOUSE_USER}
      CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD}
      CLICKHOUSE_DB: ${CLICKHOUSE_DB}
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1
      AWS_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
      AWS_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
      AWS_REGION: ${S3_REGION}
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
    healthcheck:
      test: ["CMD", "wget", "--tries=3", "--spider", "-q", "localhost:8123/ping"]
      interval: 5s
      timeout: 3s
    volumes:
      - clickhouse:/var/lib/clickhouse
  ###########################################################################
  # prometheus
  ###########################################################################
  prometheus:
    image: prom/prometheus:v3.3.1
    hostname: prometheus
    container_name: prometheus
    restart: unless-stopped
    profiles: [optional]
    ports:
      - '9090:9090'
    volumes:
      - ./config/prometheus:/etc/prometheus
      - prom_data:/prometheus
    command:
      - --web.enable-lifecycle
      - --web.enable-otlp-receiver
      - --config.file=/etc/prometheus/prometheus.yml
    depends_on:
      tansu:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "wget", "--spider", "--quiet", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 50s
  ###########################################################################
  # grafana
  ###########################################################################
  grafana:
    image: grafana/grafana:11.6.1
    hostname: grafana
    container_name: grafana
    restart: unless-stopped
    profiles: [optional]
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=grafana
    ports:
      - '3000:3000'
    volumes:
      - ./grafana/dashboards:/etc/dashboards
      - ./grafana/provisioning:/etc/grafana/provisioning
      - grafana:/var/lib/grafana
    depends_on:
      prometheus:
        condition: service_healthy
###########################################################################
# volumes
###########################################################################
volumes:
  pg_data: {}
  minio: {}
  iceberg: {}
  clickhouse: {}
  redpanda: {}
  redis_data: {}
  prom_data: {}
  grafana: {}
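
Tansu exposes Prometheus metrics on port 9100 (PROMETHEUS_LISTENER_URL above), and the optional prometheus service scrapes them. A quick sanity check of both endpoints, assuming the optional profile is up (docker compose --profile optional up):

curl -s http://localhost:9100/metrics | head
curl -s 'http://localhost:9090/api/v1/query?query=up'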