Skip to content

Instantly share code, notes, and snippets.

@ChristianKniep
Last active October 30, 2025 10:44
Show Gist options
  • Select an option

  • Save ChristianKniep/a5f522a8f2380dc1b1d7e883a7e1a8fc to your computer and use it in GitHub Desktop.

Select an option

Save ChristianKniep/a5f522a8f2380dc1b1d7e883a7e1a8fc to your computer and use it in GitHub Desktop.
MemMachine Locust Benchmark: docker-compose
services:
  # NVIDIA DCGM exporter container; port 9400 is published to the host.
  dcgm:
    container_name: dcgm
    image: nvcr.io/nvidia/k8s/dcgm-exporter:4.4.1-4.6.0-ubuntu22.04
    runtime: nvidia
    privileged: true
    environment:
      # NOTE(review): every GPU is exposed to the container, yet
      # CUDA_VISIBLE_DEVICES pins CUDA to index 0 — confirm this is intended.
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - CUDA_VISIBLE_DEVICES=0
      - LOG_LEVEL=debug
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
              count: all
    ports:
      - "9400:9400"
    networks:
      - memmachine-network

  # Prometheus node exporter, limited to the CPU, memory and load-average
  # collectors (all default collectors are switched off first).
  node:
    # NOTE(review): floating `latest` tag — consider pinning a version.
    image: quay.io/prometheus/node-exporter:latest
    ports:
      - "9100:9100"
    command:
      - '--collector.disable-defaults'
      - '--collector.cpu'
      - '--collector.meminfo'
      - '--collector.loadavg'
    restart: unless-stopped
    networks:
      - memmachine-network

  # Prometheus server. The scrape configuration is read from the bind-mounted
  # ./prometheus/prometheus.yml; the TSDB lives in the prometheus_data volume.
  prometheus:
    # NOTE(review): no tag given, so the image resolves to `latest`.
    image: prom/prometheus
    volumes:
      - prometheus_data:/prometheus
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
    ports:
      - "9090:9090"
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--storage.tsdb.retention.time=15d"
    restart: always
    networks:
      - memmachine-network

  # Grafana UI, published on http://localhost:3000.
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    user: "472:0"  # runs as grafana user to avoid permissions issues
    ports:
      - "3000:3000"  # http://localhost:3000
    environment:
      GF_SECURITY_ADMIN_USER: admin  # change in production
      GF_SECURITY_ADMIN_PASSWORD: admin  # change in production
      GF_SERVER_ROOT_URL: "%(protocol)s://%(domain)s/"
      GF_USERS_ALLOW_SIGN_UP: "false"
      # Optional: preinstall plugins (comma-separated)
      # GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-worldmap-panel
    healthcheck:
      # Healthy once the /api/health response contains "ok".
      test: ["CMD-SHELL", "wget -qO- http://localhost:3000/api/health | grep -q 'ok'"]
      interval: 10s
      timeout: 3s
      retries: 10
    volumes:
      - grafana-data:/var/lib/grafana
      # Optional: drop datasources/dashboards here.
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      # Optional: custom config.
      # - ./grafana/grafana.ini:/etc/grafana/grafana.ini:ro
    restart: unless-stopped
    networks:
      - memmachine-network

  # Ollama server with GPU access; port 11434 is published to the host.
  ollama:
    container_name: ollama
    image: ollama/ollama:latest
    runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - CUDA_VISIBLE_DEVICES=0
      # NOTE(review): same variable as on the dcgm service — confirm that
      # Ollama actually reads LOG_LEVEL.
      - LOG_LEVEL=debug
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
              count: all
    volumes:
      - ollama:/root/.ollama
      # NOTE(review): nothing in this file points Ollama at /models — verify
      # this mount is used.
      - models:/models
    ports:
      - "11434:11434"
    networks:
      - memmachine-network

  # LiteLLM proxy, backed by the Postgres `db` service below.
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    # Start the proxy with the config file bind-mounted from
    # ./litellm/config.yaml.
    volumes:
      - ./litellm/config.yaml:/app/config.yaml
    command:
      - "--config=/app/config.yaml"
    ports:
      # Map the container port to the host; change the host port if necessary.
      - "4000:4000"
    environment:
      # User, password and database must match the POSTGRES_* values on `db`.
      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
      STORE_MODEL_IN_DB: "True"  # allows adding models to proxy via UI
      # NOTE(review): these look like sample keys — replace before exposing
      # the proxy beyond a local benchmark.
      LITELLM_MASTER_KEY: "sk-1234"
      LITELLM_SALT_KEY: "sk-4321"
    depends_on:
      # Wait until the `db` healthcheck (pg_isready) passes instead of merely
      # until the container has been started, so the proxy does not try to
      # connect to a Postgres instance that is still initialising.
      db:
        condition: service_healthy
    healthcheck:
      # Probe the liveliness endpoint; a non-zero exit marks the check failed.
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 http://localhost:4000/health/liveliness || exit 1"]
      interval: 30s  # perform the health check every 30 seconds
      timeout: 10s  # the health check command times out after 10 seconds
      retries: 3  # retry up to 3 times if the health check fails
      start_period: 40s  # grace period after start before failures count
    networks:
      - memmachine-network

  # Postgres database used by the LiteLLM proxy.
  db:
    image: postgres:16
    restart: always
    container_name: litellm_db
    environment:
      # Keep in sync with DATABASE_URL on the `litellm` service.
      POSTGRES_DB: litellm
      POSTGRES_USER: llmproxy
      POSTGRES_PASSWORD: dbpassword9090
    # Not published to the host; uncomment to reach Postgres from outside.
    # ports:
    #   - "5432:5432"
    volumes:
      # Persists Postgres data across container restarts.
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
      interval: 1s
      timeout: 5s
      retries: 10
    networks:
      - memmachine-network
networks:
  # Shared network that every service in this file attaches to. It is marked
  # external, so Compose expects it to exist already and will not create it.
  memmachine-network:
    name: memmachine-network
    external: true
volumes:
  # Named volumes with default settings; each is mounted by the service noted.
  grafana-data: {}  # grafana: /var/lib/grafana
  prometheus_data: {}  # prometheus: /prometheus
  postgres_data: {}  # db: /var/lib/postgresql/data
  ollama: {}  # ollama: /root/.ollama
  models: {}  # ollama: /models
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment