@tripolskypetr
Created January 19, 2026 15:53
Ollama docker
docker-compose.yml:

version: "3.8"
services:
  ollama:
    build: .
    container_name: ollama
    ports:
      - "11434:11434"
    volumes:
      - ./.ollama:/root/.ollama
    environment:
      OLLAMA_HOST: 0.0.0.0
      OLLAMA_MAX_LOADED_MODELS: 2
      OLLAMA_NUM_PARALLEL: 8
      OLLAMA_MAX_QUEUE: 96
      OLLAMA_KEEP_ALIVE: 24h
      OLLAMA_FLASH_ATTENTION: 1
      OLLAMA_KV_CACHE_TYPE: q4_0
      NVIDIA_VISIBLE_DEVICES: all
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: always
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:11434/api/version"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 20s
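With the compose file and Dockerfile in place, the stack can be built and started; a minimal sketch, assuming the NVIDIA Container Toolkit is already installed on the host:

```shell
# Build the custom image and start the container in the background
docker compose up -d --build

# Confirm the API is reachable (same endpoint the healthcheck polls)
curl -f http://localhost:11434/api/version
```

The healthcheck will only report healthy once the server answers on port 11434, which the start_period above gives it 20 seconds to do.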
Dockerfile (curl is installed on top of the base image so the healthcheck above can call the API):

FROM ollama/ollama
RUN apt-get update && \
    apt-get install -y curl && \
    rm -rf /var/lib/apt/lists/*
Enter the running container and pull models:

docker-compose exec ollama sh
ollama pull gemma3:12b   # ~8 GB
ollama pull qwen3-vl:8b  # ~6 GB

Monitor GPU memory and utilization from the host:

watch -n 1 -d nvidia-smi
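Once a model is pulled, the server can be queried from the host over the published port; a minimal sketch using Ollama's /api/generate endpoint, assuming gemma3:12b was pulled as above:

```shell
# Single non-streaming completion; the prompt text is only an example
curl http://localhost:11434/api/generate -d '{
  "model": "gemma3:12b",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```

With OLLAMA_NUM_PARALLEL set to 8, up to eight such requests per loaded model are served concurrently; further requests queue up to OLLAMA_MAX_QUEUE.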