# Hosting stack for a docs MCP server.
#
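# Before the first deploy, replace every <product> below with your product
# name, and fill in the other placeholders: <registry>/<owner> in the image
# reference, PRODUCT_DOCS_URL, and the /path/to/models host path.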
# Volumes: usage logs are mounted to a host path so they survive
# Watchtower-driven container recreates.
#
# This template assumes a reverse proxy / Cloudflare Tunnel terminates
# TLS in front of port 8000. Adjust if your infra differs.

services:

  # Docs MCP server. Runs the :latest tag, which the watchtower service in
  # this file keeps current.
  <product>-docs-mcp:
    container_name: "<product>-docs-mcp"
    image: "<registry>/<owner>/<product>-docs-mcp:latest"
    restart: "unless-stopped"
    networks:
      - "mcp"
    depends_on:
      # Short form: only guarantees the reranker container is started first.
      - "<product>-rerank"
    ports:
      # host:container — the container side matches MCP_PORT below.
      - "8000:8000"
    volumes:
      # Bind mount so usage logs outlive container recreates; the container
      # side matches USAGE_LOG_DIR below.
      - "./<product>-docs-mcp-logs:/app/var/logs"
    labels:
      # Opt-in flag: Watchtower only polls containers carrying this label.
      com.centurylinklabs.watchtower.enable: "true"
    environment:
      PRODUCT_DOCS_URL: "https://docs.example.com"
      PRODUCT_NAME: "<product>"

      # Streamable-HTTP transport, listening on all container interfaces.
      # Production requires stateless mode so that clients don't lose
      # sessions when Watchtower recreates the container.
      # NOTE(review): no variable in this block turns stateless mode on —
      # confirm it is the server default, or add the setting here.
      MCP_HOST: "0.0.0.0"
      MCP_PORT: "8000"
      MCP_TRANSPORT: "streamable-http"

      # Hostnames the server accepts. If MetaMCP or another gateway sits in
      # front and reaches this container by its compose DNS name
      # (e.g. <product>-docs-mcp:8000), that hostname must be listed.
      # "*" disables the rebind check entirely.
      MCP_ALLOWED_HOSTS: "<product>-docs-mcp,localhost,127.0.0.1"

      # Phase 6 — reranker sidecar (jina-reranker-v2-base via llama.cpp).
      RERANK_POOL: "200"
      RERANK_TIMEOUT: "30"
      RERANK_URL: "http://<product>-rerank:8080"

      # Phase 8 — hybrid retrieval (BM25 + dense + RRF). Meant to be enabled
      # only after the eval harness shows the dense-only path missing
      # technical-term queries that BM25 catches.
      # NOTE(review): the template ships with this already "true" — confirm
      # that is the intended default.
      HYBRID_SEARCH: "true"

      # Phase 10 — usage telemetry.
      USAGE_LOG_DIR: "/app/var/logs"
      USAGE_LOG_KEEP_DAYS: "90"

  # Reranker sidecar: a llama.cpp server hosting jina-reranker-v2-base.
  # Needs GPU access; adjust runtime/devices for your hardware.
  <product>-rerank:
    container_name: "<product>-rerank"
    image: "ghcr.io/ggml-org/llama.cpp:server-cuda"
    restart: "unless-stopped"
    networks:
      - "mcp"
    volumes:
      # Read-only bind mount of the host model directory. Download the GGUF
      # from Hugging Face (gguf-org/jina-reranker-v2-base-multilingual-GGUF)
      # first; the --model path below must match the downloaded file name.
      - "/path/to/models:/models:ro"
    # Server arguments in list form, one token per item.
    command:
      - "--model"
      - "/models/jina-reranker-v2-base.Q8_0.gguf"
      - "--reranking"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8080"
      - "--n-gpu-layers"
      - "99"
      - "--ctx-size"
      - "4096"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: "nvidia"
              count: 1
              capabilities:
                - "gpu"

  # Watchtower — polls on an interval (see WATCHTOWER_POLL_INTERVAL) and
  # auto-pulls :latest when the tag has changed.
  # Only watches containers labeled `com.centurylinklabs.watchtower.enable=true`.
  watchtower:
    image: containrrr/watchtower:latest
    container_name: watchtower
    restart: unless-stopped
    volumes:
      # Docker socket, so Watchtower can manage containers on this host.
      - /var/run/docker.sock:/var/run/docker.sock
      # If your registry requires auth, uncomment the next line to mount a
      # docker config. Keep it inside this list: a second `volumes:` key on
      # the service would be a duplicate mapping key.
      # - ./registry-auth.json:/config.json:ro
    environment:
      WATCHTOWER_POLL_INTERVAL: "300"   # seconds between polls (5 min)
      WATCHTOWER_LABEL_ENABLE: "true"   # ignore containers without the label
      WATCHTOWER_CLEANUP: "true"        # remove old images after pull
    networks:
      - mcp

# Bridge network that every service in this file attaches to.
networks:
  mcp:
    driver: "bridge"