# morpheus-docs/deploy/docker-compose.yml

# Hosting stack for a docs MCP server.
#
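# Replace `hvm` below with your product name on first deploy. It appears
# in the `hvm-rerank` service name, its container_name, RERANK_URL, and
# the depends_on entry; rename all of them together.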
# Volumes: usage logs are mounted to a host path so they survive
# Watchtower-driven container recreates.
#
# This template assumes a reverse proxy / Cloudflare Tunnel terminates
# TLS in front of port 8000. Adjust if your infra differs.

services:

  # Docs MCP server. Tracks the :latest tag; Watchtower pulls a new image
  # and recreates this container whenever that tag changes.
  morpheus-docs-mcp:
    image: "git.jpaul.io/justin/morpheus-docs:latest"
    container_name: morpheus-docs-mcp
    restart: unless-stopped
    labels:
      # Watchtower only polls containers that carry this label set to true.
      com.centurylinklabs.watchtower.enable: "true"
    networks:
      - mcp
    depends_on:
      - hvm-rerank
    ports:
      # Plain HTTP on host port 8000; TLS is expected to be terminated
      # upstream (reverse proxy / Cloudflare Tunnel).
      - "8000:8000"
    volumes:
      # Host-side directory for usage logs, so they outlive container
      # recreates.
      - "./morpheus-docs-mcp-logs:/app/var/logs"
    environment:
      PRODUCT_NAME: "morpheus"
      PRODUCT_DOCS_URL: "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007732en_us"

      # Streamable-HTTP transport, listening on every interface inside the
      # container. Stateless mode is required for production so that
      # clients don't lose sessions when Watchtower recreates the
      # container.
      # NOTE(review): nothing in this block explicitly enables stateless
      # mode -- confirm the image defaults to it for this transport.
      MCP_TRANSPORT: "streamable-http"
      MCP_HOST: "0.0.0.0"
      MCP_PORT: "8000"

      # Hostnames accepted by the rebind check. When MetaMCP or another
      # gateway reaches this container through its compose DNS name
      # (e.g. morpheus-docs-mcp:8000), that hostname must be listed here.
      # "*" disables the rebind check entirely.
      MCP_ALLOWED_HOSTS: "morpheus-docs-mcp,localhost,127.0.0.1"

      # Phase 6 -- reranker sidecar (jina-reranker-v2-base via llama.cpp).
      # The URL host is the reranker's compose service name.
      RERANK_URL: "http://hvm-rerank:8080"
      RERANK_POOL: "200"
      RERANK_TIMEOUT: "30"

      # Phase 8 -- hybrid retrieval (BM25 + dense + RRF).
      # Kept off: the eval on the HVM corpus (eval/results/baseline.md,
      # 2026-05-22) shows BM25-default + reranker beating hybrid on every
      # metric (MRR 0.920 vs 0.875). With this off, search_docs runs
      # BM25-first + reranker, and dense is the fallback when BM25 finds
      # nothing.
      HYBRID_SEARCH: "false"

      # Phase 10 -- usage telemetry. The log directory is the container
      # side of the bind mount declared under `volumes`.
      USAGE_LOG_DIR: "/app/var/logs"
      USAGE_LOG_KEEP_DAYS: "90"

  # Reranker sidecar: a llama.cpp server hosting jina-reranker-v2-base.
  # The CUDA image and the device reservation below need GPU access;
  # adjust runtime/devices for your hardware.
  #
  # On dev / CPU-only hosts, replace this service with
  # scripts/rerank_server.py (sentence-transformers
  # ms-marco-MiniLM-L-6-v2). It exposes the same /v1/rerank shape, at
  # ~500ms/batch on CPU versus ~50ms on GPU with the jina GGUF.
  hvm-rerank:
    image: "ghcr.io/ggml-org/llama.cpp:server-cuda"
    container_name: hvm-rerank
    restart: unless-stopped
    networks:
      - mcp
    volumes:
      # Read-only bind mount of the host directory holding the GGUF model.
      # Download it from huggingface
      # (gguf-org/jina-reranker-v2-base-multilingual-GGUF) first.
      - "/path/to/models:/models:ro"
    # Server arguments in exec (list) form, one token per item; every
    # item is quoted so numeric-looking values stay strings.
    command:
      - "--model"
      - "/models/jina-reranker-v2-base.Q8_0.gguf"
      - "--reranking"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8080"
      - "--n-gpu-layers"
      - "99"
      - "--ctx-size"
      - "4096"
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu

  # Watchtower: auto-pulls :latest when a new image is pushed.
  # It only watches containers labeled
  # `com.centurylinklabs.watchtower.enable=true`.
  watchtower:
    image: "containrrr/watchtower:latest"
    container_name: watchtower
    restart: unless-stopped
    networks:
      - mcp
    environment:
      # Poll interval in seconds (300 = 5 min).
      WATCHTOWER_POLL_INTERVAL: "300"
      # Opt-in mode: act only on containers carrying the enable label.
      WATCHTOWER_LABEL_ENABLE: "true"
      # Remove old images after pull.
      WATCHTOWER_CLEANUP: "true"
    volumes:
      - "/var/run/docker.sock:/var/run/docker.sock"
      # If your registry requires auth, mount a docker config by
      # uncommenting the item below. Add it to this list; a second
      # `volumes:` key would be a duplicate mapping key.
      # - "./registry-auth.json:/config.json:ro"

# Bridge network joined by all three services above. RERANK_URL addresses
# the reranker by its service name (hvm-rerank) on this network.
networks:
  mcp:
    driver: bridge