Merge pull request 'CI fix (localhost OLLAMA endpoint) + Drawbar deploy pattern' (#11) from ci-fix-and-deploy-pattern into main

2026-05-25 17:23:20 -04:00
parent 038475e7fd cd4a0f3148
commit c5a00afca2
3 changed files with 108 additions and 74 deletions
@@ -32,9 +32,11 @@ env:
  REGISTRY_PUSH: 192.168.0.2:1234
  REGISTRY_PULL: git.jpaul.io
  IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }}
-  # 4-GPU pool, weighted toward .0.125 (4090). See refresh.yml for the
-  # bench numbers. .0.2:11434 excluded — not GPU-pinned.
-  OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435,http://localhost:11434
+  # 3-GPU LAN pool, weighted toward .0.125 (4090). See refresh.yml for
+  # the bench numbers. .0.2:11434 excluded (not GPU-pinned). localhost
+  # excluded from CI (runner container has no Ollama on its loopback;
+  # works in dev but fails in CI with connection refused).
+  OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435
  EMBED_MODEL: nomic-embed-text
  PRODUCT_NAME: crop_seed

@@ -34,16 +34,17 @@ env:
  REGISTRY_PULL: git.jpaul.io
  IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }}

-  # Embedder pool — 4 GPUs total, with .0.125 (RTX 40-series) listed
-  # multiple times to weight the round-robin scheduler toward the
-  # fastest endpoint. Measured throughput (50-chunk batches on
+  # Embedder pool — 3 GPU-pinned endpoints reachable from the runner
+  # container on .0.2. Measured throughput (50-chunk batches on
  # nomic-embed-text):
  #   .0.125:11434  (4090)       242 embeds/sec  ← weighted ×4
  #   .0.2:11436    (GPU-pinned) 108 embeds/sec  ← weighted ×2
  #   .0.2:11435    (GPU-pinned)  72 embeds/sec  ← weight 1
-  #   localhost (TITAN X)         37 embeds/sec  ← weight 1
  # NOTE: .0.2:11434 is NOT GPU-pinned — exclude.
-  OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435,http://localhost:11434
+  # NOTE: `localhost:11434` works locally during dev but resolves to the
+  # runner CONTAINER's own localhost in CI (no Ollama there → errno
+  # 111, connection refused). Use only LAN endpoints from CI.
+  OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435
  EMBED_MODEL: nomic-embed-text

  PRODUCT_NAME: crop_seed
@@ -1,83 +1,114 @@
-# Hosting stack for the seed-mcp MCP server.
+# seed-mcp service block to MERGE into Drawbar's parent compose file
+# at /home/justin/drawbar/drawbar-backend/docker-compose.yml on
+# trashpanda.
 #
-# This compose file is meant to live in Drawbar's deploy stack and is
-# included here as the canonical reference. The seed-mcp image is
-# self-contained — corpus + Chroma + BM25 are baked in by CI at build
-# time — so the only host-side concerns are usage-log persistence and
-# the shared reranker / Ollama sidecars.
-#
-# The reranker container (llama-rerank) is SHARED with crop-chem-docs.
-# Drawbar's compose already has it from the crop-chem-docs deploy;
-# don't duplicate it here when stacking the two MCPs together.
-#
-# Watchtower auto-pulls on :latest changes — but ONLY for containers
-# labeled `com.centurylinklabs.watchtower.enable=true`.
+# Pattern matches the existing chem-mcp service (crop-chem-docs):
+# internal-only, no host port, MCP_PORT=8080 inside container,
+# reached via docker DNS as `seed-mcp:8080` from drawbar-backend-api.
+# Ollama lives on the host (host.docker.internal); the shared
+# llama-rerank sidecar must be attached to drawbar-backend_default
+# (see "llama-rerank patch" section below).

 services:

-  # The seed-mcp server. Image is rebuilt nightly by .gitea/workflows/
-  # refresh.yml; pulled via the public git.jpaul.io endpoint (CF
-  # tunnels in front, so the 100 MB body cap doesn't matter on pulls).
+  # seed-mcp — 760 variety identity records + 4,313 trial documents.
+  # Tools land in the advisor's catalog under the `seed:` prefix via
+  # the mcp_client multiplex (same pattern chem-mcp uses).
  seed-mcp:
    image: git.jpaul.io/justin/seed-mcp:latest
-    container_name: seed-mcp
-    restart: unless-stopped
-    ports:
-      - "8001:8000"
    environment:
-      PRODUCT_NAME: "crop_seed"
-      PRODUCT_DOCS_URL: "https://git.jpaul.io/justin/seed-mcp"
-
-      # Streamable-HTTP transport, stateless mode (every request gets
-      # a fresh ephemeral session). Required for production: avoids
-      # 404 storms when Watchtower recreates the container while
-      # clients hold session IDs from the previous instance.
      MCP_TRANSPORT: streamable-http
      MCP_HOST: 0.0.0.0
-      MCP_PORT: "8000"
+      MCP_PORT: "8080"
+      # FastMCP DNS-rebinding protection — disabled because we're
+      # only reachable on the internal docker network as
+      # `seed-mcp:8080`; not published to the host.
      MCP_DISABLE_DNS_REBINDING_PROTECTION: "1"
-
-      # Embedding pool. Drawbar's compose puts the seed-mcp on the
-      # same docker network as Ollama; comma-separate multiple
-      # endpoints (one per GPU) for indexing throughput. At runtime
-      # only search_docs hits this (one embed per query, ~5ms).
-      OLLAMA_URL: "http://ollama:11434"
-
-      # Reranker. The llama.cpp sidecar serving jina-reranker-v2-base
-      # is SHARED with crop-chem-docs. Drawbar's compose already
-      # defines llama-rerank from the crop-chem-docs deploy; we just
-      # point at the same DNS name. Falls back to dense-only on any
-      # rerank error so MCP requests never block on the sidecar.
-      RERANK_URL: "http://llama-rerank:8080"
-      RERANK_POOL: "200"
+      PRODUCT_NAME: crop_seed
+      # Query-time embeddings via the host's Ollama on :11434
+      # (nomic-embed-text). host.docker.internal is mapped below.
+      OLLAMA_URL: ${SEED_OLLAMA_URL:-http://host.docker.internal:11434}
+      EMBED_MODEL: ${SEED_EMBED_MODEL:-nomic-embed-text}
+      # Reranker. Shared llama.cpp sidecar — see "llama-rerank patch"
+      # below for the network-attach that makes this resolvable.
+      RERANK_URL: ${SEED_RERANK_URL:-http://llama-rerank:8080}
+      RERANK_POOL: "50"
      RERANK_TIMEOUT: "30"
-
-      # Hybrid retrieval (BM25 + dense + RRF + exact-code prefilter).
-      # Worth it for seed-mcp because farmer queries often contain
-      # rare technical tokens — variety codes (DKC62-08RIB), trait
-      # codes (XF/VT2PRIB), Rps gene names, disease abbreviations.
+      # Hybrid + rerank is the eval-validated config (100% pass, 90%
+      # P@1, 0.905 MRR on 21 golden queries; see eval/results/
+      # baseline.md). Without rerank, P@1 drops to 62%.
      HYBRID_SEARCH: "true"
      RRF_K: "60"
-
-      # Usage telemetry. JSONL with daily rotation; 90-day retention.
-      USAGE_LOG_DIR: /app/var/logs
-      USAGE_LOG_KEEP_DAYS: "90"
-    volumes:
-      # Usage logs persist across container recreates. Mount point
-      # creates host directory `./seed-mcp-logs/` on first run.
-      - ./seed-mcp-logs:/app/var/logs
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    expose:
+      - "8080"
+    restart: unless-stopped
    labels:
-      # Watchtower polls only containers with this label = true.
+      # Watchtower auto-pulls :latest on push from CI. The label is
+      # required because the Drawbar stack's watchtower is set to
+      # label-mode (WATCHTOWER_LABEL_ENABLE=true).
      com.centurylinklabs.watchtower.enable: "true"
-    networks:
-      - drawbar-mcp

-  # NOTE: do NOT include llama-rerank or ollama here if you're stacking
-  # this compose alongside crop-chem-docs. They're already defined in
-  # the parent stack. The networks: external: true block below assumes
-  # those services live on the drawbar-mcp shared network.

-networks:
-  drawbar-mcp:
-    external: true
-    name: drawbar-mcp
+# ─── llama-rerank patch ──────────────────────────────────────────────
+#
+# As of 2026-05-25, the llama-rerank container is on the default
+# Docker `bridge` network — NOT on `drawbar-backend_default` where
+# chem-mcp and seed-mcp live. The chem-mcp container's
+# RERANK_URL=http://llama-rerank:8080 has been silently resolving to
+# the wrong host (public DNS returns 167.100.x; connection refused),
+# falling back to dense-only retrieval. To fix this and unlock
+# rerank for BOTH chem-mcp and seed-mcp:
+#
+#     docker network connect drawbar-backend_default llama-rerank
+#
+# This is idempotent and survives container restarts as long as the
+# llama-rerank container is NOT recreated by Watchtower; if Watchtower
+# replaces it, re-run the connect command (or — better — bring
+# llama-rerank into the compose stack so the network attachment is
+# declarative).
+#
+# Alternatively, declare llama-rerank as a compose service in this
+# parent stack:
+#
+#     llama-rerank:
+#       image: ghcr.io/ggml-org/llama.cpp:server-cuda
+#       container_name: llama-rerank
+#       restart: unless-stopped
+#       volumes:
+#         - llama-rerank-cache:/root/.cache/huggingface
+#       command: >
+#         -hf gpustack/jina-reranker-v2-base-multilingual-GGUF:Q8_0
+#         --reranking
+#         --host 0.0.0.0 --port 8080
+#         --n-gpu-layers 99 --ctx-size 8192
+#         --batch-size 4096 --ubatch-size 4096 --parallel 4
+#       deploy:
+#         resources:
+#           reservations:
+#             devices:
+#               - driver: nvidia
+#                 count: 1
+#                 capabilities: [gpu]
+#       labels:
+#         com.centurylinklabs.watchtower.enable: "false"
+#       expose:
+#         - "8080"
+#
+# (Watchtower disabled on llama-rerank to avoid surprise model-reload
+# downtime; pin the image tag if you want predictability.)
+#
+# Note: --ubatch-size 4096 is required for the seed-mcp corpus —
+# default 512 is too small for the ~600-token trial chunks and
+# rejects the whole batch with "input too large to process".
+
+# ─── drawbar-backend-api wiring ──────────────────────────────────────
+#
+# Add to the drawbar-backend-api `environment:` block so it can call
+# seed-mcp alongside chem-mcp:
+#
+#     SEED_MCP_BASE_URL: ${SEED_MCP_BASE_URL:-http://seed-mcp:8080/mcp}
+#
+# The advisor's tool multiplex (mcp_client) picks this up and exposes
+# the seed-mcp tools under the `seed:` prefix.