diff --git a/.gitea/workflows/image-only.yml b/.gitea/workflows/image-only.yml index 67bde473..6d5c65f7 100644 --- a/.gitea/workflows/image-only.yml +++ b/.gitea/workflows/image-only.yml @@ -32,9 +32,11 @@ env: REGISTRY_PUSH: 192.168.0.2:1234 REGISTRY_PULL: git.jpaul.io IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }} - # 4-GPU pool, weighted toward .0.125 (4090). See refresh.yml for the - # bench numbers. .0.2:11434 excluded — not GPU-pinned. - OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435,http://localhost:11434 + # 3-GPU LAN pool, weighted toward .0.125 (4090). See refresh.yml for + # the bench numbers. .0.2:11434 excluded (not GPU-pinned). localhost + # excluded from CI (runner container has no Ollama on its loopback; + # works in dev but fails in CI with connection refused). + OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435 EMBED_MODEL: nomic-embed-text PRODUCT_NAME: crop_seed diff --git a/.gitea/workflows/refresh.yml b/.gitea/workflows/refresh.yml index a3c921ca..defa037e 100644 --- a/.gitea/workflows/refresh.yml +++ b/.gitea/workflows/refresh.yml @@ -34,16 +34,17 @@ env: REGISTRY_PULL: git.jpaul.io IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }} - # Embedder pool — 4 GPUs total, with .0.125 (RTX 40-series) listed - # multiple times to weight the round-robin scheduler toward the - # fastest endpoint. Measured throughput (50-chunk batches on + # Embedder pool — 3 GPU-pinned endpoints reachable from the runner + # container on .0.2. 
Measured throughput (50-chunk batches on # nomic-embed-text): # .0.125:11434 (4090) 242 embeds/sec ← weighted ×4 # .0.2:11436 (GPU-pinned) 108 embeds/sec ← weighted ×2 # .0.2:11435 (GPU-pinned) 72 embeds/sec ← weight 1 - # localhost (TITAN X) 37 embeds/sec ← weight 1 # NOTE: .0.2:11434 is NOT GPU-pinned — exclude. - OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435,http://localhost:11434 + # NOTE: `localhost:11434` works locally during dev but resolves to the + # runner CONTAINER's own localhost in CI (no Ollama there → 111 + # connection refused). Use only LAN endpoints from CI. + OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435 EMBED_MODEL: nomic-embed-text PRODUCT_NAME: crop_seed diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml index 9dffce65..16cc977a 100644 --- a/deploy/docker-compose.yml +++ b/deploy/docker-compose.yml @@ -1,83 +1,114 @@ -# Hosting stack for the seed-mcp MCP server. +# seed-mcp service block to MERGE into Drawbar's parent compose file +# at /home/justin/drawbar/drawbar-backend/docker-compose.yml on +# trashpanda. # -# This compose file is meant to live in Drawbar's deploy stack and is -# included here as the canonical reference. The seed-mcp image is -# self-contained — corpus + Chroma + BM25 are baked in by CI at build -# time — so the only host-side concerns are usage-log persistence and -# the shared reranker / Ollama sidecars. -# -# The reranker container (llama-rerank) is SHARED with crop-chem-docs. -# Drawbar's compose already has it from the crop-chem-docs deploy; -# don't duplicate it here when stacking the two MCPs together. 
-# -# Watchtower auto-pulls on :latest changes — but ONLY for containers -# labeled `com.centurylinklabs.watchtower.enable=true`. +# Pattern matches the existing chem-mcp service (crop-chem-docs): +# internal-only, no host port, MCP_PORT=8080 inside container, +# reached via docker DNS as `seed-mcp:8080` from drawbar-backend-api. +# Ollama lives on the host (host.docker.internal); the shared +# llama-rerank sidecar must be attached to drawbar-backend_default +# (see "llama-rerank patch" section below). services: - # The seed-mcp server. Image is rebuilt nightly by .gitea/workflows/ - # refresh.yml; pulled via the public git.jpaul.io endpoint (CF - # tunnels in front, so the 100 MB body cap doesn't matter on pulls). + # seed-mcp — 760 variety identity records + 4,313 trial documents. + # Tools land in the advisor's catalog under the `seed:` prefix via + # the mcp_client multiplex (same pattern chem-mcp uses). seed-mcp: image: git.jpaul.io/justin/seed-mcp:latest - container_name: seed-mcp - restart: unless-stopped - ports: - - "8001:8000" environment: - PRODUCT_NAME: "crop_seed" - PRODUCT_DOCS_URL: "https://git.jpaul.io/justin/seed-mcp" - - # Streamable-HTTP transport, stateless mode (every request gets - # a fresh ephemeral session). Required for production: avoids - # 404 storms when Watchtower recreates the container while - # clients hold session IDs from the previous instance. MCP_TRANSPORT: streamable-http MCP_HOST: 0.0.0.0 - MCP_PORT: "8000" + MCP_PORT: "8080" + # FastMCP DNS-rebinding protection — disabled because we're + # only reachable on the internal docker network as + # `seed-mcp:8080`; not published to the host. MCP_DISABLE_DNS_REBINDING_PROTECTION: "1" - - # Embedding pool. Drawbar's compose puts the seed-mcp on the - # same docker network as Ollama; comma-separate multiple - # endpoints (one per GPU) for indexing throughput. At runtime - # only search_docs hits this (one embed per query, ~5ms). - OLLAMA_URL: "http://ollama:11434" - - # Reranker. 
The llama.cpp sidecar serving jina-reranker-v2-base - # is SHARED with crop-chem-docs. Drawbar's compose already - # defines llama-rerank from the crop-chem-docs deploy; we just - # point at the same DNS name. Falls back to dense-only on any - # rerank error so MCP requests never block on the sidecar. - RERANK_URL: "http://llama-rerank:8080" - RERANK_POOL: "200" + PRODUCT_NAME: crop_seed + # Query-time embeddings via the host's Ollama on :11434 + # (nomic-embed-text). host.docker.internal is mapped below. + OLLAMA_URL: ${SEED_OLLAMA_URL:-http://host.docker.internal:11434} + EMBED_MODEL: ${SEED_EMBED_MODEL:-nomic-embed-text} + # Reranker. Shared llama.cpp sidecar — see "llama-rerank patch" + # below for the network-attach that makes this resolvable. + RERANK_URL: ${SEED_RERANK_URL:-http://llama-rerank:8080} + RERANK_POOL: "50" RERANK_TIMEOUT: "30" - - # Hybrid retrieval (BM25 + dense + RRF + exact-code prefilter). - # Worth it for seed-mcp because farmer queries often contain - # rare technical tokens — variety codes (DKC62-08RIB), trait - # codes (XF/VT2PRIB), Rps gene names, disease abbreviations. + # Hybrid + rerank is the eval-validated config (100% pass, 90% + # P@1, 0.905 MRR on 21 golden queries; see eval/results/ + # baseline.md). Without rerank, P@1 drops to 62%. HYBRID_SEARCH: "true" RRF_K: "60" - - # Usage telemetry. JSONL with daily rotation; 90-day retention. - USAGE_LOG_DIR: /app/var/logs - USAGE_LOG_KEEP_DAYS: "90" - volumes: - # Usage logs persist across container recreates. Mount point - # creates host directory `./seed-mcp-logs/` on first run. - - ./seed-mcp-logs:/app/var/logs + extra_hosts: + - "host.docker.internal:host-gateway" + expose: + - "8080" + restart: unless-stopped labels: - # Watchtower polls only containers with this label = true. + # Watchtower auto-pulls :latest on push from CI. The label is + # required because the Drawbar stack's watchtower is set to + # label-mode (WATCHTOWER_LABEL_ENABLE=true). 
com.centurylinklabs.watchtower.enable: "true" - networks: - - drawbar-mcp - # NOTE: do NOT include llama-rerank or ollama here if you're stacking - # this compose alongside crop-chem-docs. They're already defined in - # the parent stack. The networks: external: true block below assumes - # those services live on the drawbar-mcp shared network. -networks: - drawbar-mcp: - external: true - name: drawbar-mcp +# ─── llama-rerank patch ────────────────────────────────────────────── +# +# As of 2026-05-25, the llama-rerank container is on the default +# Docker `bridge` network — NOT on `drawbar-backend_default` where +# chem-mcp and seed-mcp live. The chem-mcp container's +# RERANK_URL=http://llama-rerank:8080 has been silently failing to +# resolve (returns 167.100.x via public DNS, connection refused), +# falling back to dense-only retrieval. To fix this and unlock +# rerank for BOTH chem-mcp and seed-mcp: +# +# docker network connect drawbar-backend_default llama-rerank +# +# This is idempotent and survives container restarts as long as the +# llama-rerank container is not recreated by Watchtower; if Watchtower +# replaces it, re-run the connect command (or — better — bring +# llama-rerank into the compose stack so the network attachment is +# declarative). 
+# +# Alternatively, declare llama-rerank as a compose service in this +# parent stack: +# +# llama-rerank: +# image: ghcr.io/ggml-org/llama.cpp:server-cuda +# container_name: llama-rerank +# restart: unless-stopped +# volumes: +# - llama-rerank-cache:/root/.cache/huggingface +# command: > +# -hf gpustack/jina-reranker-v2-base-multilingual-GGUF:Q8_0 +# --reranking +# --host 0.0.0.0 --port 8080 +# --n-gpu-layers 99 --ctx-size 8192 +# --batch-size 4096 --ubatch-size 4096 --parallel 4 +# deploy: +# resources: +# reservations: +# devices: +# - driver: nvidia +# count: 1 +# capabilities: [gpu] +# labels: +# com.centurylinklabs.watchtower.enable: "false" +# expose: +# - "8080" +# +# (Watchtower disabled on llama-rerank to avoid surprise model-reload +# downtime; pin the image tag if you want predictability.) +# +# Note: --ubatch-size 4096 is required for the seed-mcp corpus — +# default 512 is too small for the ~600-token trial chunks and +# rejects the whole batch with "input too large to process". + +# ─── drawbar-backend-api wiring ────────────────────────────────────── +# +# Add to the drawbar-backend-api `environment:` block so it can call +# seed-mcp alongside chem-mcp: +# +# SEED_MCP_BASE_URL: ${SEED_MCP_BASE_URL:-http://seed-mcp:8080/mcp} +# +# The advisor's tool multiplex (mcp_client) picks this up and exposes +# the seed-mcp tools under the `seed:` prefix.