Merge pull request 'CI fix (localhost OLLAMA endpoint) + Drawbar deploy pattern' (#11) from ci-fix-and-deploy-pattern into main
This commit was merged in pull request #11.
This commit is contained in:
@@ -32,9 +32,11 @@ env:
|
||||
REGISTRY_PUSH: 192.168.0.2:1234
|
||||
REGISTRY_PULL: git.jpaul.io
|
||||
IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }}
|
||||
# 4-GPU pool, weighted toward .0.125 (4090). See refresh.yml for the
|
||||
# bench numbers. .0.2:11434 excluded — not GPU-pinned.
|
||||
OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435,http://localhost:11434
|
||||
# 3-GPU LAN pool, weighted toward .0.125 (4090). See refresh.yml for
|
||||
# the bench numbers. .0.2:11434 excluded (not GPU-pinned). localhost
|
||||
# excluded from CI (runner container has no Ollama on its loopback;
|
||||
# works in dev but fails in CI with connection refused).
|
||||
OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435
|
||||
EMBED_MODEL: nomic-embed-text
|
||||
PRODUCT_NAME: crop_seed
|
||||
|
||||
|
||||
@@ -34,16 +34,17 @@ env:
|
||||
REGISTRY_PULL: git.jpaul.io
|
||||
IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }}
|
||||
|
||||
# Embedder pool — 4 GPUs total, with .0.125 (RTX 40-series) listed
|
||||
# multiple times to weight the round-robin scheduler toward the
|
||||
# fastest endpoint. Measured throughput (50-chunk batches on
|
||||
# Embedder pool — 3 GPU-pinned endpoints reachable from the runner
|
||||
# container on .0.2. Measured throughput (50-chunk batches on
|
||||
# nomic-embed-text):
|
||||
# .0.125:11434 (4090) 242 embeds/sec ← weighted ×4
|
||||
# .0.2:11436 (GPU-pinned) 108 embeds/sec ← weighted ×2
|
||||
# .0.2:11435 (GPU-pinned) 72 embeds/sec ← weight 1
|
||||
# localhost (TITAN X) 37 embeds/sec ← weight 1
|
||||
# NOTE: .0.2:11434 is NOT GPU-pinned — exclude.
|
||||
OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435,http://localhost:11434
|
||||
# NOTE: `localhost:11434` works locally during dev but resolves to the
|
||||
# runner CONTAINER's own localhost in CI (no Ollama there → errno 111
|
||||
# connection refused). Use only LAN endpoints from CI.
|
||||
OLLAMA_URL: http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.125:11434,http://192.168.0.2:11436,http://192.168.0.2:11436,http://192.168.0.2:11435
|
||||
EMBED_MODEL: nomic-embed-text
|
||||
|
||||
PRODUCT_NAME: crop_seed
|
||||
|
||||
+97
-66
@@ -1,83 +1,114 @@
|
||||
# Hosting stack for the seed-mcp MCP server.
|
||||
# seed-mcp service block to MERGE into Drawbar's parent compose file
|
||||
# at /home/justin/drawbar/drawbar-backend/docker-compose.yml on
|
||||
# trashpanda.
|
||||
#
|
||||
# This compose file is meant to live in Drawbar's deploy stack and is
|
||||
# included here as the canonical reference. The seed-mcp image is
|
||||
# self-contained — corpus + Chroma + BM25 are baked in by CI at build
|
||||
# time — so the only host-side concerns are usage-log persistence and
|
||||
# the shared reranker / Ollama sidecars.
|
||||
#
|
||||
# The reranker container (llama-rerank) is SHARED with crop-chem-docs.
|
||||
# Drawbar's compose already has it from the crop-chem-docs deploy;
|
||||
# don't duplicate it here when stacking the two MCPs together.
|
||||
#
|
||||
# Watchtower auto-pulls on :latest changes — but ONLY for containers
|
||||
# labeled `com.centurylinklabs.watchtower.enable=true`.
|
||||
# Pattern matches the existing chem-mcp service (crop-chem-docs):
|
||||
# internal-only, no host port, MCP_PORT=8080 inside container,
|
||||
# reached via docker DNS as `seed-mcp:8080` from drawbar-backend-api.
|
||||
# Ollama lives on the host (host.docker.internal); the shared
|
||||
# llama-rerank sidecar must be attached to drawbar-backend_default
|
||||
# (see "llama-rerank patch" section below).
|
||||
|
||||
services:
|
||||
|
||||
# The seed-mcp server. Image is rebuilt nightly by .gitea/workflows/
|
||||
# refresh.yml; pulled via the public git.jpaul.io endpoint (CF
|
||||
# tunnels in front, so the 100 MB body cap doesn't matter on pulls).
|
||||
# seed-mcp — 760 variety identity records + 4,313 trial documents.
|
||||
# Tools land in the advisor's catalog under the `seed:` prefix via
|
||||
# the mcp_client multiplex (same pattern chem-mcp uses).
|
||||
seed-mcp:
|
||||
image: git.jpaul.io/justin/seed-mcp:latest
|
||||
container_name: seed-mcp
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8001:8000"
|
||||
environment:
|
||||
PRODUCT_NAME: "crop_seed"
|
||||
PRODUCT_DOCS_URL: "https://git.jpaul.io/justin/seed-mcp"
|
||||
|
||||
# Streamable-HTTP transport, stateless mode (every request gets
|
||||
# a fresh ephemeral session). Required for production: avoids
|
||||
# 404 storms when Watchtower recreates the container while
|
||||
# clients hold session IDs from the previous instance.
|
||||
MCP_TRANSPORT: streamable-http
|
||||
MCP_HOST: 0.0.0.0
|
||||
MCP_PORT: "8000"
|
||||
MCP_PORT: "8080"
|
||||
# FastMCP DNS-rebinding protection — disabled because we're
|
||||
# only reachable on the internal docker network as
|
||||
# `seed-mcp:8080`; not published to the host.
|
||||
MCP_DISABLE_DNS_REBINDING_PROTECTION: "1"
|
||||
|
||||
# Embedding pool. Drawbar's compose puts the seed-mcp on the
|
||||
# same docker network as Ollama; comma-separate multiple
|
||||
# endpoints (one per GPU) for indexing throughput. At runtime
|
||||
# only search_docs hits this (one embed per query, ~5ms).
|
||||
OLLAMA_URL: "http://ollama:11434"
|
||||
|
||||
# Reranker. The llama.cpp sidecar serving jina-reranker-v2-base
|
||||
# is SHARED with crop-chem-docs. Drawbar's compose already
|
||||
# defines llama-rerank from the crop-chem-docs deploy; we just
|
||||
# point at the same DNS name. Falls back to dense-only on any
|
||||
# rerank error so MCP requests never block on the sidecar.
|
||||
RERANK_URL: "http://llama-rerank:8080"
|
||||
RERANK_POOL: "200"
|
||||
PRODUCT_NAME: crop_seed
|
||||
# Query-time embeddings via the host's Ollama on :11434
|
||||
# (nomic-embed-text). host.docker.internal is mapped below.
|
||||
OLLAMA_URL: ${SEED_OLLAMA_URL:-http://host.docker.internal:11434}
|
||||
EMBED_MODEL: ${SEED_EMBED_MODEL:-nomic-embed-text}
|
||||
# Reranker. Shared llama.cpp sidecar — see "llama-rerank patch"
|
||||
# below for the network-attach that makes this resolvable.
|
||||
RERANK_URL: ${SEED_RERANK_URL:-http://llama-rerank:8080}
|
||||
RERANK_POOL: "50"
|
||||
RERANK_TIMEOUT: "30"
|
||||
|
||||
# Hybrid retrieval (BM25 + dense + RRF + exact-code prefilter).
|
||||
# Worth it for seed-mcp because farmer queries often contain
|
||||
# rare technical tokens — variety codes (DKC62-08RIB), trait
|
||||
# codes (XF/VT2PRIB), Rps gene names, disease abbreviations.
|
||||
# Hybrid + rerank is the eval-validated config (100% pass, 90%
|
||||
# P@1, 0.905 MRR on 21 golden queries; see eval/results/
|
||||
# baseline.md). Without rerank, P@1 drops to 62%.
|
||||
HYBRID_SEARCH: "true"
|
||||
RRF_K: "60"
|
||||
|
||||
# Usage telemetry. JSONL with daily rotation; 90-day retention.
|
||||
USAGE_LOG_DIR: /app/var/logs
|
||||
USAGE_LOG_KEEP_DAYS: "90"
|
||||
volumes:
|
||||
# Usage logs persist across container recreates. Mount point
|
||||
# creates host directory `./seed-mcp-logs/` on first run.
|
||||
- ./seed-mcp-logs:/app/var/logs
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
expose:
|
||||
- "8080"
|
||||
restart: unless-stopped
|
||||
labels:
|
||||
# Watchtower polls only containers with this label = true.
|
||||
# Watchtower auto-pulls :latest on push from CI. The label is
|
||||
# required because the Drawbar stack's watchtower is set to
|
||||
# label-mode (WATCHTOWER_LABEL_ENABLE=true).
|
||||
com.centurylinklabs.watchtower.enable: "true"
|
||||
networks:
|
||||
- drawbar-mcp
|
||||
|
||||
# NOTE: do NOT include llama-rerank or ollama here if you're stacking
|
||||
# this compose alongside crop-chem-docs. They're already defined in
|
||||
# the parent stack. The networks: external: true block below assumes
|
||||
# those services live on the drawbar-mcp shared network.
|
||||
|
||||
networks:
|
||||
drawbar-mcp:
|
||||
external: true
|
||||
name: drawbar-mcp
|
||||
# ─── llama-rerank patch ──────────────────────────────────────────────
|
||||
#
|
||||
# As of 2026-05-25, the llama-rerank container is on the default
|
||||
# Docker `bridge` network — NOT on `drawbar-backend_default` where
|
||||
# chem-mcp and seed-mcp live. The chem-mcp container's
|
||||
# RERANK_URL=http://llama-rerank:8080 has been silently failing to
|
||||
# resolve (returns 167.100.x via public DNS, connection refused),
|
||||
# falling back to dense-only retrieval. To fix this and unlock
|
||||
# rerank for BOTH chem-mcp and seed-mcp:
|
||||
#
|
||||
# docker network connect drawbar-backend_default llama-rerank
|
||||
#
|
||||
# This is idempotent and survives container restarts as long as the
|
||||
# llama-rerank container is not recreated by Watchtower; if Watchtower
|
||||
# replaces it, re-run the connect command (or — better — bring
|
||||
# llama-rerank into the compose stack so the network attachment is
|
||||
# declarative).
|
||||
#
|
||||
# Alternatively, declare llama-rerank as a compose service in this
|
||||
# parent stack:
|
||||
#
|
||||
# llama-rerank:
|
||||
# image: ghcr.io/ggml-org/llama.cpp:server-cuda
|
||||
# container_name: llama-rerank
|
||||
# restart: unless-stopped
|
||||
# volumes:
|
||||
# - llama-rerank-cache:/root/.cache/huggingface
|
||||
# command: >
|
||||
# -hf gpustack/jina-reranker-v2-base-multilingual-GGUF:Q8_0
|
||||
# --reranking
|
||||
# --host 0.0.0.0 --port 8080
|
||||
# --n-gpu-layers 99 --ctx-size 8192
|
||||
# --batch-size 4096 --ubatch-size 4096 --parallel 4
|
||||
# deploy:
|
||||
# resources:
|
||||
# reservations:
|
||||
# devices:
|
||||
# - driver: nvidia
|
||||
# count: 1
|
||||
# capabilities: [gpu]
|
||||
# labels:
|
||||
# com.centurylinklabs.watchtower.enable: "false"
|
||||
# expose:
|
||||
# - "8080"
|
||||
#
|
||||
# (Watchtower disabled on llama-rerank to avoid surprise model-reload
|
||||
# downtime; pin the image tag if you want predictability.)
|
||||
#
|
||||
# Note: --ubatch-size 4096 is required for the seed-mcp corpus —
|
||||
# the default (512) is too small for the ~600-token trial chunks, so the server
|
||||
# rejects the whole batch with "input too large to process".
|
||||
|
||||
# ─── drawbar-backend-api wiring ──────────────────────────────────────
|
||||
#
|
||||
# Add to the drawbar-backend-api `environment:` block so it can call
|
||||
# seed-mcp alongside chem-mcp:
|
||||
#
|
||||
# SEED_MCP_BASE_URL: ${SEED_MCP_BASE_URL:-http://seed-mcp:8080/mcp}
|
||||
#
|
||||
# The advisor's tool multiplex (mcp_client) picks this up and exposes
|
||||
# the seed-mcp tools under the `seed:` prefix.
|
||||
|
||||
Reference in New Issue
Block a user