From af44d7a102a3fd03f75681ae710f68ff9342d10a Mon Sep 17 00:00:00 2001 From: Justin Paul Date: Sun, 24 May 2026 12:10:09 -0400 Subject: [PATCH] Phase 11 + Phase 6 GPU move MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Phase 11 — Curated agronomy / label-handling knowledge layer docs_mcp/lessons.md: 13 topic-anchored markdown sections covering the LLM-side context a farmer-advisor needs alongside the raw label corpus — - how-to-use-this-corpus - epa-signal-words - rei-phi-fundamentals - rup-handling - supplemental-labels-24c-2ee - tank-mix-fundamentals - resistance-management-hrac-frac-irac - glufosinate-application-rules - dicamba-application-rules - lake-erie-watershed-ohio - scn-and-other-seed-treatment-context - drift-management-essentials - how-to-format-recommendations Each Topic block is independently retrievable via the new MCP tool: ppls_api_lessons(topic="rup-handling") Or with no topic to get the full TOC, or with a substring to match-and-return matching sections ("dicamba" → dicamba-application-rules). Tool docstring instructs the LLM to call this proactively before any pesticide recommendation so the recommendation lands with regulatory framing, resistance-group callouts, RUP applicator language, and the canonical recommendation format — not just a rate from a label. ## Phase 6 — Reranker moved to GPU on trashpanda Stopped the local CPU container and started on trashpanda's Tesla P4 (8 GB VRAM) via: docker run -d --name llama-rerank --restart unless-stopped --gpus all \ -p 8082:8080 \ ghcr.io/ggml-org/llama.cpp:server-cuda \ -hf gpustack/jina-reranker-v2-base-multilingual-GGUF:Q8_0 \ --reranking --host 0.0.0.0 --port 8080 -ngl 99 The :server-cuda image variant (not :server) is required for CUDA backend; -ngl 99 offloads all layers to GPU. Latency: 50-doc rerank dropped from ~23 s on CPU to ~0.7-1.5 s on the Tesla P4 — production-grade interactive speeds. 
deploy/rerank-docker.md updated with the trashpanda deploy recipe, troubleshooting (mostly "did you use server-cuda?"), and a perf reference table. The MCP server's RERANK_URL just points at http://10.10.1.65:8082 now. GPU eval still completing in background; results land in eval/results/with_rerank_gpu.md as a follow-up commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- deploy/rerank-docker.md | 74 ++++++++---- docs_mcp/lessons.md | 262 ++++++++++++++++++++++++++++++++++++++++ docs_mcp/server.py | 101 +++++++++++++++- 3 files changed, 410 insertions(+), 27 deletions(-) create mode 100644 docs_mcp/lessons.md diff --git a/deploy/rerank-docker.md b/deploy/rerank-docker.md index 78910c1..63632d0 100644 --- a/deploy/rerank-docker.md +++ b/deploy/rerank-docker.md @@ -4,29 +4,48 @@ Phase 6 setup. The MCP server reads `RERANK_URL` and, when set, pipes the top-50 dense (or hybrid) chunks through this sidecar before returning to the LLM. See `docs_mcp/server.py:_rerank_pool`. -## Run +## Production deploy — trashpanda (Tesla P4, 8 GB VRAM) + +This is where the reranker lives. Same box that runs the Drawbar +backend + Cloudflare Tunnel, so the MCP server can reach it on the +internal LAN. ```bash -docker run -d --name llama-rerank -p 8082:8080 \ - ghcr.io/ggml-org/llama.cpp:server \ - -hf gpustack/jina-reranker-v2-base-multilingual-GGUF:Q8_0 \ - --reranking --host 0.0.0.0 --port 8080 +ssh justin@10.10.1.65 \ + 'docker run -d --name llama-rerank --restart unless-stopped --gpus all \ + -p 8082:8080 \ + ghcr.io/ggml-org/llama.cpp:server-cuda \ + -hf gpustack/jina-reranker-v2-base-multilingual-GGUF:Q8_0 \ + --reranking --host 0.0.0.0 --port 8080 -ngl 99' ``` -The image auto-downloads the GGUF on first start (~280 MB, one-time). -First request loads the model into memory (~1s on CPU). 
+Key flags: +- `--gpus all` — pass through the Tesla P4 +- `server-cuda` image — CUDA-built llama.cpp (not the CPU-only `:server`) +- `-ngl 99` — offload all layers to GPU +- `-hf <repo>` — auto-download from HuggingFace on first start (~280 MB, + cached in the container volume) +- `--reranking` — enables `/v1/rerank` endpoint +- `--restart unless-stopped` — survives reboot + +VRAM usage: ~280 MB model + CUDA context. Well under the 8 GB the +Tesla P4 has, leaves room for nomic-embed-text (~560 MB) if you +later co-host it. ## Configure the MCP server ```bash -export RERANK_URL=http://localhost:8082 -# search_docs will now rerank automatically +export RERANK_URL=http://10.10.1.65:8082 +# search_docs now reranks the hybrid pool through the GPU before returning ``` +In production (the MetaMCP-fronted Drawbar deploy), this is baked +into the MCP server's container env. + ## Verify ```bash -curl http://localhost:8082/v1/rerank -H 'Content-Type: application/json' -d '{ +curl http://10.10.1.65:8082/v1/rerank -H 'Content-Type: application/json' -d '{ "query": "soybean herbicide for waterhemp", "documents": [ "Roundup Custom for fallow burndown", @@ -36,17 +55,28 @@ curl http://localhost:8082/v1/rerank -H 'Content-Type: application/json' -d '{ ``` Expect index=1 (the Sencor doc) at score ~0.8, index=0 at a strongly -negative score. +negative score, in under 1 s. -## Performance notes +## Performance reference -- **CPU-only is slow.** ~0.5s per (query, doc) pair → ~23s for a - 50-doc pool. Fine for batch eval; painful for interactive queries. -- For production, run on GPU: add `--gpus all` to docker, llama.cpp - uses the CUDA backend automatically. Expect ~10-20× speedup. -- Alternative: drop `RERANK_POOL` from 50 to ~20 in the server env. - Cuts latency 2.5× at the cost of some quality (rerank gets fewer - candidates to choose from).
-- For very small batches the reranker can also run alongside - Ollama on the same GPU box — `jina-reranker-v2-base` is ~280 MB - and won't conflict with `nomic-embed-text` (~560 MB VRAM each). +| Mode | Pool | Wall time | +|---|---|---| +| CPU (local 28-thread Xeon) | 50 docs | ~23 s | +| GPU (Tesla P4 on trashpanda) | 50 docs | ~0.7-1.5 s | +| GPU (Tesla P4) | 20 docs | ~0.4 s | + +The Tesla P4 is Pascal-era (~5.5 TFLOPS FP32) so a modern Ampere or +Ada Lovelace GPU would be ~3-5× faster, but for the row-crop label +corpus query rate the P4 is plenty. + +## Troubleshooting + +- **Model not on GPU?** Check `docker logs llama-rerank 2>&1 | grep CUDA` — + you should see `CUDA0 : Tesla P4 (8109 MiB, ... free)` and tensor + load lines. If you see CPU-only init, you forgot `--gpus all` or + used `:server` instead of `:server-cuda`. +- **Conflict with Ollama on the same GPU?** No — both processes can + share the GPU, CUDA handles VRAM partitioning. nomic-embed-text + + jina-reranker-v2-base together use ~840 MB on the 8 GB card. +- **First rerank call is slow (~4 s)?** Warm-up. Subsequent calls are + ~0.7 s for 50 docs. diff --git a/docs_mcp/lessons.md b/docs_mcp/lessons.md new file mode 100644 index 0000000..aa0f27e --- /dev/null +++ b/docs_mcp/lessons.md @@ -0,0 +1,262 @@ +# PPLS API Lessons + +Curated agronomy + label-handling knowledge that an LLM should know +*before* giving recommendations from the labels corpus. Surfaced by +the `ppls_api_lessons` MCP tool. + +Each top-level `## Topic: <slug>` block is independently retrievable. +The tool docstring tells the LLM to call this proactively before +answering any pesticide recommendation question. + +--- + +## Topic: how-to-use-this-corpus + +The PPLS docs corpus is the source of truth for *what's on the label*. +You should: + +1. **Run `search_docs` first** with the user's natural-language + question.
Hybrid+rerank mode (default in production) returns the + most relevant label chunks across Bayer + every major US ag-chem + registrant via EPA PPLS. +2. **Cite the EPA Reg No** next to any product recommendation. Format: + `PRODUCT NAME (EPA Reg X-Y)`. Drop this and the recommendation is + ungrounded. +3. **Link the label PDF URL** so the user can verify and the spray + operator can have the actual label on hand. The sidecar's + `label.url` is in the search result metadata. +4. **Quote — don't paraphrase — rate ranges**. Labels say "16 to 32 + fl oz/A"; *do not* tighten that to "use 24 fl oz/A" unless the + label gives a specific use case at that rate. +5. **If you can't find a label-grounded answer**, say so. Better to + return "no label in corpus matches this; consult the manufacturer + or your CCA" than to hallucinate a rate. + +The corpus is **scoped to US row crops: corn, soybeans, wheat**. +Outside that scope, results are sparse or empty. + +## Topic: epa-signal-words + +Every EPA-registered pesticide label has a signal word in the upper +front panel. It maps to acute toxicity: + +| Signal word | Toxicity | Typical examples | +|---|---|---| +| **DANGER** + "POISON" + skull-and-crossbones | Cat I, highly toxic | Paraquat (Gramoxone), some methyl bromide | +| **DANGER** (no POISON) | Cat I (skin/eye irritation only) | Some restricted-use ester formulations | +| **WARNING** | Cat II | Many fomesafen formulations, some 2,4-D esters | +| **CAUTION** | Cat III/IV, least toxic | Most modern soybean/corn herbicides — glyphosate, mesotrione, fomesafen amine salts | +| **(none)** | Cat IV | A few biopesticides + some adjuvants | + +When recommending a DANGER-labeled product, *always* call out PPE +requirements (typically chemical-resistant gloves, footwear, eyewear, +respirator depending on activity). 
+ +## Topic: rei-phi-fundamentals + +Two distinct intervals — don't confuse them: + +- **REI** (Restricted Entry Interval): minimum time AFTER application + before a worker may enter the treated area *without* the label's + full PPE. Typical values: 4, 12, 24, 48 hours. +- **PHI** (Pre-Harvest Interval): minimum time BETWEEN last application + and crop harvest. Typical values: 7, 14, 21, 30, 60, 90 days + depending on chemistry + crop. + +Always state both when relevant to the workflow. For burndown +applications, PHI rarely matters; for in-crop foliar, it's critical. + +## Topic: rup-handling + +Restricted Use Pesticide (RUP) is a *federal* designation that means: +**the product can only be purchased, possessed, and applied by (or +under direct supervision of) a certified pesticide applicator.** + +Row-crop products you'll commonly see in RUP class: +- **Paraquat-based** (Gramoxone Inteon, Helmquat, Firestorm) — RUP + special closed-system training required since 2019 +- **Dicamba formulations approved for in-crop soybean/cotton** (XtendiMax, Engenia, Tavium) — RUP + applicator training every year +- **Some pyrethroids** (Warrior II, Mustang Maxx) — RUP in some states + +When recommending an RUP, *always* say: +> "This is a Restricted Use Pesticide — application requires a +> certified applicator and proper recordkeeping per state regs." + +Never give a "casual" application recommendation for an RUP. The +recommendation must include the applicator-certification framing. + +## Topic: supplemental-labels-24c-2ee + +Beyond the main federal (§3) label, products often have: + +- **2(ee) recommendations**: manufacturer-issued, label-compliant + *additional uses* that don't require formal re-registration. + These add new tank-mixes, crops, or pests within the existing + label's authority. You can recommend a 2(ee) — but tell the user + the 2(ee) document itself must be in their possession at spray time. 
+- **24(c) Special Local Need (SLN)**: state-specific labels approved + by the state lead agency for a problem peculiar to that state. + Same possession-at-spray rule. SLNs are common for cotton in TX + and rice in southern states; less common in OH row crops. + +The Bayer scraper captures these as `supplemental_documents` in each +label's sidecar (`kind: "2EE"` or `"24C"`). For EPA PPLS labels, the +main label is what's in the corpus. + +## Topic: tank-mix-fundamentals + +When recommending tank mixes: + +1. **The more restrictive label wins.** If product A allows 2 qt/A + max in-crop and product B caps tank-mix partners at 1 qt/A for + that crop, the cap is 1 qt/A. +2. **Antagonism is real.** A few well-known antagonisms: + - Glufosinate + grass herbicides (clethodim, sethoxydim) → reduced + grass control. Apply grass herbicides separately, 7 days apart. + - Atrazine + dicamba + Group 15 (e.g., S-metolachlor) all-at-once + can hammer corn under cold/wet conditions. + - 2,4-D ester + glufosinate → can reduce glufosinate activity. +3. **Adjuvant compatibility:** + - Glufosinate (Liberty) REQUIRES AMS @ 1.5-3 lb/A. No exceptions. + - Glyphosate works best with NIS in soft water, or with AMS + conditioner in hard water (Mg/Ca > 200 ppm). + - PPO herbicides (lactofen, fomesafen) often want COC, not NIS. +4. **Always check both labels' "Tank-Mix Compatibility" or + "Restrictions" sections** before recommending — the corpus has + these sections; pull them with `search_docs`. + +## Topic: resistance-management-hrac-frac-irac + +Herbicide resistance is the single biggest threat to row-crop weed +control in the US Midwest. Always communicate resistance group when +recommending: + +- **HRAC** (Herbicide Resistance Action Committee) groups (formerly + WSSA numbers). Use the *number* not just the name — farmers + recognize "Group 14" faster than "PPO inhibitor". +- **FRAC** for fungicides. +- **IRAC** for insecticides. 
+ +Key Midwest resistance hotspots: +- **Waterhemp + Palmer amaranth**: resistant to Groups 2, 5, 9, 14, + 15, 27 in places. Means glyphosate, ALS, atrazine, fomesafen, + metolachlor, and HPPDs (mesotrione) all have spotty efficacy. + → Always mix MOAs; never spray a single Group twice in a season. +- **Marestail/horseweed**: glyphosate-resistant nationwide; 2,4-D + remains the burndown anchor + Sharpen (saflufenacil, Group 14). +- **Giant ragweed**: glyphosate + ALS resistant in many areas. + +When the user asks for a recommendation, *say* the group number +(e.g., "Sencor (metribuzin, Group 5)") so they can rotate. + +## Topic: glufosinate-application-rules + +Glufosinate (Liberty 280 SL, Cheetah Max generic, etc.) is unique: + +- **Photosynthesis-dependent**: needs bright sun within ~4 hours of + application. Cloudy days = poor control. +- **Needs warmth**: ideally daytime temp > 60°F at application. +- **AMS is mandatory** at 1.5-3 lb/A. +- **Coverage trumps droplet size**: use flat-fan or AIXR nozzles, 15-20 + GPA carrier, medium droplets. Don't go ultra-coarse to reduce drift. +- **Two-pass strategy** for heavy weed pressure (V2 + V4-V5 in + soybean) outperforms a single higher-rate pass. +- **Weed-size critical**: best on weeds ≤ 4". After 6" efficacy drops. + +## Topic: dicamba-application-rules + +Dicamba in-crop in soybean/cotton (XtendiMax, Engenia, Tavium) is +under intense EPA scrutiny. Current label rules (verify against the +specific label in corpus before recommending): + +- **RUP + annual applicator training** required. +- **State and date cutoffs**: most states have application date + cutoffs (e.g., June 30 in OH for soybean; varies by state). Check + the state-specific 24(c) label. +- **Wind**: 3-10 mph at boom height. No spraying during temperature + inversions (typically pre-sunrise + late evening). +- **Buffers**: downwind buffer to sensitive areas (typically 110-220 + ft; depends on state + downwind sensitivity). 
+- **Approved nozzles only**: TTI or AIXR with very-coarse-to-ultra- + coarse droplets. Manufacturer publishes approved nozzle lists. +- **Tank cleanout**: triple-rinse with ammonia-based cleaner after + every load. Dicamba contamination of subsequent loads is the #1 + off-target damage cause. + +If the label in the corpus is older than the current EPA decision, +*say so* and direct the user to the latest manufacturer label — +EPA has revised dicamba registrations multiple times. + +## Topic: lake-erie-watershed-ohio + +Ohio's H2Ohio program + the Western Lake Erie Basin (WLEB) impose +additional considerations for nutrient/pesticide runoff: + +- **Atrazine**: WLEB subwatersheds have voluntary reduction targets; + formal label restrictions in some HUC-12 watersheds. Atrazine over + 0.75 lb/A on highly-erodible land may require soil conservation + practices (cover crops, buffer strips). +- **Dicamba**: see Topic: dicamba-application-rules. OH cutoff has + historically been June 30 for in-crop soybean. +- **2,4-D + 2,4-DB**: drift sensitivity in OH given the high mix of + row-crop, specialty-crop (tomato, grape), and homeowner areas. + +When recommending to OH farmers, surface H2Ohio cost-share options +if relevant (no-till + cover crops + variable-rate nutrient +management can offset chemistry needs). + +## Topic: scn-and-other-seed-treatment-context + +Soybean cyst nematode (SCN) is universal in OH/IN/IL/IA. 
When +recommending a soybean program, *always* check whether the seed +treatment includes nematicide/SCN protection: + +- **Abamectin** (Avicta) — original SCN nematicide seed treatment +- **Fluopyram** (ILeVO) — broader nematode + SDS suppression +- **Pydiflumetofen** (Saltro) — newer; nematode + SDS protection + without ILeVO's halo effect on seedling +- **Pasteuria nishizawae** (Clariva) — biological nematicide + +This isn't strictly a "pesticide label" topic but it's the right +context for ANY soybean herbicide recommendation: a great herbicide +program on SCN-infested fields without nematicide seed treatment is +leaving yield on the table. + +## Topic: drift-management-essentials + +Drift mitigation is increasingly enforced and increasingly important +for off-target damage liability: + +- **Wind**: most labels specify 3-10 mph at boom height. Below 3 mph + often signals a temperature inversion (cool, still air trapped under + warmer air aloft — fine spray hangs and can drift for miles). +- **Temperature inversion detection**: smoke test. Smoke that rises + and dissipates = no inversion. Smoke that hangs flat = inversion. +- **Nozzle selection**: AIXR / TTI / TT — air-induction nozzles + produce larger droplets that drift less. Required for dicamba/2,4-D. +- **Boom height**: lower is better for drift. 20 inches over canopy + for AIXR; manufacturer specs for TTI. +- **Buffer to sensitive crops**: tomatoes (esp. for 2,4-D + dicamba), + grapes, organic fields, residential lawns. Always check downwind. +- **Adjuvant choice affects drift**: NIS reduces droplet size; deposition + aids (e.g., InterLock, Strike Zone) increase droplet weight and reduce + drift.
+ +## Topic: how-to-format-recommendations + +When the LLM produces a pesticide recommendation, the canonical shape +that makes it actionable for a farmer: + +``` +**[Product name]** (EPA Reg [X-Y]) — [active ingredient(s)], [Group N] +- **Rate:** [from label, with range] +- **Timing:** [growth stage / DAT] +- **Carrier + adjuvant:** [GPA + adjuvant requirements] +- **REI/PHI:** [from label] +- **Label PDF:** [URL from search result] +- **Notes:** [resistance group, drift considerations, RUP framing if + applicable, tank-mix antagonism warnings] +``` + +Skip the canonical shape and the recommendation is hard to apply +without the farmer doing their own label hunting. The corpus has +everything needed — surface it cleanly. diff --git a/docs_mcp/server.py b/docs_mcp/server.py index 07ce84f..d527e34 100644 --- a/docs_mcp/server.py +++ b/docs_mcp/server.py @@ -599,16 +599,107 @@ def corpus_status() -> str: # --------------------------------------------------------------------------- -# Stubs for later phases — keep the signatures in this file so refactors -# don't lose the contracts. Implementations come per phase. +# Phase 11 — Curated agronomy / label-handling knowledge +# --------------------------------------------------------------------------- + +LESSONS_PATH = Path(__file__).resolve().parent / "lessons.md" +_lessons_cache: tuple[str, list[tuple[str, str]]] | None = None # (full, sections) + + +def _load_lessons() -> tuple[str, list[tuple[str, str]]]: + """Read lessons.md once, split into (topic_slug, body) sections.""" + global _lessons_cache + if _lessons_cache is not None: + return _lessons_cache + if not LESSONS_PATH.exists(): + _lessons_cache = ("", []) + return _lessons_cache + full = LESSONS_PATH.read_text(encoding="utf-8") + sections: list[tuple[str, str]] = [] + # Split on lines like "## Topic: " (case-sensitive marker) + parts = re.split(r"^## Topic:\s+(\S+)\s*$", full, flags=re.MULTILINE) + # parts = [preamble, slug1, body1, slug2, body2, ...] 
+ for i in range(1, len(parts), 2): + slug = parts[i].strip() + body = parts[i + 1].strip() if i + 1 < len(parts) else "" + sections.append((slug, body)) + _lessons_cache = (full, sections) + return _lessons_cache + + +@mcp.tool() +def ppls_api_lessons( + topic: Annotated[ + str | None, + Field(description="OPTIONAL: topic slug or substring (e.g., " + "'rup-handling', 'dicamba', 'rei'). Omit to get " + "the full table of contents."), + ] = None, +) -> str: + """Surface curated agronomy + label-handling knowledge that supplements + the raw label corpus. + + **Call this proactively whenever you're about to give a pesticide + recommendation from search_docs results.** The lessons cover: + EPA signal words, REI/PHI fundamentals, RUP handling, 2(ee)/24(c) + supplemental labels, tank-mix and resistance-management + fundamentals (HRAC/FRAC/IRAC groups), product-specific application + rules (glufosinate, dicamba), Lake Erie watershed considerations + for Ohio, SCN context for soybean, drift management, and the + canonical recommendation format the farmer expects. + + Without these lessons, your recommendations risk being technically + correct but missing the regulatory framing, resistance group + callouts, RUP applicator requirements, or off-target-damage + warnings that make them actionable. Call this first; cite specific + lessons in your response. 
+ """ + with TimedCall("ppls_api_lessons", {"topic": topic}) as _call: + full, sections = _load_lessons() + if not sections: + _call.set(sections=0) + return "_(lessons.md not found — Phase 11 knowledge layer not populated)_" + + if not topic: + _call.set(sections=len(sections), returned="toc") + toc_lines = [ + "# PPLS API lessons — table of contents", + "", + f"Call `ppls_api_lessons(topic='')` to fetch a specific section.", + "", + ] + for slug, body in sections: + # First non-blank, non-list line as the headline summary + summary = "" + for line in body.splitlines(): + s = line.strip() + if s and not s.startswith(("-", "*", "|", "```")): + summary = s[:140] + break + toc_lines.append(f"- **`{slug}`** — {summary}") + return "\n".join(toc_lines) + + topic_lc = topic.lower() + matched = [(slug, body) for slug, body in sections if topic_lc in slug.lower()] + if not matched: + _call.set(sections=0, returned="no-match") + available = ", ".join(f"`{s}`" for s, _ in sections) + return (f"_(no lesson matched topic `{topic}`. Available: {available})_") + + _call.set(sections=len(matched), returned="match") + out: list[str] = [] + for slug, body in matched: + out.append(f"## Topic: {slug}\n\n{body}") + return "\n\n---\n\n".join(out) + + +# --------------------------------------------------------------------------- +# Stubs for later phases # --------------------------------------------------------------------------- # @mcp.tool() # Phase 12 # def find_doc_inconsistencies(scope_query: str, ...) -> str: ... -# @mcp.tool() # Phase 11 -# def ppls_label_lessons(topic: str | None = None) -> str: ... - # =========================================================================== # Entry point