From 3c3178a6adeb3a8c7710268f74987ca4c7d883aa Mon Sep 17 00:00:00 2001 From: Justin Paul Date: Sun, 24 May 2026 12:12:51 -0400 Subject: [PATCH] eval: GPU rerank baseline + CLI fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPU eval (hybrid+rerank, RERANK_URL=http://10.10.1.65:8082): MRR=0.672 Recall@5=0.638 nDCG@5=0.621 (35 queries, 1 transient HTTP 500, otherwise clean) Quality identical to the CPU rerank run as expected — only latency changed (single rerank call dropped from ~23s to ~0.7-1.5s on the Tesla P4). Per-query report at eval/results/with_rerank_gpu.md. CLI parser fix: `--retrievers dense+rerank,hybrid+rerank` now correctly wires the dense+rerank variant. Previously only the literal "rerank" (without the "dense+" prefix) matched the dense+rerank branch, so combined-retriever runs silently dropped dense+rerank. (Note: the eval's RerankedRetriever does 50 individual Chroma `get` calls per query to fetch chunk text by (source, source_key); this adds ~15s per query of pure SQLite lookup overhead. Not a production concern — docs_mcp/server.py's _rerank_pool reranks docs already in the dense pool, with no extra Chroma round-trips. Worth tightening the eval-side implementation on a later pass.) 
Co-Authored-By: Claude Opus 4.7 (1M context) --- eval/results/with_rerank_gpu.md | 52 +++++++++++++++++++++++++++++++++ eval/run_eval.py | 5 ++-- 2 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 eval/results/with_rerank_gpu.md diff --git a/eval/results/with_rerank_gpu.md b/eval/results/with_rerank_gpu.md new file mode 100644 index 0000000..1cd3b12 --- /dev/null +++ b/eval/results/with_rerank_gpu.md @@ -0,0 +1,52 @@ +# Eval results — queries.jsonl + +- queries: 35 +- k: 5 +- pool: 50 +- retrievers: hybrid+rerank + +## Summary + +| Retriever | MRR | Recall@5 | nDCG@5 | Errors | Time (s) | +|---|---|---|---|---|---| +| hybrid+rerank | 0.672 | 0.638 | 0.621 | 0 | 644.3 | + +## Per-query — hybrid+rerank + +| Query | Expected | Top retrieved | MRR | Recall | +|---|---|---|---|---| +| Warrant herbicide rate for soybean | bayer/warrant, epa_ppls/524-591 | bayer/warrant, bayer/warrant-ultra, epa_ppls/524-508 | 1.00 | 0.50 | +| Huskie wheat herbicide tank mix | bayer/huskie, bayer/huskie-complete | bayer/huskie-complete, bayer/huskie, bayer/huskie-fx | 1.00 | 1.00 | +| Harness 20G granular corn herbicide | bayer/harness, epa_ppls/524-487 | epa_ppls/524-487, bayer/harness-xtra-5-6l-herbicide-premix-for-corn, bayer/harne | 1.00 | 1.00 | +| Laudis tembotrione post-emergence corn | bayer/laudis, epa_ppls/264-860 | bayer/laudis, epa_ppls/264-860, epa_ppls/264-1063 | 1.00 | 1.00 | +| Roundup Custom glyphosate burndown application rate | epa_ppls/524-677, epa_ppls/524-475 | epa_ppls/524-677, epa_ppls/279-3548, epa_ppls/42750-60 | 1.00 | 0.50 | +| Liberty 280 SL glufosinate ammonium soybean | epa_ppls/7969-448 | epa_ppls/7969-448, epa_ppls/42750-258, epa_ppls/34704-1080 | 1.00 | 1.00 | +| Atrazine 4L corn pre-emergence rate per acre | epa_ppls/5905-7877 | epa_ppls/352-490, epa_ppls/51036-158, epa_ppls/62719-312 | 0.00 | 0.00 | +| Albaugh dicamba DMA salt application restrictions | epa_ppls/42750-40 | epa_ppls/42750-40, epa_ppls/42750-286, epa_ppls/42750-55 | 1.00 
| 1.00 | +| Authority 4F sulfentrazone soybean residual | epa_ppls/279-3146 | epa_ppls/279-3146, epa_ppls/279-3220, epa_ppls/279-3370 | 1.00 | 1.00 | +| Prowl 10-G pendimethalin granular pre-plant | epa_ppls/241-254 | epa_ppls/241-254, epa_ppls/241-268, epa_ppls/241-243 | 1.00 | 1.00 | +| Callisto GT mesotrione corn postemergence broadleaf control | epa_ppls/100-1470 | epa_ppls/100-1470, epa_ppls/100-1131, epa_ppls/100-1282 | 1.00 | 1.00 | +| Acuron Flexi corn pre-emergence S-metolachlor | epa_ppls/100-1568 | epa_ppls/100-1568, epa_ppls/100-1727, epa_ppls/34704-1156 | 1.00 | 1.00 | +| Sencor 4 flowable metribuzin soybean waterhemp | epa_ppls/264-735 | epa_ppls/264-735, epa_ppls/264-738, epa_ppls/91234-57 | 1.00 | 1.00 | +| Broadstrike trifluralin pre-plant incorporated | epa_ppls/62719-222 | epa_ppls/62719-222, epa_ppls/62719-224, epa_ppls/62719-239 | 1.00 | 1.00 | +| Headline azoxystrobin pyraclostrobin wheat foliar fungicide | epa_ppls/7969-186 | epa_ppls/7969-186, epa_ppls/7969-289, epa_ppls/91234-248 | 1.00 | 1.00 | +| Trivapro pydiflumetofen corn fungicide tar spot | epa_ppls/100-1613 | epa_ppls/100-1613, epa_ppls/100-1605, epa_ppls/100-1601 | 1.00 | 1.00 | +| Poncho 600 clothianidin seed treatment corn | epa_ppls/7969-458 | epa_ppls/7969-458, epa_ppls/7969-482, bayer/poncho-beta | 1.00 | 1.00 | +| Gustafson Lorsban 30 chlorpyrifos granular corn rootworm | epa_ppls/264-932 | epa_ppls/5481-525, epa_ppls/67760-14, epa_ppls/66222-18 | 0.00 | 0.00 | +| RT-3 glyphosate potassium salt herbicide | bayer/rt-3 | bayer/rt-3, bayer/roundup-powermax-3, epa_ppls/524-725 | 1.00 | 1.00 | +| Roundup PowerMAX 3 glyphosate K-salt rate | bayer/roundup-powermax-3, epa_ppls/524-659 | bayer/roundup-powermax-3, bayer/roundup-powermax, bayer/roundup-weathermax | 1.00 | 0.50 | +| Nortron SC ethofumesate sugar beet | bayer/nortron-sc | bayer/nortron-sc, epa_ppls/70506-106, epa_ppls/7969-289 | 1.00 | 1.00 | +| DiFlexx Duo tembotrione dicamba corn | bayer/diflexx-duo | epa_ppls/264-1184, 
bayer/diflexx-duo, epa_ppls/264-1173 | 0.50 | 1.00 | +| Corvus thiencarbazone-methyl isoxaflutole corn pre-emergence | bayer/corvus, epa_ppls/264-1066 | bayer/corvus, bayer/trivolt, bayer/autumn-super-51-wdg | 1.00 | 0.50 | +| Capreno tembotrione thiencarbazone corn herbicide | bayer/capreno, epa_ppls/264-1063 | bayer/capreno, epa_ppls/264-1063, bayer/corvus | 1.00 | 1.00 | +| Tilt propiconazole wheat fungicide rust | epa_ppls/100-617 | epa_ppls/100-737, epa_ppls/100-617, epa_ppls/100-1192 | 0.50 | 1.00 | +| what controls horseweed marestail before planting soybean | epa_ppls/524-475, epa_ppls/524-677 | epa_ppls/524-724, bayer/roundup-powermax-3, epa_ppls/100-965 | 0.00 | 0.00 | +| what can I tank mix with 2,4-D for burndown in spring | epa_ppls/5905-7877, epa_ppls/228-666 | epa_ppls/42750-19, epa_ppls/71368-115, epa_ppls/91234-113 | 0.00 | 0.00 | +| best fungicide for corn tar spot foliar application | epa_ppls/100-1613, epa_ppls/100-1547 | epa_ppls/264-752, epa_ppls/84229-44, epa_ppls/100-1613 | 0.33 | 0.50 | +| seed treatment to control wireworm in corn | epa_ppls/7969-458, epa_ppls/7969-459 | epa_ppls/7969-288, epa_ppls/7969-482, epa_ppls/1381-167 | 0.00 | 0.00 | +| pre-emergence residual herbicide for soybean for waterhemp | epa_ppls/279-3146, epa_ppls/264-735 | epa_ppls/241-396, epa_ppls/264-1212, epa_ppls/7969-140 | 0.00 | 0.00 | +| what insecticide for soybean aphid foliar | epa_ppls/279-3206, epa_ppls/264-840 | epa_ppls/264-656, epa_ppls/264-1051, epa_ppls/264-827 | 0.00 | 0.00 | +| what is the rainfast interval for glyphosate | epa_ppls/524-475, epa_ppls/524-677 | epa_ppls/524-440, epa_ppls/42750-60, epa_ppls/42750-122 | 0.00 | 0.00 | +| wheat fungicide for fusarium head blight | epa_ppls/7969-186, epa_ppls/100-1547 | epa_ppls/7969-186, epa_ppls/264-752, epa_ppls/7969-306 | 1.00 | 0.50 | +| endangered species act precautions for pesticide application | epa_ppls/524-475, epa_ppls/524-591 | epa_ppls/70506-154, epa_ppls/66222-143, epa_ppls/264-823 | 0.00 | 
0.00 | +| what herbicide do I use for postemergence broadleaf in corn | bayer/laudis, bayer/capreno, bayer/diflexx-duo | epa_ppls/228-320, epa_ppls/524-476, epa_ppls/279-9601 | 0.20 | 0.33 | \ No newline at end of file diff --git a/eval/run_eval.py b/eval/run_eval.py index d2ae94d..52bdd74 100644 --- a/eval/run_eval.py +++ b/eval/run_eval.py @@ -102,7 +102,7 @@ def main() -> int: DenseRetriever, BM25Retriever, HybridRetriever, RerankedRetriever ) - wanted = [x.strip() for x in args.retrievers.split(",") if x.strip()] + wanted = {x.strip() for x in args.retrievers.split(",") if x.strip()} dense = DenseRetriever() bm25 = BM25Retriever() @@ -113,7 +113,8 @@ def main() -> int: retrievers.append(("bm25", bm25)) if "hybrid" in wanted: retrievers.append(("hybrid-rrf", HybridRetriever(dense=dense, bm25=bm25, pool=args.pool))) - if "rerank" in wanted: + # Accept either "rerank" or "dense+rerank" for the dense-base reranker. + if "rerank" in wanted or "dense+rerank" in wanted: retrievers.append(("dense+rerank", RerankedRetriever(base=dense, pool=args.pool))) if "hybrid+rerank" in wanted: