diff --git a/docs_mcp/server.py b/docs_mcp/server.py index 28b1345..44c5f96 100644 --- a/docs_mcp/server.py +++ b/docs_mcp/server.py @@ -35,8 +35,11 @@ log = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Product-specific configuration. Set these for each new build. # --------------------------------------------------------------------------- -PRODUCT_NAME = os.environ.get("PRODUCT_NAME", "myproduct") -PRODUCT_DOCS_URL = os.environ.get("PRODUCT_DOCS_URL", "https://docs.example.com") +PRODUCT_NAME = os.environ.get("PRODUCT_NAME", "hvm") +PRODUCT_DOCS_URL = os.environ.get( + "PRODUCT_DOCS_URL", + "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007735en_us", +) COLLECTION = f"{PRODUCT_NAME}_docs" # Paths inside the deployed container (and matching layout locally for dev). @@ -104,6 +107,15 @@ def _build_where(version: str | None, platform: str | None, bundle_id: str | Non return {"$and": conds} +def _where_for_bm25(version: str | None, platform: str | None, bundle_id: str | None) -> dict | None: + """BM25Index.query takes a flat dict of equality filters.""" + w: dict[str, str] = {} + if version: w["version"] = version + if platform: w["platform"] = platform + if bundle_id: w["bundle_id"] = bundle_id + return w or None + + def _read_page(bundle_id: str, page_id: str) -> tuple[str, dict] | None: """Read a corpus page off disk. Returns (markdown_body, metadata_dict).""" md_path = CORPUS / bundle_id / (page_id + ".md") @@ -113,6 +125,115 @@ def _read_page(bundle_id: str, page_id: str) -> tuple[str, dict] | None: return md_path.read_text(), json.loads(json_path.read_text()) +_CHROMA = None +_BM25 = None + + +def _collection(): + """Lazy Chroma collection handle. 
Cached after first call.""" + global _CHROMA + if _CHROMA is None: + import chromadb + from chromadb.config import Settings + from rag.embeddings import embedding_function + + client = chromadb.PersistentClient( + path=str(CHROMA_DIR), + settings=Settings(anonymized_telemetry=False), + ) + _CHROMA = client.get_collection(COLLECTION, embedding_function=embedding_function()) + return _CHROMA + + +def _bm25(): + """Lazy BM25Index handle. None if the FTS5 db isn't built.""" + global _BM25 + if _BM25 is None: + if not BM25_DB.exists(): + return None + try: + from rag.bm25 import BM25Index + _BM25 = BM25Index(str(BM25_DB)) + except Exception as e: # defensive: hybrid must never block dense + log.warning("BM25 unavailable, falling back to dense-only: %s", e) + return None + return _BM25 + + +def _enrich_from_chroma(col, chunk_ids: list[str], fused: list | None) -> tuple[list[str], list[dict], list[float]]: + """Fetch document text + metadata for a list of chunk ids from Chroma, in order.""" + if not chunk_ids: + return [], [], [] + g = col.get(ids=chunk_ids, include=["documents", "metadatas"]) + by_id = {i: (d, m) for i, d, m in zip(g["ids"], g["documents"], g["metadatas"])} + docs = [by_id[i][0] for i in chunk_ids if i in by_id] + metas = [by_id[i][1] for i in chunk_ids if i in by_id] + # Look fused scores up by id, not by position: an id missing from Chroma is + # dropped above and must not shift scores; no fused scores means 0.0 distances. + score_of = {cid: score for cid, score, _src in fused or ()} + dists = [1.0 - score_of.get(i, 1.0) for i in chunk_ids if i in by_id] + return docs, metas, dists + + +def _rerank(query: str, candidates: list[tuple[str, str]]) -> list[tuple[str, str]] | None: + """POST to RERANK_URL /v1/rerank, return candidates re-ordered by relevance. + + `candidates` is `[(chunk_id, text), ...]`. Texts are truncated to ~2000 chars + before sending so we never blow past jina-reranker's 1024-token per-pair + cap (which 400s the entire batch). The full untruncated text still goes + back to the user from Chroma; truncation is reranking-only.
+ + Returns None on any failure — caller treats that as "skip reranking, + keep retrieval-order candidates." + """ + if not RERANK_URL or not candidates: + return None + try: + import httpx + payload = { + "query": query, + "documents": [(text or "")[:2000] for _cid, text in candidates], + "top_n": len(candidates), + } + with httpx.Client(timeout=RERANK_TIMEOUT) as c: + r = c.post(f"{RERANK_URL}/v1/rerank", json=payload) + r.raise_for_status() + results = r.json().get("results") or [] + order = [candidates[item["index"]] for item in results + if isinstance(item.get("index"), int) and 0 <= item["index"] < len(candidates)] + return order or None + except Exception as e: + log.warning("rerank failed, keeping retrieval order: %s", e) + return None + + +def _rrf_fuse(*ranked_lists: list[str], k: int = RRF_K) -> list[tuple[str, float, dict]]: + """Reciprocal Rank Fusion. Each ranked list is a sequence of ids in + descending relevance. Returns [(id, fused_score, per_retriever_contrib), ...] + sorted by score desc.""" + scores: dict[str, float] = {} + sources: dict[str, dict] = {} + names = ("dense", "bm25", "extra") + for idx, lst in enumerate(ranked_lists): + src = names[idx] if idx < len(names) else f"r{idx}" + for rank, ident in enumerate(lst, start=1): + scores[ident] = scores.get(ident, 0.0) + 1.0 / (k + rank) + sources.setdefault(ident, {})[src] = rank + ranked = sorted(scores.items(), key=lambda kv: -kv[1]) + return [(ident, score, sources[ident]) for ident, score in ranked] + + +def _source_url(bundle_id: str, page_id: str) -> str: + """Build the canonical docs portal URL for a (bundle, page) pair.""" + b = _bundles().get(bundle_id) + if not b: + return "" + doc_id = b.get("doc_id", "") + if page_id.startswith("GUID-"): + return f"https://support.hpe.com/hpesc/public/docDisplay?docId={doc_id}&page={page_id}.html" + return f"https://support.hpe.com/hpesc/public/docDisplay?docId={doc_id}" + + # 
=========================================================================== # Tools # =========================================================================== @@ -134,7 +255,7 @@ def search_docs( ] = None, k: Annotated[int, Field(description="Number of results to return.", ge=1, le=50)] = 10, ) -> str: - """Search the {product} docs corpus. + """Search the HPE Morpheus VM Essentials (HVM) docs corpus. Returns the top-k most relevant chunks (with full source page URLs) given a natural-language query. Optional filters narrow the search @@ -142,20 +263,130 @@ def search_docs( first if you need to discover the available facet values. Call this tool whenever the user asks anything that should be - answerable from the official product documentation. + answerable from the official product documentation — install, + upgrade, configuration, backups, networking, HVM clusters, the + Morpheus UI, or any 8.1.x release-notes question. """ with TimedCall("search_docs", { "query": query, "version": version, "platform": platform, "bundle_id": bundle_id, "k": k, }) as _call: - # TODO Phase 2-3: query Chroma collection (see rag/index.py for - # how it was built). Render the top-k chunks as markdown with - # source URLs. - # TODO Phase 6: optional reranker via _rerank() if RERANK_URL set. - # TODO Phase 8: hybrid retrieval if HYBRID_SEARCH=true — run - # dense + BM25 in parallel, RRF-fuse, hand merged pool to rerank. - _call.set(hits_returned=0) - raise NotImplementedError("Phase 2/3: implement Chroma query + rendering") + try: + col = _collection() + except Exception as e: + log.exception("chroma collection unavailable") + _call.set(hits_returned=0, error=str(e)) + return f"_(search backend unavailable: {e})_" + + where = _build_where(version, platform, bundle_id) + bm25_where = _where_for_bm25(version, platform, bundle_id) + pool = max(k * 5, 50) + + # Retrieval mode selection. 
Eval on this corpus (2026-05-22, 22 golden + # queries) showed BM25 MRR=0.88 vs dense MRR=0.54 vs hybrid MRR=0.69 — + # HPE structured docs use controlled vocabulary, so lexical match wins. + # Dense is kept as fallback when BM25 has no tokens to chew on (e.g. + # purely stopword queries). HYBRID_SEARCH=true forces RRF fusion. + bm = _bm25() + docs: list[str] = [] + metas: list[dict] = [] + dists: list[float] = [] + retrieval_mode = "dense" + top1_source = "dense_only" + + if HYBRID_SEARCH and bm is not None: + try: + dense_res = col.query(query_texts=[query], n_results=pool, where=where) + dense_ids = (dense_res.get("ids") or [[]])[0] + bm_hits = bm.query(query, n=pool, where=bm25_where) + bm_ids = [cid for cid, _s in bm_hits] + fused = _rrf_fuse(dense_ids, bm_ids) + docs, metas, dists = _enrich_from_chroma(col, [c for c, _, _ in fused[:k]], fused) + if fused: + src0 = fused[0][2] + top1_source = ("both" if {"dense", "bm25"} <= set(src0) + else "bm25_only" if "bm25" in src0 + else "dense_only") + retrieval_mode = "hybrid" + except Exception as e: + log.warning("hybrid failed, falling back to BM25→dense: %s", e) + + if not docs and bm is not None: + try: + bm_hits = bm.query(query, n=k, where=bm25_where) + if bm_hits: + ids = [cid for cid, _s in bm_hits[:k]] + docs, metas, _ = _enrich_from_chroma(col, ids, None) + # FTS5 returns negative scores (lower=better). Map onto a + # similarity-ish [0..1] just for display. + dists = [max(0.0, min(1.0, 1.0 - abs(s) / 20.0)) for _id, s in bm_hits[:k]] + retrieval_mode = "bm25" + top1_source = "bm25_only" + except Exception as e: + log.warning("BM25 retrieval failed, falling back to dense: %s", e) + + if not docs: + res = col.query(query_texts=[query], n_results=k, where=where) + docs = (res.get("documents") or [[]])[0] + metas = (res.get("metadatas") or [[]])[0] + dists = (res.get("distances") or [[]])[0] + + reranker_fired = False + if RERANK_URL and docs: + # Pull a deeper pool to give the reranker something to chew on. 
+ # We over-fetch up to RERANK_POOL chunks from whichever retriever + # already won, then ask the reranker to pick the final top-k. + pool_size = max(k, RERANK_POOL) + if len(docs) < pool_size: + if retrieval_mode == "bm25": + extra = bm.query(query, n=pool_size, where=bm25_where) if bm else [] + extra_ids = [cid for cid, _s in extra] + else: + extra_res = col.query(query_texts=[query], n_results=pool_size, where=where) + extra_ids = (extra_res.get("ids") or [[]])[0] + if extra_ids: + d2, m2, _ = _enrich_from_chroma(col, extra_ids, None) + docs, metas = d2, m2 + dists = [0.0] * len(docs) + # Reranker scores chunk_ids — collapse to (id, text) tuples + pairs = list(zip( + [f"{m.get('bundle_id','')}::{m.get('page_id','')}::{m.get('ordinal',0)}" for m in metas], + docs, + )) + reranked = _rerank(query, pairs) + if reranked is not None: + # Re-sort docs/metas to match. Recompute distances as ascending ordinal + # ranks (best hit = 0.0) so the displayed score (1 - dist) is highest first. + by_cid = {p[0]: i for i, p in enumerate(pairs)} + order = [by_cid[cid] for cid, _t in reranked if cid in by_cid] + docs = [docs[i] for i in order][:k] + metas = [metas[i] for i in order][:k] + dists = [rank / len(reranked) for rank, _ in enumerate(reranked)][:len(docs)] + reranker_fired = True + else: + docs, metas, dists = docs[:k], metas[:k], dists[:k] + + _call.set(hits_returned=len(docs), retrieval_mode=retrieval_mode, + top1_source=top1_source, reranker_fired=reranker_fired) + if not docs: + return f"_No matches for `{query}`._" + + out = [f"# {len(docs)} result(s) for `{query}`", ""] + for doc, meta, dist in zip(docs, metas, dists): + bid = meta.get("bundle_id", "") + pid = meta.get("page_id", "") + title = meta.get("title") or pid + ver = meta.get("version") or "" + url = _source_url(bid, pid) + header = f"## {title}" + if ver: + header += f" _(v{ver})_" + out.append(header) + out.append(f"[{bid}/{pid}]({url}) · score={1 - dist:.3f}") + out.append("") + out.append(doc.strip()) + out.append("") +
return "\n".join(out) @mcp.tool() @@ -175,9 +406,21 @@ def get_page( return f"Page not found: {bundle_id}/{page_id}" md, meta = data _call.set(found=True, page_chars=len(md)) - # TODO: add a metadata header (title, version, source URL) above - # the body. Product-specific shape. - return md + title = meta.get("title") or page_id + ver = meta.get("version") + parent = meta.get("parent_title") + url = _source_url(bundle_id, page_id) + header = [f"# {title}"] + ctx = [] + if ver: + ctx.append(f"version **{ver}**") + if parent: + ctx.append(f"in **{parent}**") + if ctx: + header.append("_" + " · ".join(ctx) + "_") + header.append(f"[source]({url})") + header.append("") + return "\n".join(header) + "\n" + md @mcp.tool() @@ -193,14 +436,22 @@ def list_versions() -> str: versions = sorted({b.get("version") for b in cat.values() if b.get("version")}) platforms = sorted({b.get("platform") for b in cat.values() if b.get("platform")}) _call.set(versions=len(versions), platforms=len(platforms)) + products = sorted({b.get("product") for b in cat.values() if b.get("product")}) lines = [f"# Facets across {len(cat)} bundle(s)", ""] if versions: - lines.append("## Versions"); lines.append("") - for v in versions: lines.append(f"- `{v}`") - lines.append("") + lines += ["## Versions", ""] + [f"- `{v}`" for v in versions] + [""] if platforms: - lines.append("## Platforms"); lines.append("") - for p in platforms: lines.append(f"- `{p}`") + lines += ["## Platforms", ""] + [f"- `{p}`" for p in platforms] + [""] + if products: + lines += ["## Product / doc types", ""] + [f"- {p}" for p in products] + [""] + lines += ["## Bundles", ""] + for slug in sorted(cat): + b = cat[slug] + kind = b.get("product") or "" + ver = b.get("version") + pages = b.get("page_count", "?") + label = f"{kind} {ver}".strip() if ver else kind + lines.append(f"- `{slug}` — {label} ({pages} pages)") return "\n".join(lines) diff --git a/eval/queries.jsonl b/eval/queries.jsonl new file mode 100644 index 
0000000..7a16b5d --- /dev/null +++ b/eval/queries.jsonl @@ -0,0 +1,22 @@ +{"query": "VME Manager sizing recommendations small medium large", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-0F55384D-5632-4CDC-AA39-A21C1C089AFA"}], "tags": ["deployment", "sizing", "keyword-heavy"]} +{"query": "create an instance backup", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-9DA38943-BF95-446D-AB09-32323160D51B"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-9DA38943-BF95-446D-AB09-32323160D51B"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-9DA38943-BF95-446D-AB09-32323160D51B"}], "tags": ["backups", "how-to"]} +{"query": "what are the host hardware requirements", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-BE7493B3-B866-4269-9C13-ABFCF84658F2"}], "tags": ["deployment", "prereqs"]} +{"query": "Japanese keyboard layout in console sessions", "expected": [{"bundle_id": "hvm_release_notes_8_1_2", "page_id": "sd00007734en_us"}], "tags": ["release-notes", "8.1.2", "localization"]} +{"query": "elevate to Morpheus Enterprise", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-ECCA4FDD-37C8-45CE-A71F-C6E73B3BA713"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-ECCA4FDD-37C8-45CE-A71F-C6E73B3BA713"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-ECCA4FDD-37C8-45CE-A71F-C6E73B3BA713"}], "tags": ["upgrade"]} +{"query": "create an HVM cluster", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-99397996-8315-49C1-9E2F-2EED51CE03F3"}], "tags": ["deployment", "cluster"]} +{"query": "back up and restore the VM Essentials manager", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-2686BF33-E793-4BF9-98BE-82F800ED03EB"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-2686BF33-E793-4BF9-98BE-82F800ED03EB"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-2686BF33-E793-4BF9-98BE-82F800ED03EB"}], "tags": ["backups", 
"disaster-recovery"]} +{"query": "configure storage buckets", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-30F59A1F-0573-4D88-A679-B5C5168BCE6D"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-30F59A1F-0573-4D88-A679-B5C5168BCE6D"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-30F59A1F-0573-4D88-A679-B5C5168BCE6D"}], "tags": ["storage"]} +{"query": "disable two-factor authentication", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-0CBD386D-DED9-474D-A9F3-587F58B2D22D"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-0CBD386D-DED9-474D-A9F3-587F58B2D22D"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-0CBD386D-DED9-474D-A9F3-587F58B2D22D"}], "tags": ["security", "auth"]} +{"query": "upgrading the manager", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-4EDB4324-2C3B-435F-80FF-F430D02A2FDA"}], "tags": ["upgrade"]} +{"query": "supported storage protocols", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-6EBCB223-4C48-456F-950C-C8ED5610A0F8"}], "tags": ["deployment", "storage"]} +{"query": "network bonding configuration", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-6F1B4C62-CCE2-4AE5-9CE7-83C407BFE290"}], "tags": ["networking"]} +{"query": "what TCP ports does HVM need open", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-97DDED8D-EE6B-4819-8080-E163FD533CAB"}], "tags": ["networking", "firewall"]} +{"query": "install HVM OS on a host server", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-28F18596-4902-4CD1-83F3-1411430C5534"}], "tags": ["deployment", "install"]} +{"query": "configure Linux images for HVM clusters", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-8D494112-C361-4300-B7BF-B4DFE06E871C"}], "tags": ["deployment", "images"]} +{"query": "create a user account", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": 
"GUID-21972435-BFD0-481F-A3E6-A52B116989F3"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-21972435-BFD0-481F-A3E6-A52B116989F3"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-21972435-BFD0-481F-A3E6-A52B116989F3"}], "tags": ["admin", "users"]} +{"query": "Openstack Swift bucket", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-0D60567D-2DD3-4D8B-92D6-6849C7D773EA"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-0D60567D-2DD3-4D8B-92D6-6849C7D773EA"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-0D60567D-2DD3-4D8B-92D6-6849C7D773EA"}], "tags": ["storage", "rare-token"]} +{"query": "API reference for VM Essentials", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-88494051-74D1-4BD9-BAFF-134A573FF77B"}], "tags": ["api"]} +{"query": "Worker version compatibility 8.1.2", "expected": [{"bundle_id": "hvm_release_notes_8_1_2", "page_id": "sd00007734en_us"}], "tags": ["release-notes", "8.1.2"]} +{"query": "recommended converged networking setup scenario", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-2DD9D39D-9031-4BB5-A4ED-A0179BEF5259"}], "tags": ["networking", "deployment"]} +{"query": "qualification matrix supported hardware", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-E3635F0A-11DA-4078-8C3A-8D4B75724849"}], "tags": ["deployment", "compatibility"]} +{"query": "configure the VM Essentials manager initial setup", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-456E190C-E912-4079-A691-8D2368D63748"}], "tags": ["deployment", "configuration"]} diff --git a/eval/results/baseline.md b/eval/results/baseline.md new file mode 100644 index 0000000..ddc088a --- /dev/null +++ b/eval/results/baseline.md @@ -0,0 +1,126 @@ +# Retrieval eval — k=5 + +_22 hand-curated queries, generated 2026-05-22 12:35:57_ + +| Retriever | MRR | Recall@5 | nDCG@5 | avg latency | +| --- | ---: | ---: | ---: | ---: | +| `dense` | 0.539 | 0.621 | 0.558 | 
88ms | +| `bm25` | 0.880 | 0.909 | 0.883 | 3ms | +| `hybrid_rrf` | 0.692 | 0.818 | 0.713 | 69ms | +| `bm25+rerank` | 0.920 | 0.939 | 0.927 | 490ms | +| `hybrid_rrf+rerank` | 0.875 | 0.894 | 0.881 | 549ms | + +## Per-query results + +| Retriever | Query | MRR | top-1 | +| --- | --- | ---: | --- | +| `dense` | VME Manager sizing recommendations small medium large | 0.000 | `hvm_user_manual_8_1_0/GUID-5D0F2D33-623B-4AA3-...` | +| `dense` | create an instance backup | 0.250 | `hvm_user_manual_8_1_0/GUID-1C1ADB18-710D-40A4-...` | +| `dense` | what are the host hardware requirements | 0.500 | `hvm_deployment_guide/GUID-3DA92E9D-0635-427A-...` | +| `dense` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` | +| `dense` | elevate to Morpheus Enterprise | 0.000 | `hvm_user_manual_8_1_0/GUID-37CA1665-B331-452A-...` | +| `dense` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` | +| `dense` | back up and restore the VM Essentials manager | 0.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `dense` | configure storage buckets | 0.100 | `hvm_user_manual_8_1_0/GUID-3AC9CEB6-F60C-46FA-...` | +| `dense` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` | +| `dense` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `dense` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` | +| `dense` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` | +| `dense` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` | +| `dense` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` | +| `dense` | configure Linux images for HVM clusters | 0.000 | `hvm_deployment_guide/GUID-EC50D35A-3B63-44DD-...` | +| `dense` | create a user account | 0.000 | 
`hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` | +| `dense` | Openstack Swift bucket | 0.000 | `hvm_user_manual_8_1_0/GUID-B9045AFD-6F61-42D7-...` | +| `dense` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` | +| `dense` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` | +| `dense` | recommended converged networking setup scenario | 0.500 | `hvm_deployment_guide/GUID-BDF3EFBF-EA90-4E6F-...` | +| `dense` | qualification matrix supported hardware | 0.500 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `dense` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` | +| `bm25` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` | +| `bm25` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` | +| `bm25` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `bm25` | Japanese keyboard layout in console sessions | 0.250 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` | +| `bm25` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` | +| `bm25` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` | +| `bm25` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` | +| `bm25` | configure storage buckets | 0.100 | `hvm_user_manual_8_1_0/GUID-6B35AE72-3D19-447A-...` | +| `bm25` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` | +| `bm25` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `bm25` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` | +| `bm25` | network bonding configuration | 1.000 | 
`hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` | +| `bm25` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` | +| `bm25` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` | +| `bm25` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` | +| `bm25` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` | +| `bm25` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` | +| `bm25` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` | +| `bm25` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` | +| `bm25` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` | +| `bm25` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` | +| `bm25` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` | +| `hybrid_rrf` | VME Manager sizing recommendations small medium large | 0.200 | `hvm_user_manual_8_1_0/GUID-BB3046E2-F2D4-4B45-...` | +| `hybrid_rrf` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` | +| `hybrid_rrf` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `hybrid_rrf` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` | +| `hybrid_rrf` | elevate to Morpheus Enterprise | 0.500 | `hvm_user_manual_8_1_0/GUID-37CA1665-B331-452A-...` | +| `hybrid_rrf` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` | +| `hybrid_rrf` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` | +| 
`hybrid_rrf` | configure storage buckets | 0.200 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` | +| `hybrid_rrf` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` | +| `hybrid_rrf` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `hybrid_rrf` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` | +| `hybrid_rrf` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` | +| `hybrid_rrf` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` | +| `hybrid_rrf` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` | +| `hybrid_rrf` | configure Linux images for HVM clusters | 0.200 | `hvm_deployment_guide/GUID-EC50D35A-3B63-44DD-...` | +| `hybrid_rrf` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` | +| `hybrid_rrf` | Openstack Swift bucket | 0.125 | `hvm_user_manual_8_1_0/GUID-6B35AE72-3D19-447A-...` | +| `hybrid_rrf` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` | +| `hybrid_rrf` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` | +| `hybrid_rrf` | recommended converged networking setup scenario | 0.500 | `hvm_deployment_guide/GUID-BDF3EFBF-EA90-4E6F-...` | +| `hybrid_rrf` | qualification matrix supported hardware | 0.500 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `hybrid_rrf` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` | +| `bm25+rerank` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` | +| `bm25+rerank` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` | +| `bm25+rerank` | what are the host hardware 
requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `bm25+rerank` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_2/GUID-BCD3CA0B-5C7E-46CB-...` | +| `bm25+rerank` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` | +| `bm25+rerank` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` | +| `bm25+rerank` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` | +| `bm25+rerank` | configure storage buckets | 0.250 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` | +| `bm25+rerank` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` | +| `bm25+rerank` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `bm25+rerank` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` | +| `bm25+rerank` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` | +| `bm25+rerank` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` | +| `bm25+rerank` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` | +| `bm25+rerank` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` | +| `bm25+rerank` | create a user account | 1.000 | `hvm_user_manual_8_1_0/GUID-21972435-BFD0-481F-...` | +| `bm25+rerank` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` | +| `bm25+rerank` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` | +| `bm25+rerank` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` | +| `bm25+rerank` | recommended converged networking setup scenario | 1.000 | 
`hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` | +| `bm25+rerank` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` | +| `bm25+rerank` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` | +| `hybrid_rrf+rerank` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` | +| `hybrid_rrf+rerank` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` | +| `hybrid_rrf+rerank` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `hybrid_rrf+rerank` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` | +| `hybrid_rrf+rerank` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` | +| `hybrid_rrf+rerank` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` | +| `hybrid_rrf+rerank` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` | +| `hybrid_rrf+rerank` | configure storage buckets | 0.250 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` | +| `hybrid_rrf+rerank` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` | +| `hybrid_rrf+rerank` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `hybrid_rrf+rerank` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` | +| `hybrid_rrf+rerank` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` | +| `hybrid_rrf+rerank` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` | +| `hybrid_rrf+rerank` | install HVM OS on a host server | 1.000 | 
`hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` | +| `hybrid_rrf+rerank` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` | +| `hybrid_rrf+rerank` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-443F9C0E-FFD9-48B9-...` | +| `hybrid_rrf+rerank` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` | +| `hybrid_rrf+rerank` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` | +| `hybrid_rrf+rerank` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` | +| `hybrid_rrf+rerank` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` | +| `hybrid_rrf+rerank` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` | +| `hybrid_rrf+rerank` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` | diff --git a/eval/results/with_reranker.md b/eval/results/with_reranker.md new file mode 100644 index 0000000..ddc088a --- /dev/null +++ b/eval/results/with_reranker.md @@ -0,0 +1,126 @@ +# Retrieval eval — k=5 + +_22 hand-curated queries, generated 2026-05-22 12:35:57_ + +| Retriever | MRR | Recall@5 | nDCG@5 | avg latency | +| --- | ---: | ---: | ---: | ---: | +| `dense` | 0.539 | 0.621 | 0.558 | 88ms | +| `bm25` | 0.880 | 0.909 | 0.883 | 3ms | +| `hybrid_rrf` | 0.692 | 0.818 | 0.713 | 69ms | +| `bm25+rerank` | 0.920 | 0.939 | 0.927 | 490ms | +| `hybrid_rrf+rerank` | 0.875 | 0.894 | 0.881 | 549ms | + +## Per-query results + +| Retriever | Query | MRR | top-1 | +| --- | --- | ---: | --- | +| `dense` | VME Manager sizing recommendations small medium large | 0.000 | `hvm_user_manual_8_1_0/GUID-5D0F2D33-623B-4AA3-...` | +| `dense` | create an instance backup | 0.250 | `hvm_user_manual_8_1_0/GUID-1C1ADB18-710D-40A4-...` | +| `dense` | what are 
the host hardware requirements | 0.500 | `hvm_deployment_guide/GUID-3DA92E9D-0635-427A-...` | +| `dense` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` | +| `dense` | elevate to Morpheus Enterprise | 0.000 | `hvm_user_manual_8_1_0/GUID-37CA1665-B331-452A-...` | +| `dense` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` | +| `dense` | back up and restore the VM Essentials manager | 0.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `dense` | configure storage buckets | 0.100 | `hvm_user_manual_8_1_0/GUID-3AC9CEB6-F60C-46FA-...` | +| `dense` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` | +| `dense` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `dense` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` | +| `dense` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` | +| `dense` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` | +| `dense` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` | +| `dense` | configure Linux images for HVM clusters | 0.000 | `hvm_deployment_guide/GUID-EC50D35A-3B63-44DD-...` | +| `dense` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` | +| `dense` | Openstack Swift bucket | 0.000 | `hvm_user_manual_8_1_0/GUID-B9045AFD-6F61-42D7-...` | +| `dense` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` | +| `dense` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` | +| `dense` | recommended converged networking setup scenario | 0.500 | `hvm_deployment_guide/GUID-BDF3EFBF-EA90-4E6F-...` | +| `dense` | qualification matrix supported hardware | 0.500 | 
`hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `dense` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` | +| `bm25` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` | +| `bm25` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` | +| `bm25` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `bm25` | Japanese keyboard layout in console sessions | 0.250 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` | +| `bm25` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` | +| `bm25` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` | +| `bm25` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` | +| `bm25` | configure storage buckets | 0.100 | `hvm_user_manual_8_1_0/GUID-6B35AE72-3D19-447A-...` | +| `bm25` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` | +| `bm25` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `bm25` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` | +| `bm25` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` | +| `bm25` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` | +| `bm25` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` | +| `bm25` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` | +| `bm25` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` | +| `bm25` | Openstack Swift bucket | 1.000 | 
`hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` | +| `bm25` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` | +| `bm25` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` | +| `bm25` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` | +| `bm25` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` | +| `bm25` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` | +| `hybrid_rrf` | VME Manager sizing recommendations small medium large | 0.200 | `hvm_user_manual_8_1_0/GUID-BB3046E2-F2D4-4B45-...` | +| `hybrid_rrf` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` | +| `hybrid_rrf` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `hybrid_rrf` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` | +| `hybrid_rrf` | elevate to Morpheus Enterprise | 0.500 | `hvm_user_manual_8_1_0/GUID-37CA1665-B331-452A-...` | +| `hybrid_rrf` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` | +| `hybrid_rrf` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` | +| `hybrid_rrf` | configure storage buckets | 0.200 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` | +| `hybrid_rrf` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` | +| `hybrid_rrf` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `hybrid_rrf` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` | +| `hybrid_rrf` | network bonding configuration | 1.000 | 
`hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` | +| `hybrid_rrf` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` | +| `hybrid_rrf` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` | +| `hybrid_rrf` | configure Linux images for HVM clusters | 0.200 | `hvm_deployment_guide/GUID-EC50D35A-3B63-44DD-...` | +| `hybrid_rrf` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` | +| `hybrid_rrf` | Openstack Swift bucket | 0.125 | `hvm_user_manual_8_1_0/GUID-6B35AE72-3D19-447A-...` | +| `hybrid_rrf` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` | +| `hybrid_rrf` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` | +| `hybrid_rrf` | recommended converged networking setup scenario | 0.500 | `hvm_deployment_guide/GUID-BDF3EFBF-EA90-4E6F-...` | +| `hybrid_rrf` | qualification matrix supported hardware | 0.500 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `hybrid_rrf` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` | +| `bm25+rerank` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` | +| `bm25+rerank` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` | +| `bm25+rerank` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `bm25+rerank` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_2/GUID-BCD3CA0B-5C7E-46CB-...` | +| `bm25+rerank` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` | +| `bm25+rerank` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` | +| `bm25+rerank` | back up and restore the VM Essentials manager | 
1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` | +| `bm25+rerank` | configure storage buckets | 0.250 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` | +| `bm25+rerank` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` | +| `bm25+rerank` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `bm25+rerank` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` | +| `bm25+rerank` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` | +| `bm25+rerank` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` | +| `bm25+rerank` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` | +| `bm25+rerank` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` | +| `bm25+rerank` | create a user account | 1.000 | `hvm_user_manual_8_1_0/GUID-21972435-BFD0-481F-...` | +| `bm25+rerank` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` | +| `bm25+rerank` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` | +| `bm25+rerank` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` | +| `bm25+rerank` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` | +| `bm25+rerank` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` | +| `bm25+rerank` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` | +| `hybrid_rrf+rerank` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` | +| `hybrid_rrf+rerank` | create an instance backup | 1.000 | 
`hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` | +| `hybrid_rrf+rerank` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` | +| `hybrid_rrf+rerank` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` | +| `hybrid_rrf+rerank` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` | +| `hybrid_rrf+rerank` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` | +| `hybrid_rrf+rerank` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` | +| `hybrid_rrf+rerank` | configure storage buckets | 0.250 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` | +| `hybrid_rrf+rerank` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` | +| `hybrid_rrf+rerank` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` | +| `hybrid_rrf+rerank` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` | +| `hybrid_rrf+rerank` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` | +| `hybrid_rrf+rerank` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` | +| `hybrid_rrf+rerank` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` | +| `hybrid_rrf+rerank` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` | +| `hybrid_rrf+rerank` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-443F9C0E-FFD9-48B9-...` | +| `hybrid_rrf+rerank` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` | +| `hybrid_rrf+rerank` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` | +| `hybrid_rrf+rerank` 
| Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` | +| `hybrid_rrf+rerank` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` | +| `hybrid_rrf+rerank` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` | +| `hybrid_rrf+rerank` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` | diff --git a/eval/retrievers.py b/eval/retrievers.py index bc06a18..872cf31 100644 --- a/eval/retrievers.py +++ b/eval/retrievers.py @@ -10,7 +10,7 @@ to one entry; the highest-ranked chunk's position wins). """ from __future__ import annotations -from typing import Protocol, Iterable +from typing import Iterable, Protocol class Retriever(Protocol): @@ -21,12 +21,17 @@ class Retriever(Protocol): ... -def _collapse_to_pages(chunk_ids: Iterable[tuple[str, str, str]], k: int) -> list[tuple[str, str]]: - """Take a stream of (bundle_id, page_id, chunk_ordinal) and return - the first k unique pages in their first-seen order.""" +def _split_chunk_id(chunk_id: str) -> tuple[str, str, int]: + """`bundle::page::ordinal` -> (bundle, page, int(ordinal)).""" + bid, pid, ordinal = chunk_id.split("::") + return bid, pid, int(ordinal) + + +def _collapse_to_pages(chunk_ids: Iterable[str], k: int) -> list[tuple[str, str]]: seen: set[tuple[str, str]] = set() out: list[tuple[str, str]] = [] - for bid, pid, _ord in chunk_ids: + for cid in chunk_ids: + bid, pid, _ord = _split_chunk_id(cid) key = (bid, pid) if key in seen: continue @@ -37,26 +42,111 @@ def _collapse_to_pages(chunk_ids: Iterable[tuple[str, str, str]], k: int) -> lis return out -# TODO Phase 2/3 — implement these once Chroma + the bm25 module are -# in place. Each one is small (15-30 LOC). The eval harness imports -# from this module by class name. 
class DenseRetriever:
    """Dense vector search over a Chroma collection, collapsed to pages.

    ``collection`` only needs a ``query(query_texts=..., n_results=...)``
    method returning a mapping whose ``"ids"`` entry holds one list of chunk
    ids per query text.
    """
    name = "dense"

    def __init__(self, collection, pool: int = 50):
        self.col = collection
        # Number of chunks fetched before chunks are collapsed into pages.
        self.pool = pool

    def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
        """Return up to ``k`` unique ``(bundle_id, page_id)`` pairs for ``query``."""
        # Fetch at least k chunks: with fewer chunks than requested pages the
        # caller could never receive k results, whatever the corpus holds.
        res = self.col.query(query_texts=[query], n_results=max(self.pool, k))
        ids = (res.get("ids") or [[]])[0]
        return _collapse_to_pages(ids, k)


class BM25Retriever:
    """Lexical search through a BM25 index, collapsed to pages.

    ``bm25_index`` only needs a ``query(text, n=...)`` method yielding
    ``(chunk_id, score)`` pairs in rank order.
    """
    name = "bm25"

    def __init__(self, bm25_index, pool: int = 200):
        self.bm = bm25_index
        # Number of chunk hits fetched before collapsing into pages.
        self.pool = pool

    def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
        """Return up to ``k`` unique ``(bundle_id, page_id)`` pairs for ``query``."""
        # Same reasoning as DenseRetriever: never ask for fewer hits than pages.
        hits = self.bm.query(query, n=max(self.pool, k))
        return _collapse_to_pages((cid for cid, _score in hits), k)


class HybridRetriever:
    """Reciprocal Rank Fusion of dense + BM25 page rankings.

    Each page scores ``1 / (k_rrf + rank)`` in every ranking it appears in
    (ranks start at 1); the per-ranking scores are summed and pages are
    returned by descending total.
    """
    name = "hybrid_rrf"

    def __init__(self, dense: DenseRetriever, bm25: BM25Retriever, k_rrf: int = 60, pool: int = 100):
        self.dense = dense
        self.bm25 = bm25
        self.k_rrf = k_rrf
        # Number of pages requested from each underlying retriever.
        self.pool = pool

    def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
        """Return the top ``k`` fused ``(bundle_id, page_id)`` pairs."""
        scores: dict[tuple[str, str], float] = {}
        for ranking in (self.dense.retrieve(query, k=self.pool),
                        self.bm25.retrieve(query, k=self.pool)):
            for rank, page in enumerate(ranking, start=1):
                scores[page] = scores.get(page, 0.0) + 1.0 / (self.k_rrf + rank)
        # The sort is stable, so tied pages keep first-seen order (dense first).
        ranked = sorted(scores.items(), key=lambda kv: -kv[1])
        return [page for page, _s in ranked[:k]]


def _rerank_pool(rerank_url: str, query: str, ids_and_texts: list[tuple[str, str]],
                 timeout: float = 30.0) -> list[str] | None:
    """POST the candidates to ``{rerank_url}/v1/rerank``; return ids in reranked order.

    ``ids_and_texts`` is ``[(id, text), ...]``. Texts are cut to 2000 characters
    before sending. Result entries whose ``index`` is missing, out of range or
    repeated are ignored.

    Returns ``[]`` for an empty candidate list and ``None`` on any failure
    (transport error, non-2xx status, malformed payload, ``httpx`` not
    installed) so that callers can fall back to the un-reranked order.
    """
    if not ids_and_texts:
        return []
    import logging

    try:
        # Imported inside the try so that a missing optional dependency
        # degrades exactly like an unreachable reranker instead of crashing.
        import httpx

        with httpx.Client(timeout=timeout) as client:
            resp = client.post(f"{rerank_url}/v1/rerank", json={
                "query": query,
                "documents": [(t or "")[:2000] for _i, t in ids_and_texts],
                "top_n": len(ids_and_texts),
            })
            resp.raise_for_status()
            results = resp.json().get("results") or []
    except Exception as exc:  # best-effort boundary: reranking must never block retrieval
        logging.getLogger(__name__).warning(
            "rerank request failed, keeping base order: %s", exc)
        return None

    ordered: list[str] = []
    used: set[int] = set()
    for item in results:
        idx = item.get("index") if isinstance(item, dict) else None
        # bool is an int subclass; True/False are not meaningful positions.
        if isinstance(idx, bool) or not isinstance(idx, int):
            continue
        if not 0 <= idx < len(ids_and_texts) or idx in used:
            continue
        used.add(idx)
        ordered.append(ids_and_texts[idx][0])
    return ordered


class RerankedRetriever:
    """Pull a candidate pool via a base retriever, then cross-encoder re-rank.

    ``collection`` only needs a ``get(ids=..., include=[...])`` method returning
    a mapping with parallel ``"ids"`` and ``"documents"`` lists.
    """

    def __init__(self, base: Retriever, collection, rerank_url: str, name_suffix: str = "rerank",
                 pool: int = 50, timeout: float = 30.0):
        self.base = base
        self.col = collection
        self.url = rerank_url
        self.name = f"{base.name}+{name_suffix}"
        # Number of candidate pages requested from the base retriever.
        self.pool = pool
        self.timeout = timeout

    def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
        """Return up to ``k`` pages, reranked when the reranker is available.

        Falls back to the base order when the rerank call fails. Candidates
        the reranker did not return are appended after the reranked ones in
        base order, so a truncated or partial response never shrinks the
        result below what the base retriever supplied.
        """
        # Base returns deduplicated page-level tuples; rerank needs CHUNK-level
        # texts to be informative. Pull each page's chunk 0 text from the store.
        pages = self.base.retrieve(query, k=self.pool)
        if not pages:
            return []
        # Remember which page each chunk id stands for rather than re-parsing
        # the id afterwards: splitting on "::" breaks on ids that contain it.
        page_by_chunk = {f"{bid}::{pid}::0": (bid, pid) for bid, pid in pages}
        chunk_ids = list(page_by_chunk)
        g = self.col.get(ids=chunk_ids, include=["documents"])
        by_id = dict(zip(g["ids"], g["documents"]))
        ids_and_texts = [(cid, by_id.get(cid) or "") for cid in chunk_ids]
        order = _rerank_pool(self.url, query, ids_and_texts, timeout=self.timeout)
        if order is None:
            return pages[:k]
        out: list[tuple[str, str]] = []
        seen: set[tuple[str, str]] = set()
        for cid in (*order, *chunk_ids):
            page = page_by_chunk[cid]
            if page in seen:
                continue
            seen.add(page)
            out.append(page)
            if len(out) >= k:
                break
        return out
- ) + import os + import chromadb + from chromadb.config import Settings + from rag.embeddings import embedding_function + from rag.bm25 import BM25Index + from eval.retrievers import DenseRetriever, BM25Retriever, HybridRetriever + + product = os.environ.get("PRODUCT_NAME", "hvm") + repo_root = Path(__file__).resolve().parent.parent + client = chromadb.PersistentClient(path=str(repo_root / "chroma"), + settings=Settings(anonymized_telemetry=False)) + col = client.get_collection(f"{product}_docs", embedding_function=embedding_function()) + bm = BM25Index(str(repo_root / "bm25" / f"{product}_docs.db")) + + from eval.retrievers import RerankedRetriever + + dense = DenseRetriever(col) + bm25 = BM25Retriever(bm) + hybrid = HybridRetriever(DenseRetriever(col, pool=100), BM25Retriever(bm, pool=100)) + + retrievers = [dense, bm25, hybrid] + + rerank_url = os.environ.get("RERANK_URL", "").rstrip("/") + if rerank_url: + retrievers += [ + RerankedRetriever(bm25, col, rerank_url, name_suffix="rerank", pool=50), + RerankedRetriever(hybrid, col, rerank_url, name_suffix="rerank", pool=50), + ] + print(f"reranker enabled: {rerank_url}") + + rows: dict[str, dict[str, float]] = {} + per_query: list[dict] = [] + for r in retrievers: + mrr_sum = recall_sum = ndcg_sum = 0.0 + elapsed_sum = 0.0 + for q in queries: + expected = [(e["bundle_id"], e["page_id"]) for e in q["expected"]] + t0 = time.time() + retrieved = r.retrieve(q["query"], k=max(args.k, 10)) + elapsed = time.time() - t0 + mrr = reciprocal_rank(retrieved, expected) + recall = recall_at_k(retrieved, expected, args.k) + ndcg = ndcg_at_k(retrieved, expected, args.k) + mrr_sum += mrr + recall_sum += recall + ndcg_sum += ndcg + elapsed_sum += elapsed + per_query.append({ + "retriever": r.name, "query": q["query"], + "mrr": mrr, "recall@k": recall, "ndcg@k": ndcg, + "top1": list(retrieved[0]) if retrieved else None, + "elapsed_s": round(elapsed, 3), + }) + n = len(queries) + rows[r.name] = { + "MRR": mrr_sum / n, + 
f"Recall@{args.k}": recall_sum / n, + f"nDCG@{args.k}": ndcg_sum / n, + "avg_latency_s": elapsed_sum / n, + } + print(f" {r.name}: MRR={rows[r.name]['MRR']:.3f} " + f"Recall@{args.k}={rows[r.name][f'Recall@{args.k}']:.3f} " + f"nDCG@{args.k}={rows[r.name][f'nDCG@{args.k}']:.3f} " + f"avg={rows[r.name]['avg_latency_s']*1000:.0f}ms") + + args.output.parent.mkdir(parents=True, exist_ok=True) + md = [f"# Retrieval eval — k={args.k}", "", + f"_{len(queries)} hand-curated queries, generated {time.strftime('%Y-%m-%d %H:%M:%S')}_", "", + "| Retriever | MRR | Recall@{k} | nDCG@{k} | avg latency |".replace("{k}", str(args.k)), + "| --- | ---: | ---: | ---: | ---: |"] + for name, m in rows.items(): + md.append(f"| `{name}` | {m['MRR']:.3f} | {m[f'Recall@{args.k}']:.3f} " + f"| {m[f'nDCG@{args.k}']:.3f} | {m['avg_latency_s']*1000:.0f}ms |") + md += ["", "## Per-query results", "", + "| Retriever | Query | MRR | top-1 |", "| --- | --- | ---: | --- |"] + for r in per_query: + top1 = f"`{r['top1'][0]}/{r['top1'][1][:24]}...`" if r["top1"] else "—" + md.append(f"| `{r['retriever']}` | {r['query'][:60]} | {r['mrr']:.3f} | {top1} |") + args.output.write_text("\n".join(md) + "\n") + print(f"wrote {args.output}") + return 0 if __name__ == "__main__": diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/rerank_server.py b/scripts/rerank_server.py new file mode 100644 index 0000000..f7e08b5 --- /dev/null +++ b/scripts/rerank_server.py @@ -0,0 +1,120 @@ +"""Minimal HTTP reranker — `/v1/rerank` endpoint over a sentence-transformers CrossEncoder. + +Matches the Cohere `/v1/rerank` request/response shape, which is what the +server's `_rerank()` helper expects. This is the dev-friendly fallback; +production replaces this with the llama.cpp + jina-reranker-v2-base GGUF +sidecar (see deploy/docker-compose.yml) without changing the client. 
+ +Request: + POST /v1/rerank + {"model": "...", "query": "...", "documents": ["text", ...], "top_n": 10} + +Response: + {"model": "...", "results": [{"index": 0, "relevance_score": 0.93}, ...]} + +Usage: + python -m scripts.rerank_server # localhost:8001 + RERANK_MODEL=cross-encoder/ms-marco-MiniLM-L-12-v2 \\ + RERANK_PORT=8001 python -m scripts.rerank_server +""" +from __future__ import annotations + +import json +import logging +import os +import sys +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + +MODEL_NAME = os.environ.get("RERANK_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2") +PORT = int(os.environ.get("RERANK_PORT", "8001")) +HOST = os.environ.get("RERANK_HOST", "127.0.0.1") +# Truncate docs to this many chars before scoring. jina-reranker GGUF has a +# 1024-token per-pair cap that 400s the entire batch; ms-marco is more +# forgiving but we still cap to keep latency predictable. 
MAX_DOC_CHARS = int(os.environ.get("RERANK_MAX_DOC_CHARS", "2000"))

# Process-wide CrossEncoder instance, created on first use by _get_model().
_model = None


def _get_model():
    """Return the shared CrossEncoder for MODEL_NAME, loading it on first call."""
    global _model
    if _model is None:
        # Imported lazily so the module can be imported without the heavy dependency.
        from sentence_transformers import CrossEncoder
        log.info("loading %s", MODEL_NAME)
        _model = CrossEncoder(MODEL_NAME)
        log.info("loaded")
    return _model


def _rerank(query: str, documents: list[str], top_n: int | None) -> list[dict]:
    """Score every document against ``query``; return entries sorted best-first.

    Each entry is ``{"index": <position in documents>, "relevance_score": float}``.
    Documents are cut to MAX_DOC_CHARS characters before scoring. ``top_n``
    truncates the result when it is a non-negative int; ``None`` or a negative
    value returns every entry (a negative slice bound would silently drop
    entries from the tail instead).
    """
    if not documents:
        # Nothing to score: skip loading and calling the model altogether.
        return []
    model = _get_model()
    pairs = [[query, (d or "")[:MAX_DOC_CHARS]] for d in documents]
    scores = model.predict(pairs)
    ranked = sorted(
        ({"index": i, "relevance_score": float(s)} for i, s in enumerate(scores)),
        key=lambda r: -r["relevance_score"],
    )
    if top_n is not None and top_n >= 0:
        ranked = ranked[:top_n]
    return ranked


class Handler(BaseHTTPRequestHandler):
    """HTTP handler exposing a health check and the rerank endpoint."""

    def log_message(self, fmt, *args):
        # Route the base class's access log through the module logger.
        log.info("%s - %s", self.address_string(), fmt % args)

    def _send_json(self, status: int, payload: dict) -> None:
        """Write ``payload`` as a JSON response with the given HTTP status."""
        body = json.dumps(payload).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    @staticmethod
    def _parse_rerank_request(raw: bytes) -> tuple[str, list, int | None]:
        """Decode and validate a rerank request body.

        Returns ``(query, documents, top_n)``. ``top_n`` is ``None`` unless the
        request carries a non-negative integer. Raises ``ValueError`` with a
        client-facing message when the body is not valid JSON or does not have
        the expected shape.
        """
        try:
            req = json.loads(raw.decode())
        except ValueError as e:  # JSONDecodeError and UnicodeDecodeError both derive from it
            raise ValueError(f"bad json: {e}") from e
        shape_error = "expected {query: str, documents: list[str]}"
        if not isinstance(req, dict):
            raise ValueError(shape_error)
        query = req.get("query")
        documents = req.get("documents")
        if not isinstance(query, str) or not isinstance(documents, list):
            raise ValueError(shape_error)
        # None entries are tolerated (scored as empty text); anything else that
        # is not a string would otherwise surface as a 500 from the slicing.
        if not all(d is None or isinstance(d, str) for d in documents):
            raise ValueError(shape_error)
        top_n = req.get("top_n")
        # bool is an int subclass, so exclude it explicitly.
        if isinstance(top_n, bool) or not isinstance(top_n, int) or top_n < 0:
            top_n = None
        return query, documents, top_n

    def do_GET(self):  # noqa: N802
        """Answer ``/`` and ``/health`` with a status document; 404 otherwise."""
        if self.path in ("/", "/health"):
            self._send_json(200, {"status": "ok", "model": MODEL_NAME})
            return
        self._send_json(404, {"error": "not found"})

    def do_POST(self):  # noqa: N802
        """Handle ``POST /v1/rerank`` (alias ``/rerank``)."""
        if self.path not in ("/v1/rerank", "/rerank"):
            self._send_json(404, {"error": "not found"})
            return
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            length = -1
        if length < 0:
            # A negative length would make rfile.read() wait for end-of-stream.
            self._send_json(400, {"error": "invalid Content-Length"})
            return
        try:
            query, documents, top_n = self._parse_rerank_request(self.rfile.read(length))
        except ValueError as e:
            self._send_json(400, {"error": str(e)})
            return
        try:
            results = _rerank(query, documents, top_n)
        except Exception as e:  # top-level boundary: report instead of dropping the connection
            log.exception("rerank failed")
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, {"model": MODEL_NAME, "results": results})


def main() -> int:
    """Load the model, then serve until interrupted. Returns the exit code."""
    _get_model()  # warm-load before accepting traffic
    server = ThreadingHTTPServer((HOST, PORT), Handler)
    log.info("listening on http://%s:%d", HOST, PORT)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        log.info("shutting down")
    finally:
        # Release the listening socket on every exit path.
        server.server_close()
    return 0


if __name__ == "__main__":
    sys.exit(main())