search: BM25-default + cross-encoder rerank, hybrid behind env gate

Implements phases 3, 6, 7, and 8 in a single commit because they depend on each other.

* docs_mcp/server.py
  - Wire search_docs / get_page / list_versions tool bodies.
  - search_docs flow: BM25 first (rag.bm25 FTS5) → over-fetch RERANK_POOL
    chunks → POST to RERANK_URL/v1/rerank → return top-k. Dense is the
    fallback when BM25 finds nothing. HYBRID_SEARCH=true switches to
    dense+BM25+RRF (fused via the new _rrf_fuse helper).
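  - Hybrid, BM25, and reranker failures are caught and fall back to the
    next layer, so a dead reranker or missing BM25 db never blocks a
    search. The final dense query and the rerank over-fetch queries are
    not wrapped in try/except.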
  - Source URLs built from the bundle's docId so results link straight
    into support.hpe.com.

* eval/
  - 22 hand-curated golden queries grounded in real corpus page titles.
  - DenseRetriever / BM25Retriever / HybridRetriever / RerankedRetriever
    + MRR/Recall@K/nDCG@K harness. RERANK_URL env activates the
    reranked variants.
  - Committed eval/results/baseline.md. On this corpus:
        dense:                MRR 0.539
        bm25:                 MRR 0.880
        hybrid_rrf:           MRR 0.692
        bm25+rerank:          MRR 0.920  (winner)
        hybrid_rrf+rerank:    MRR 0.875
    HPE structured docs use controlled vocabulary, so lexical match
    dominates. Hybrid loses because dense pollutes the fused pool.

* scripts/rerank_server.py
  - Minimal HTTP /v1/rerank over sentence-transformers
    cross-encoder/ms-marco-MiniLM-L-6-v2. Cohere-style request/response.
  - This is the dev/CPU fallback; production replaces it with the
    llama.cpp + jina-reranker-v2-base GGUF sidecar (same wire protocol).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-22 13:06:51 -04:00
parent dd691b0111
commit dda044eb95
8 changed files with 864 additions and 57 deletions
+271 -20
View File
@@ -35,8 +35,11 @@ log = logging.getLogger(__name__)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Product-specific configuration. Set these for each new build. # Product-specific configuration. Set these for each new build.
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
PRODUCT_NAME = os.environ.get("PRODUCT_NAME", "myproduct") PRODUCT_NAME = os.environ.get("PRODUCT_NAME", "hvm")
PRODUCT_DOCS_URL = os.environ.get("PRODUCT_DOCS_URL", "https://docs.example.com") PRODUCT_DOCS_URL = os.environ.get(
"PRODUCT_DOCS_URL",
"https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007735en_us",
)
COLLECTION = f"{PRODUCT_NAME}_docs" COLLECTION = f"{PRODUCT_NAME}_docs"
# Paths inside the deployed container (and matching layout locally for dev). # Paths inside the deployed container (and matching layout locally for dev).
@@ -104,6 +107,15 @@ def _build_where(version: str | None, platform: str | None, bundle_id: str | Non
return {"$and": conds} return {"$and": conds}
def _where_for_bm25(version: str | None, platform: str | None, bundle_id: str | None) -> dict | None:
"""BM25Index.query takes a flat dict of equality filters."""
w: dict[str, str] = {}
if version: w["version"] = version
if platform: w["platform"] = platform
if bundle_id: w["bundle_id"] = bundle_id
return w or None
def _read_page(bundle_id: str, page_id: str) -> tuple[str, dict] | None: def _read_page(bundle_id: str, page_id: str) -> tuple[str, dict] | None:
"""Read a corpus page off disk. Returns (markdown_body, metadata_dict).""" """Read a corpus page off disk. Returns (markdown_body, metadata_dict)."""
md_path = CORPUS / bundle_id / (page_id + ".md") md_path = CORPUS / bundle_id / (page_id + ".md")
@@ -113,6 +125,115 @@ def _read_page(bundle_id: str, page_id: str) -> tuple[str, dict] | None:
return md_path.read_text(), json.loads(json_path.read_text()) return md_path.read_text(), json.loads(json_path.read_text())
# Module-level caches, filled lazily by _collection() and _bm25().
_CHROMA = None
_BM25 = None


def _collection():
    """Return the Chroma collection handle, opening it on first use.

    The handle is stored in the module-level ``_CHROMA`` so later calls
    reuse it instead of opening the persistent client again.
    """
    global _CHROMA
    if _CHROMA is not None:
        return _CHROMA

    # Imports stay local to this function, as in the original code.
    import chromadb
    from chromadb.config import Settings
    from rag.embeddings import embedding_function

    store = chromadb.PersistentClient(
        path=str(CHROMA_DIR),
        settings=Settings(anonymized_telemetry=False),
    )
    _CHROMA = store.get_collection(COLLECTION, embedding_function=embedding_function())
    return _CHROMA
def _bm25():
    """Return the shared BM25Index, or None when BM25 cannot be used.

    None is returned when the FTS5 database file does not exist or when
    opening the index raises. A successfully opened index is cached in
    the module-level ``_BM25``.
    """
    global _BM25
    if _BM25 is not None:
        return _BM25
    if not BM25_DB.exists():
        return None
    try:
        from rag.bm25 import BM25Index
        index = BM25Index(str(BM25_DB))
    except Exception as e:  # defensive: hybrid must never block dense
        log.warning("BM25 unavailable, falling back to dense-only: %s", e)
        return None
    _BM25 = index
    return _BM25
def _enrich_from_chroma(col, chunk_ids: list[str], fused: list | None) -> tuple[list[str], list[dict], list[float]]:
"""Fetch document text + metadata for a list of chunk ids from Chroma, in order."""
if not chunk_ids:
return [], [], []
g = col.get(ids=chunk_ids, include=["documents", "metadatas"])
by_id = {i: (d, m) for i, d, m in zip(g["ids"], g["documents"], g["metadatas"])}
docs = [by_id[i][0] for i in chunk_ids if i in by_id]
metas = [by_id[i][1] for i in chunk_ids if i in by_id]
if fused is not None:
dists = [1.0 - score for _id, score, _src in fused[:len(docs)]]
else:
dists = [0.0] * len(docs)
return docs, metas, dists
def _rerank(query: str, candidates: list[tuple[str, str]]) -> list[tuple[str, str]] | None:
    """POST to RERANK_URL /v1/rerank, return candidates re-ordered by relevance.

    `candidates` is `[(chunk_id, text), ...]`. Texts are truncated to ~2000 chars
    before sending so we never blow past jina-reranker's 1024-token per-pair
    cap (which 400s the entire batch). The full untruncated text still goes
    back to the user from Chroma; truncation is reranking-only.

    Result entries whose `index` is not an in-range int are ignored, and a
    repeated index is used only once, so no candidate appears twice in the
    returned list.

    Returns None on any failure — caller treats that as "skip reranking,
    keep retrieval-order candidates."
    """
    if not RERANK_URL or not candidates:
        return None
    try:
        import httpx
        payload = {
            "query": query,
            "documents": [(text or "")[:2000] for _cid, text in candidates],
            "top_n": len(candidates),
        }
        # Tolerate a trailing slash in RERANK_URL so we never POST to "//v1/rerank".
        endpoint = f"{str(RERANK_URL).rstrip('/')}/v1/rerank"
        with httpx.Client(timeout=RERANK_TIMEOUT) as c:
            r = c.post(endpoint, json=payload)
            r.raise_for_status()
            results = r.json().get("results") or []
        seen: set[int] = set()
        order: list[tuple[str, str]] = []
        for item in results:
            idx = item.get("index")
            if not isinstance(idx, int) or not 0 <= idx < len(candidates):
                continue
            if idx in seen:
                # A duplicated index would surface the same chunk twice downstream.
                continue
            seen.add(idx)
            order.append(candidates[idx])
        return order or None
    except Exception as e:
        log.warning("rerank failed, keeping retrieval order: %s", e)
        return None
def _rrf_fuse(*ranked_lists: list[str], k: int = RRF_K) -> list[tuple[str, float, dict]]:
    """Merge several rankings with Reciprocal Rank Fusion.

    Every positional argument is a sequence of ids ordered best-first. An
    id at 1-based rank ``r`` in a list adds ``1 / (k + r)`` to that id's
    fused score. The first three lists are labelled "dense", "bm25" and
    "extra"; any further list is labelled ``r<index>``.

    Returns ``[(id, fused_score, {label: rank, ...}), ...]`` sorted by
    fused score, highest first.
    """
    labels = ("dense", "bm25", "extra")
    totals: dict[str, float] = {}
    contrib: dict[str, dict] = {}
    for position, ranking in enumerate(ranked_lists):
        label = labels[position] if position < len(labels) else f"r{position}"
        for place, chunk_id in enumerate(ranking, start=1):
            if chunk_id not in totals:
                totals[chunk_id] = 0.0
            totals[chunk_id] += 1.0 / (k + place)
            if chunk_id not in contrib:
                contrib[chunk_id] = {}
            contrib[chunk_id][label] = place
    # Stable sort on the negated score keeps first-seen order for ties.
    ordered = sorted(totals, key=lambda chunk_id: -totals[chunk_id])
    return [(chunk_id, totals[chunk_id], contrib[chunk_id]) for chunk_id in ordered]
def _source_url(bundle_id: str, page_id: str) -> str:
    """Build the canonical docs portal URL for a (bundle, page) pair.

    Page ids starting with "GUID-" link to that page via
    ``&page=<page_id>.html``; any other page id links to the document root.

    Returns "" when the bundle is not in the catalog or has no ``doc_id``,
    since a URL with an empty ``docId`` would be a dead link.
    """
    b = _bundles().get(bundle_id)
    if not b:
        return ""
    doc_id = b.get("doc_id") or ""
    if not doc_id:
        return ""
    base = f"https://support.hpe.com/hpesc/public/docDisplay?docId={doc_id}"
    if page_id.startswith("GUID-"):
        return f"{base}&page={page_id}.html"
    return base
# =========================================================================== # ===========================================================================
# Tools # Tools
# =========================================================================== # ===========================================================================
@@ -134,7 +255,7 @@ def search_docs(
] = None, ] = None,
k: Annotated[int, Field(description="Number of results to return.", ge=1, le=50)] = 10, k: Annotated[int, Field(description="Number of results to return.", ge=1, le=50)] = 10,
) -> str: ) -> str:
"""Search the {product} docs corpus. """Search the HPE Morpheus VM Essentials (HVM) docs corpus.
Returns the top-k most relevant chunks (with full source page URLs) Returns the top-k most relevant chunks (with full source page URLs)
given a natural-language query. Optional filters narrow the search given a natural-language query. Optional filters narrow the search
@@ -142,20 +263,130 @@ def search_docs(
first if you need to discover the available facet values. first if you need to discover the available facet values.
Call this tool whenever the user asks anything that should be Call this tool whenever the user asks anything that should be
answerable from the official product documentation. answerable from the official product documentation — install,
upgrade, configuration, backups, networking, HVM clusters, the
Morpheus UI, or any 8.1.x release-notes question.
""" """
with TimedCall("search_docs", { with TimedCall("search_docs", {
"query": query, "version": version, "platform": platform, "query": query, "version": version, "platform": platform,
"bundle_id": bundle_id, "k": k, "bundle_id": bundle_id, "k": k,
}) as _call: }) as _call:
# TODO Phase 2-3: query Chroma collection (see rag/index.py for try:
# how it was built). Render the top-k chunks as markdown with col = _collection()
# source URLs. except Exception as e:
# TODO Phase 6: optional reranker via _rerank() if RERANK_URL set. log.exception("chroma collection unavailable")
# TODO Phase 8: hybrid retrieval if HYBRID_SEARCH=true — run _call.set(hits_returned=0, error=str(e))
# dense + BM25 in parallel, RRF-fuse, hand merged pool to rerank. return f"_(search backend unavailable: {e})_"
_call.set(hits_returned=0)
raise NotImplementedError("Phase 2/3: implement Chroma query + rendering") where = _build_where(version, platform, bundle_id)
bm25_where = _where_for_bm25(version, platform, bundle_id)
pool = max(k * 5, 50)
# Retrieval mode selection. Eval on this corpus (2026-05-22, 22 golden
# queries) showed BM25 MRR=0.88 vs dense MRR=0.54 vs hybrid MRR=0.69 —
# HPE structured docs use controlled vocabulary, so lexical match wins.
# Dense is kept as fallback when BM25 has no tokens to chew on (e.g.
# purely stopword queries). HYBRID_SEARCH=true forces RRF fusion.
bm = _bm25()
docs: list[str] = []
metas: list[dict] = []
dists: list[float] = []
retrieval_mode = "dense"
top1_source = "dense_only"
if HYBRID_SEARCH and bm is not None:
try:
dense_res = col.query(query_texts=[query], n_results=pool, where=where)
dense_ids = (dense_res.get("ids") or [[]])[0]
bm_hits = bm.query(query, n=pool, where=bm25_where)
bm_ids = [cid for cid, _s in bm_hits]
fused = _rrf_fuse(dense_ids, bm_ids)
docs, metas, dists = _enrich_from_chroma(col, [c for c, _, _ in fused[:k]], fused)
if fused:
src0 = fused[0][2]
top1_source = ("both" if {"dense", "bm25"} <= set(src0)
else "bm25_only" if "bm25" in src0
else "dense_only")
retrieval_mode = "hybrid"
except Exception as e:
log.warning("hybrid failed, falling back to BM25→dense: %s", e)
if not docs and bm is not None:
try:
bm_hits = bm.query(query, n=k, where=bm25_where)
if bm_hits:
ids = [cid for cid, _s in bm_hits[:k]]
docs, metas, _ = _enrich_from_chroma(col, ids, None)
# FTS5 returns negative scores (lower=better). Map onto a
# similarity-ish [0..1] just for display.
dists = [max(0.0, min(1.0, 1.0 - abs(s) / 20.0)) for _id, s in bm_hits[:k]]
retrieval_mode = "bm25"
top1_source = "bm25_only"
except Exception as e:
log.warning("BM25 retrieval failed, falling back to dense: %s", e)
if not docs:
res = col.query(query_texts=[query], n_results=k, where=where)
docs = (res.get("documents") or [[]])[0]
metas = (res.get("metadatas") or [[]])[0]
dists = (res.get("distances") or [[]])[0]
reranker_fired = False
if RERANK_URL and docs:
# Pull a deeper pool to give the reranker something to chew on.
# We over-fetch up to RERANK_POOL chunks from whichever retriever
# already won, then ask the reranker to pick the final top-k.
pool_size = max(k, RERANK_POOL)
if len(docs) < pool_size:
if retrieval_mode == "bm25":
extra = bm.query(query, n=pool_size, where=bm25_where) if bm else []
extra_ids = [cid for cid, _s in extra]
else:
extra_res = col.query(query_texts=[query], n_results=pool_size, where=where)
extra_ids = (extra_res.get("ids") or [[]])[0]
if extra_ids:
d2, m2, _ = _enrich_from_chroma(col, extra_ids, None)
docs, metas = d2, m2
dists = [0.0] * len(docs)
# Reranker scores chunk_ids — collapse to (id, text) tuples
pairs = list(zip(
[f"{m.get('bundle_id','')}::{m.get('page_id','')}::{m.get('ordinal',0)}" for m in metas],
docs,
))
reranked = _rerank(query, pairs)
if reranked is not None:
# Re-sort docs/metas to match. Recompute distances as descending
# ordinal ranks so display still shows a useful score.
by_cid = {p[0]: i for i, p in enumerate(pairs)}
order = [by_cid[cid] for cid, _t in reranked if cid in by_cid]
docs = [docs[i] for i in order][:k]
metas = [metas[i] for i in order][:k]
dists = [1.0 - (rank / len(reranked)) for rank, _ in enumerate(reranked)][:len(docs)]
reranker_fired = True
else:
docs, metas, dists = docs[:k], metas[:k], dists[:k]
_call.set(hits_returned=len(docs), retrieval_mode=retrieval_mode,
top1_source=top1_source, reranker_fired=reranker_fired)
if not docs:
return f"_No matches for `{query}`._"
out = [f"# {len(docs)} result(s) for `{query}`", ""]
for doc, meta, dist in zip(docs, metas, dists):
bid = meta.get("bundle_id", "")
pid = meta.get("page_id", "")
title = meta.get("title") or pid
ver = meta.get("version") or ""
url = _source_url(bid, pid)
header = f"## {title}"
if ver:
header += f" _(v{ver})_"
out.append(header)
out.append(f"[{bid}/{pid}]({url}) · score={1 - dist:.3f}")
out.append("")
out.append(doc.strip())
out.append("")
return "\n".join(out)
@mcp.tool() @mcp.tool()
@@ -175,9 +406,21 @@ def get_page(
return f"Page not found: {bundle_id}/{page_id}" return f"Page not found: {bundle_id}/{page_id}"
md, meta = data md, meta = data
_call.set(found=True, page_chars=len(md)) _call.set(found=True, page_chars=len(md))
# TODO: add a metadata header (title, version, source URL) above title = meta.get("title") or page_id
# the body. Product-specific shape. ver = meta.get("version")
return md parent = meta.get("parent_title")
url = _source_url(bundle_id, page_id)
header = [f"# {title}"]
ctx = []
if ver:
ctx.append(f"version **{ver}**")
if parent:
ctx.append(f"in **{parent}**")
if ctx:
header.append("_" + " · ".join(ctx) + "_")
header.append(f"[source]({url})")
header.append("")
return "\n".join(header) + "\n" + md
@mcp.tool() @mcp.tool()
@@ -193,14 +436,22 @@ def list_versions() -> str:
versions = sorted({b.get("version") for b in cat.values() if b.get("version")}) versions = sorted({b.get("version") for b in cat.values() if b.get("version")})
platforms = sorted({b.get("platform") for b in cat.values() if b.get("platform")}) platforms = sorted({b.get("platform") for b in cat.values() if b.get("platform")})
_call.set(versions=len(versions), platforms=len(platforms)) _call.set(versions=len(versions), platforms=len(platforms))
products = sorted({b.get("product") for b in cat.values() if b.get("product")})
lines = [f"# Facets across {len(cat)} bundle(s)", ""] lines = [f"# Facets across {len(cat)} bundle(s)", ""]
if versions: if versions:
lines.append("## Versions"); lines.append("") lines += ["## Versions", ""] + [f"- `{v}`" for v in versions] + [""]
for v in versions: lines.append(f"- `{v}`")
lines.append("")
if platforms: if platforms:
lines.append("## Platforms"); lines.append("") lines += ["## Platforms", ""] + [f"- `{p}`" for p in platforms] + [""]
for p in platforms: lines.append(f"- `{p}`") if products:
lines += ["## Product / doc types", ""] + [f"- {p}" for p in products] + [""]
lines += ["## Bundles", ""]
for slug in sorted(cat):
b = cat[slug]
kind = b.get("product") or ""
ver = b.get("version")
pages = b.get("page_count", "?")
label = f"{kind} {ver}".strip() if ver else kind
lines.append(f"- `{slug}` — {label} ({pages} pages)")
return "\n".join(lines) return "\n".join(lines)
+22
View File
@@ -0,0 +1,22 @@
{"query": "VME Manager sizing recommendations small medium large", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-0F55384D-5632-4CDC-AA39-A21C1C089AFA"}], "tags": ["deployment", "sizing", "keyword-heavy"]}
{"query": "create an instance backup", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-9DA38943-BF95-446D-AB09-32323160D51B"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-9DA38943-BF95-446D-AB09-32323160D51B"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-9DA38943-BF95-446D-AB09-32323160D51B"}], "tags": ["backups", "how-to"]}
{"query": "what are the host hardware requirements", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-BE7493B3-B866-4269-9C13-ABFCF84658F2"}], "tags": ["deployment", "prereqs"]}
{"query": "Japanese keyboard layout in console sessions", "expected": [{"bundle_id": "hvm_release_notes_8_1_2", "page_id": "sd00007734en_us"}], "tags": ["release-notes", "8.1.2", "localization"]}
{"query": "elevate to Morpheus Enterprise", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-ECCA4FDD-37C8-45CE-A71F-C6E73B3BA713"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-ECCA4FDD-37C8-45CE-A71F-C6E73B3BA713"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-ECCA4FDD-37C8-45CE-A71F-C6E73B3BA713"}], "tags": ["upgrade"]}
{"query": "create an HVM cluster", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-99397996-8315-49C1-9E2F-2EED51CE03F3"}], "tags": ["deployment", "cluster"]}
{"query": "back up and restore the VM Essentials manager", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-2686BF33-E793-4BF9-98BE-82F800ED03EB"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-2686BF33-E793-4BF9-98BE-82F800ED03EB"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-2686BF33-E793-4BF9-98BE-82F800ED03EB"}], "tags": ["backups", "disaster-recovery"]}
{"query": "configure storage buckets", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-30F59A1F-0573-4D88-A679-B5C5168BCE6D"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-30F59A1F-0573-4D88-A679-B5C5168BCE6D"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-30F59A1F-0573-4D88-A679-B5C5168BCE6D"}], "tags": ["storage"]}
{"query": "disable two-factor authentication", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-0CBD386D-DED9-474D-A9F3-587F58B2D22D"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-0CBD386D-DED9-474D-A9F3-587F58B2D22D"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-0CBD386D-DED9-474D-A9F3-587F58B2D22D"}], "tags": ["security", "auth"]}
{"query": "upgrading the manager", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-4EDB4324-2C3B-435F-80FF-F430D02A2FDA"}], "tags": ["upgrade"]}
{"query": "supported storage protocols", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-6EBCB223-4C48-456F-950C-C8ED5610A0F8"}], "tags": ["deployment", "storage"]}
{"query": "network bonding configuration", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-6F1B4C62-CCE2-4AE5-9CE7-83C407BFE290"}], "tags": ["networking"]}
{"query": "what TCP ports does HVM need open", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-97DDED8D-EE6B-4819-8080-E163FD533CAB"}], "tags": ["networking", "firewall"]}
{"query": "install HVM OS on a host server", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-28F18596-4902-4CD1-83F3-1411430C5534"}], "tags": ["deployment", "install"]}
{"query": "configure Linux images for HVM clusters", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-8D494112-C361-4300-B7BF-B4DFE06E871C"}], "tags": ["deployment", "images"]}
{"query": "create a user account", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-21972435-BFD0-481F-A3E6-A52B116989F3"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-21972435-BFD0-481F-A3E6-A52B116989F3"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-21972435-BFD0-481F-A3E6-A52B116989F3"}], "tags": ["admin", "users"]}
{"query": "Openstack Swift bucket", "expected": [{"bundle_id": "hvm_user_manual_8_1_2", "page_id": "GUID-0D60567D-2DD3-4D8B-92D6-6849C7D773EA"}, {"bundle_id": "hvm_user_manual_8_1_1", "page_id": "GUID-0D60567D-2DD3-4D8B-92D6-6849C7D773EA"}, {"bundle_id": "hvm_user_manual_8_1_0", "page_id": "GUID-0D60567D-2DD3-4D8B-92D6-6849C7D773EA"}], "tags": ["storage", "rare-token"]}
{"query": "API reference for VM Essentials", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-88494051-74D1-4BD9-BAFF-134A573FF77B"}], "tags": ["api"]}
{"query": "Worker version compatibility 8.1.2", "expected": [{"bundle_id": "hvm_release_notes_8_1_2", "page_id": "sd00007734en_us"}], "tags": ["release-notes", "8.1.2"]}
{"query": "recommended converged networking setup scenario", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-2DD9D39D-9031-4BB5-A4ED-A0179BEF5259"}], "tags": ["networking", "deployment"]}
{"query": "qualification matrix supported hardware", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-E3635F0A-11DA-4078-8C3A-8D4B75724849"}], "tags": ["deployment", "compatibility"]}
{"query": "configure the VM Essentials manager initial setup", "expected": [{"bundle_id": "hvm_deployment_guide", "page_id": "GUID-456E190C-E912-4079-A691-8D2368D63748"}], "tags": ["deployment", "configuration"]}
+126
View File
@@ -0,0 +1,126 @@
# Retrieval eval — k=5
_22 hand-curated queries, generated 2026-05-22 12:35:57_
| Retriever | MRR | Recall@5 | nDCG@5 | avg latency |
| --- | ---: | ---: | ---: | ---: |
| `dense` | 0.539 | 0.621 | 0.558 | 88ms |
| `bm25` | 0.880 | 0.909 | 0.883 | 3ms |
| `hybrid_rrf` | 0.692 | 0.818 | 0.713 | 69ms |
| `bm25+rerank` | 0.920 | 0.939 | 0.927 | 490ms |
| `hybrid_rrf+rerank` | 0.875 | 0.894 | 0.881 | 549ms |
## Per-query results
| Retriever | Query | MRR | top-1 |
| --- | --- | ---: | --- |
| `dense` | VME Manager sizing recommendations small medium large | 0.000 | `hvm_user_manual_8_1_0/GUID-5D0F2D33-623B-4AA3-...` |
| `dense` | create an instance backup | 0.250 | `hvm_user_manual_8_1_0/GUID-1C1ADB18-710D-40A4-...` |
| `dense` | what are the host hardware requirements | 0.500 | `hvm_deployment_guide/GUID-3DA92E9D-0635-427A-...` |
| `dense` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` |
| `dense` | elevate to Morpheus Enterprise | 0.000 | `hvm_user_manual_8_1_0/GUID-37CA1665-B331-452A-...` |
| `dense` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` |
| `dense` | back up and restore the VM Essentials manager | 0.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `dense` | configure storage buckets | 0.100 | `hvm_user_manual_8_1_0/GUID-3AC9CEB6-F60C-46FA-...` |
| `dense` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` |
| `dense` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `dense` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` |
| `dense` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` |
| `dense` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` |
| `dense` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` |
| `dense` | configure Linux images for HVM clusters | 0.000 | `hvm_deployment_guide/GUID-EC50D35A-3B63-44DD-...` |
| `dense` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` |
| `dense` | Openstack Swift bucket | 0.000 | `hvm_user_manual_8_1_0/GUID-B9045AFD-6F61-42D7-...` |
| `dense` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` |
| `dense` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` |
| `dense` | recommended converged networking setup scenario | 0.500 | `hvm_deployment_guide/GUID-BDF3EFBF-EA90-4E6F-...` |
| `dense` | qualification matrix supported hardware | 0.500 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `dense` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` |
| `bm25` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` |
| `bm25` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` |
| `bm25` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `bm25` | Japanese keyboard layout in console sessions | 0.250 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` |
| `bm25` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` |
| `bm25` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` |
| `bm25` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` |
| `bm25` | configure storage buckets | 0.100 | `hvm_user_manual_8_1_0/GUID-6B35AE72-3D19-447A-...` |
| `bm25` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` |
| `bm25` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `bm25` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` |
| `bm25` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` |
| `bm25` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` |
| `bm25` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` |
| `bm25` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` |
| `bm25` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` |
| `bm25` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` |
| `bm25` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` |
| `bm25` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` |
| `bm25` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` |
| `bm25` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` |
| `bm25` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` |
| `hybrid_rrf` | VME Manager sizing recommendations small medium large | 0.200 | `hvm_user_manual_8_1_0/GUID-BB3046E2-F2D4-4B45-...` |
| `hybrid_rrf` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` |
| `hybrid_rrf` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `hybrid_rrf` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` |
| `hybrid_rrf` | elevate to Morpheus Enterprise | 0.500 | `hvm_user_manual_8_1_0/GUID-37CA1665-B331-452A-...` |
| `hybrid_rrf` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` |
| `hybrid_rrf` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` |
| `hybrid_rrf` | configure storage buckets | 0.200 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` |
| `hybrid_rrf` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` |
| `hybrid_rrf` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `hybrid_rrf` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` |
| `hybrid_rrf` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` |
| `hybrid_rrf` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` |
| `hybrid_rrf` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` |
| `hybrid_rrf` | configure Linux images for HVM clusters | 0.200 | `hvm_deployment_guide/GUID-EC50D35A-3B63-44DD-...` |
| `hybrid_rrf` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` |
| `hybrid_rrf` | Openstack Swift bucket | 0.125 | `hvm_user_manual_8_1_0/GUID-6B35AE72-3D19-447A-...` |
| `hybrid_rrf` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` |
| `hybrid_rrf` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` |
| `hybrid_rrf` | recommended converged networking setup scenario | 0.500 | `hvm_deployment_guide/GUID-BDF3EFBF-EA90-4E6F-...` |
| `hybrid_rrf` | qualification matrix supported hardware | 0.500 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `hybrid_rrf` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` |
| `bm25+rerank` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` |
| `bm25+rerank` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` |
| `bm25+rerank` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `bm25+rerank` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_2/GUID-BCD3CA0B-5C7E-46CB-...` |
| `bm25+rerank` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` |
| `bm25+rerank` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` |
| `bm25+rerank` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` |
| `bm25+rerank` | configure storage buckets | 0.250 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` |
| `bm25+rerank` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` |
| `bm25+rerank` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `bm25+rerank` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` |
| `bm25+rerank` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` |
| `bm25+rerank` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` |
| `bm25+rerank` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` |
| `bm25+rerank` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` |
| `bm25+rerank` | create a user account | 1.000 | `hvm_user_manual_8_1_0/GUID-21972435-BFD0-481F-...` |
| `bm25+rerank` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` |
| `bm25+rerank` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` |
| `bm25+rerank` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` |
| `bm25+rerank` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` |
| `bm25+rerank` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` |
| `bm25+rerank` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` |
| `hybrid_rrf+rerank` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` |
| `hybrid_rrf+rerank` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` |
| `hybrid_rrf+rerank` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `hybrid_rrf+rerank` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` |
| `hybrid_rrf+rerank` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` |
| `hybrid_rrf+rerank` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` |
| `hybrid_rrf+rerank` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` |
| `hybrid_rrf+rerank` | configure storage buckets | 0.250 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` |
| `hybrid_rrf+rerank` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` |
| `hybrid_rrf+rerank` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `hybrid_rrf+rerank` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` |
| `hybrid_rrf+rerank` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` |
| `hybrid_rrf+rerank` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` |
| `hybrid_rrf+rerank` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` |
| `hybrid_rrf+rerank` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` |
| `hybrid_rrf+rerank` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-443F9C0E-FFD9-48B9-...` |
| `hybrid_rrf+rerank` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` |
| `hybrid_rrf+rerank` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` |
| `hybrid_rrf+rerank` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` |
| `hybrid_rrf+rerank` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` |
| `hybrid_rrf+rerank` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` |
| `hybrid_rrf+rerank` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` |
+126
View File
@@ -0,0 +1,126 @@
# Retrieval eval — k=5
_22 hand-curated queries, generated 2026-05-22 12:35:57_
| Retriever | MRR | Recall@5 | nDCG@5 | avg latency |
| --- | ---: | ---: | ---: | ---: |
| `dense` | 0.539 | 0.621 | 0.558 | 88ms |
| `bm25` | 0.880 | 0.909 | 0.883 | 3ms |
| `hybrid_rrf` | 0.692 | 0.818 | 0.713 | 69ms |
| `bm25+rerank` | 0.920 | 0.939 | 0.927 | 490ms |
| `hybrid_rrf+rerank` | 0.875 | 0.894 | 0.881 | 549ms |
## Per-query results
| Retriever | Query | MRR | top-1 |
| --- | --- | ---: | --- |
| `dense` | VME Manager sizing recommendations small medium large | 0.000 | `hvm_user_manual_8_1_0/GUID-5D0F2D33-623B-4AA3-...` |
| `dense` | create an instance backup | 0.250 | `hvm_user_manual_8_1_0/GUID-1C1ADB18-710D-40A4-...` |
| `dense` | what are the host hardware requirements | 0.500 | `hvm_deployment_guide/GUID-3DA92E9D-0635-427A-...` |
| `dense` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` |
| `dense` | elevate to Morpheus Enterprise | 0.000 | `hvm_user_manual_8_1_0/GUID-37CA1665-B331-452A-...` |
| `dense` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` |
| `dense` | back up and restore the VM Essentials manager | 0.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `dense` | configure storage buckets | 0.100 | `hvm_user_manual_8_1_0/GUID-3AC9CEB6-F60C-46FA-...` |
| `dense` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` |
| `dense` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `dense` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` |
| `dense` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` |
| `dense` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` |
| `dense` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` |
| `dense` | configure Linux images for HVM clusters | 0.000 | `hvm_deployment_guide/GUID-EC50D35A-3B63-44DD-...` |
| `dense` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` |
| `dense` | Openstack Swift bucket | 0.000 | `hvm_user_manual_8_1_0/GUID-B9045AFD-6F61-42D7-...` |
| `dense` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` |
| `dense` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` |
| `dense` | recommended converged networking setup scenario | 0.500 | `hvm_deployment_guide/GUID-BDF3EFBF-EA90-4E6F-...` |
| `dense` | qualification matrix supported hardware | 0.500 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `dense` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` |
| `bm25` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` |
| `bm25` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` |
| `bm25` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `bm25` | Japanese keyboard layout in console sessions | 0.250 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` |
| `bm25` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` |
| `bm25` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` |
| `bm25` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` |
| `bm25` | configure storage buckets | 0.100 | `hvm_user_manual_8_1_0/GUID-6B35AE72-3D19-447A-...` |
| `bm25` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` |
| `bm25` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `bm25` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` |
| `bm25` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` |
| `bm25` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` |
| `bm25` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` |
| `bm25` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` |
| `bm25` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` |
| `bm25` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` |
| `bm25` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` |
| `bm25` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` |
| `bm25` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` |
| `bm25` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` |
| `bm25` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` |
| `hybrid_rrf` | VME Manager sizing recommendations small medium large | 0.200 | `hvm_user_manual_8_1_0/GUID-BB3046E2-F2D4-4B45-...` |
| `hybrid_rrf` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` |
| `hybrid_rrf` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `hybrid_rrf` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` |
| `hybrid_rrf` | elevate to Morpheus Enterprise | 0.500 | `hvm_user_manual_8_1_0/GUID-37CA1665-B331-452A-...` |
| `hybrid_rrf` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` |
| `hybrid_rrf` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` |
| `hybrid_rrf` | configure storage buckets | 0.200 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` |
| `hybrid_rrf` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` |
| `hybrid_rrf` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `hybrid_rrf` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` |
| `hybrid_rrf` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` |
| `hybrid_rrf` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` |
| `hybrid_rrf` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` |
| `hybrid_rrf` | configure Linux images for HVM clusters | 0.200 | `hvm_deployment_guide/GUID-EC50D35A-3B63-44DD-...` |
| `hybrid_rrf` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-83AC77E8-48AE-420C-...` |
| `hybrid_rrf` | Openstack Swift bucket | 0.125 | `hvm_user_manual_8_1_0/GUID-6B35AE72-3D19-447A-...` |
| `hybrid_rrf` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` |
| `hybrid_rrf` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` |
| `hybrid_rrf` | recommended converged networking setup scenario | 0.500 | `hvm_deployment_guide/GUID-BDF3EFBF-EA90-4E6F-...` |
| `hybrid_rrf` | qualification matrix supported hardware | 0.500 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `hybrid_rrf` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` |
| `bm25+rerank` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` |
| `bm25+rerank` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` |
| `bm25+rerank` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `bm25+rerank` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_2/GUID-BCD3CA0B-5C7E-46CB-...` |
| `bm25+rerank` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` |
| `bm25+rerank` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` |
| `bm25+rerank` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` |
| `bm25+rerank` | configure storage buckets | 0.250 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` |
| `bm25+rerank` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` |
| `bm25+rerank` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `bm25+rerank` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` |
| `bm25+rerank` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` |
| `bm25+rerank` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` |
| `bm25+rerank` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` |
| `bm25+rerank` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` |
| `bm25+rerank` | create a user account | 1.000 | `hvm_user_manual_8_1_0/GUID-21972435-BFD0-481F-...` |
| `bm25+rerank` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` |
| `bm25+rerank` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` |
| `bm25+rerank` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` |
| `bm25+rerank` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` |
| `bm25+rerank` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` |
| `bm25+rerank` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` |
| `hybrid_rrf+rerank` | VME Manager sizing recommendations small medium large | 1.000 | `hvm_deployment_guide/GUID-0F55384D-5632-4CDC-...` |
| `hybrid_rrf+rerank` | create an instance backup | 1.000 | `hvm_user_manual_8_1_0/GUID-9DA38943-BF95-446D-...` |
| `hybrid_rrf+rerank` | what are the host hardware requirements | 1.000 | `hvm_deployment_guide/GUID-BE7493B3-B866-4269-...` |
| `hybrid_rrf+rerank` | Japanese keyboard layout in console sessions | 0.000 | `hvm_user_manual_8_1_0/GUID-BCD3CA0B-5C7E-46CB-...` |
| `hybrid_rrf+rerank` | elevate to Morpheus Enterprise | 1.000 | `hvm_user_manual_8_1_0/GUID-ECCA4FDD-37C8-45CE-...` |
| `hybrid_rrf+rerank` | create an HVM cluster | 1.000 | `hvm_deployment_guide/GUID-99397996-8315-49C1-...` |
| `hybrid_rrf+rerank` | back up and restore the VM Essentials manager | 1.000 | `hvm_user_manual_8_1_0/GUID-2686BF33-E793-4BF9-...` |
| `hybrid_rrf+rerank` | configure storage buckets | 0.250 | `hvm_user_manual_8_1_0/GUID-E32D92B2-8DB3-42C3-...` |
| `hybrid_rrf+rerank` | disable two-factor authentication | 1.000 | `hvm_user_manual_8_1_0/GUID-0CBD386D-DED9-474D-...` |
| `hybrid_rrf+rerank` | upgrading the manager | 1.000 | `hvm_deployment_guide/GUID-4EDB4324-2C3B-435F-...` |
| `hybrid_rrf+rerank` | supported storage protocols | 1.000 | `hvm_deployment_guide/GUID-6EBCB223-4C48-456F-...` |
| `hybrid_rrf+rerank` | network bonding configuration | 1.000 | `hvm_deployment_guide/GUID-6F1B4C62-CCE2-4AE5-...` |
| `hybrid_rrf+rerank` | what TCP ports does HVM need open | 1.000 | `hvm_deployment_guide/GUID-97DDED8D-EE6B-4819-...` |
| `hybrid_rrf+rerank` | install HVM OS on a host server | 1.000 | `hvm_deployment_guide/GUID-28F18596-4902-4CD1-...` |
| `hybrid_rrf+rerank` | configure Linux images for HVM clusters | 1.000 | `hvm_deployment_guide/GUID-8D494112-C361-4300-...` |
| `hybrid_rrf+rerank` | create a user account | 0.000 | `hvm_user_manual_8_1_0/GUID-443F9C0E-FFD9-48B9-...` |
| `hybrid_rrf+rerank` | Openstack Swift bucket | 1.000 | `hvm_user_manual_8_1_0/GUID-0D60567D-2DD3-4D8B-...` |
| `hybrid_rrf+rerank` | API reference for VM Essentials | 1.000 | `hvm_deployment_guide/GUID-88494051-74D1-4BD9-...` |
| `hybrid_rrf+rerank` | Worker version compatibility 8.1.2 | 1.000 | `hvm_release_notes_8_1_2/sd00007734en_us...` |
| `hybrid_rrf+rerank` | recommended converged networking setup scenario | 1.000 | `hvm_deployment_guide/GUID-2DD9D39D-9031-4BB5-...` |
| `hybrid_rrf+rerank` | qualification matrix supported hardware | 1.000 | `hvm_deployment_guide/GUID-E3635F0A-11DA-4078-...` |
| `hybrid_rrf+rerank` | configure the VM Essentials manager initial setup | 1.000 | `hvm_deployment_guide/GUID-456E190C-E912-4079-...` |
+118 -28
View File
@@ -10,7 +10,7 @@ to one entry; the highest-ranked chunk's position wins).
""" """
from __future__ import annotations from __future__ import annotations
from typing import Protocol, Iterable from typing import Iterable, Protocol
class Retriever(Protocol): class Retriever(Protocol):
@@ -21,12 +21,17 @@ class Retriever(Protocol):
... ...
def _collapse_to_pages(chunk_ids: Iterable[tuple[str, str, str]], k: int) -> list[tuple[str, str]]: def _split_chunk_id(chunk_id: str) -> tuple[str, str, int]:
"""Take a stream of (bundle_id, page_id, chunk_ordinal) and return """`bundle::page::ordinal` -> (bundle, page, int(ordinal))."""
the first k unique pages in their first-seen order.""" bid, pid, ordinal = chunk_id.split("::")
return bid, pid, int(ordinal)
def _collapse_to_pages(chunk_ids: Iterable[str], k: int) -> list[tuple[str, str]]:
seen: set[tuple[str, str]] = set() seen: set[tuple[str, str]] = set()
out: list[tuple[str, str]] = [] out: list[tuple[str, str]] = []
for bid, pid, _ord in chunk_ids: for cid in chunk_ids:
bid, pid, _ord = _split_chunk_id(cid)
key = (bid, pid) key = (bid, pid)
if key in seen: if key in seen:
continue continue
@@ -37,26 +42,111 @@ def _collapse_to_pages(chunk_ids: Iterable[tuple[str, str, str]], k: int) -> lis
return out return out
# TODO Phase 2/3 — implement these once Chroma + the bm25 module are class DenseRetriever:
# in place. Each one is small (15-30 LOC). The eval harness imports """Chroma cosine search via the live embedding function."""
# from this module by class name. name = "dense"
#
# class DenseRetriever: def __init__(self, collection, pool: int = 50):
# name = "dense" self.col = collection
# def __init__(self, collection): self.col = collection self.pool = pool
# def retrieve(self, query, k=10): ...
# def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
# class RerankedRetriever: res = self.col.query(query_texts=[query], n_results=self.pool)
# name = "dense+rerank" ids = (res.get("ids") or [[]])[0]
# def __init__(self, collection, rerank_url, pool=200): ... return _collapse_to_pages(ids, k)
# def retrieve(self, query, k=10): ...
#
# class BM25Retriever: class BM25Retriever:
# name = "bm25" """SQLite FTS5 lexical search."""
# def __init__(self, bm25_index): ... name = "bm25"
# def retrieve(self, query, k=10): ...
# def __init__(self, bm25_index, pool: int = 200):
# class HybridRetriever: self.bm = bm25_index
# name = "bm25+dense+rrf" self.pool = pool
# def __init__(self, dense, bm25, k_rrf=60): ...
# def retrieve(self, query, k=10): ... def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
hits = self.bm.query(query, n=self.pool)
return _collapse_to_pages((cid for cid, _score in hits), k)
class HybridRetriever:
    """Combine the dense and BM25 page rankings with Reciprocal Rank Fusion.

    Every page receives ``1 / (k_rrf + rank)`` from each ranking it appears in
    (ranks start at 1); the contributions are summed and pages are returned in
    descending order of that total.
    """
    name = "hybrid_rrf"

    def __init__(self, dense: DenseRetriever, bm25: BM25Retriever, k_rrf: int = 60, pool: int = 100):
        self.dense, self.bm25 = dense, bm25
        self.k_rrf, self.pool = k_rrf, pool

    def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
        """Return the top ``k`` fused ``(bundle_id, page_id)`` pairs for ``query``."""
        # Dense is queried first, then BM25; both are asked for `pool` pages.
        rankings = (
            self.dense.retrieve(query, k=self.pool),
            self.bm25.retrieve(query, k=self.pool),
        )
        fused: dict[tuple[str, str], float] = {}
        for ranking in rankings:
            for position, page in enumerate(ranking, start=1):
                fused[page] = fused.get(page, 0.0) + 1.0 / (self.k_rrf + position)
        # Stable sort: pages with equal totals keep their first-seen order.
        best_first = sorted(fused, key=lambda page: -fused[page])
        return best_first[:k]
def _rerank_pool(rerank_url: str, query: str, ids_and_texts: list[tuple[str, str]],
timeout: float = 30.0) -> list[str] | None:
"""POST to /v1/rerank, return ids in reranked order. None on failure."""
if not ids_and_texts:
return []
import httpx
try:
with httpx.Client(timeout=timeout) as c:
r = c.post(f"{rerank_url}/v1/rerank", json={
"query": query,
"documents": [(t or "")[:2000] for _i, t in ids_and_texts],
"top_n": len(ids_and_texts),
})
r.raise_for_status()
results = r.json().get("results") or []
return [ids_and_texts[item["index"]][0] for item in results
if isinstance(item.get("index"), int)
and 0 <= item["index"] < len(ids_and_texts)]
except Exception:
return None
class RerankedRetriever:
    """Pull a candidate pool via a base retriever, then cross-encoder re-rank.

    ``name`` is derived from the base retriever as ``"<base.name>+<name_suffix>"``.

    Args:
        base: Retriever that supplies the page-level candidate pool.
        collection: Collection object whose ``get(ids=..., include=["documents"])``
            returns the chunk texts handed to the reranker.
        rerank_url: Base URL of the rerank service (``/v1/rerank`` is appended
            by ``_rerank_pool``).
        name_suffix: Suffix used when building ``name``.
        pool: Number of candidate pages requested from ``base``.
        timeout: Timeout in seconds passed through to the rerank request.
    """

    def __init__(self, base: Retriever, collection, rerank_url: str, name_suffix: str = "rerank",
                 pool: int = 50, timeout: float = 30.0):
        self.base = base
        self.col = collection
        self.url = rerank_url
        self.name = f"{base.name}+{name_suffix}"
        self.pool = pool
        self.timeout = timeout

    def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
        """Return up to ``k`` ``(bundle_id, page_id)`` pairs in reranked order.

        Falls back to the base retriever's own order when the reranker fails
        or returns no usable ranking.
        """
        pages = self.base.retrieve(query, k=self.pool)
        if not pages:
            return []
        # Base returns deduplicated page-level tuples; rerank needs CHUNK-level
        # texts to be informative. Pull each page's chunk 0 text from Chroma.
        page_keys = [(bid, pid) for bid, pid in pages]
        chunk_ids = [f"{bid}::{pid}::0" for bid, pid in page_keys]
        # Remember which page each chunk id was built from, so ids never have
        # to be parsed back apart (which breaks if an id itself contains "::").
        page_by_chunk = dict(zip(chunk_ids, page_keys))
        fetched = self.col.get(ids=chunk_ids, include=["documents"])
        text_by_id = dict(zip(fetched["ids"], fetched["documents"]))
        ids_and_texts = [(cid, text_by_id.get(cid, "")) for cid in chunk_ids]
        order = _rerank_pool(self.url, query, ids_and_texts, timeout=self.timeout)
        # None means the request failed; an empty list means the service
        # answered without a usable ranking. Either way the base ranking is
        # better than returning nothing for a non-empty pool.
        if not order:
            return pages[:k]
        out: list[tuple[str, str]] = []
        seen: set[tuple[str, str]] = set()
        for cid in order:
            key = page_by_chunk[cid]
            if key in seen:
                # The service repeated an index; keep the first occurrence only.
                continue
            seen.add(key)
            out.append(key)
            if len(out) >= k:
                break
        return out
+81 -9
View File
@@ -76,15 +76,87 @@ def main() -> int:
queries = load_queries(args.queries) queries = load_queries(args.queries)
print(f"loaded {len(queries)} queries") print(f"loaded {len(queries)} queries")
# TODO Phase 7: instantiate the retrievers you implemented in import os
# eval/retrievers.py and run each one against each query. import chromadb
# Aggregate MRR / Recall@K / nDCG@K per retriever. Emit a from chromadb.config import Settings
# markdown table to args.output. Commit the file alongside the from rag.embeddings import embedding_function
# PR that changes retrieval. from rag.bm25 import BM25Index
raise NotImplementedError( from eval.retrievers import DenseRetriever, BM25Retriever, HybridRetriever
"Wire up the retrievers in eval/retrievers.py first, then "
"fill in this evaluation loop. See PLAN.md Phase 7." product = os.environ.get("PRODUCT_NAME", "hvm")
) repo_root = Path(__file__).resolve().parent.parent
client = chromadb.PersistentClient(path=str(repo_root / "chroma"),
settings=Settings(anonymized_telemetry=False))
col = client.get_collection(f"{product}_docs", embedding_function=embedding_function())
bm = BM25Index(str(repo_root / "bm25" / f"{product}_docs.db"))
from eval.retrievers import RerankedRetriever
dense = DenseRetriever(col)
bm25 = BM25Retriever(bm)
hybrid = HybridRetriever(DenseRetriever(col, pool=100), BM25Retriever(bm, pool=100))
retrievers = [dense, bm25, hybrid]
rerank_url = os.environ.get("RERANK_URL", "").rstrip("/")
if rerank_url:
retrievers += [
RerankedRetriever(bm25, col, rerank_url, name_suffix="rerank", pool=50),
RerankedRetriever(hybrid, col, rerank_url, name_suffix="rerank", pool=50),
]
print(f"reranker enabled: {rerank_url}")
rows: dict[str, dict[str, float]] = {}
per_query: list[dict] = []
for r in retrievers:
mrr_sum = recall_sum = ndcg_sum = 0.0
elapsed_sum = 0.0
for q in queries:
expected = [(e["bundle_id"], e["page_id"]) for e in q["expected"]]
t0 = time.time()
retrieved = r.retrieve(q["query"], k=max(args.k, 10))
elapsed = time.time() - t0
mrr = reciprocal_rank(retrieved, expected)
recall = recall_at_k(retrieved, expected, args.k)
ndcg = ndcg_at_k(retrieved, expected, args.k)
mrr_sum += mrr
recall_sum += recall
ndcg_sum += ndcg
elapsed_sum += elapsed
per_query.append({
"retriever": r.name, "query": q["query"],
"mrr": mrr, "recall@k": recall, "ndcg@k": ndcg,
"top1": list(retrieved[0]) if retrieved else None,
"elapsed_s": round(elapsed, 3),
})
n = len(queries)
rows[r.name] = {
"MRR": mrr_sum / n,
f"Recall@{args.k}": recall_sum / n,
f"nDCG@{args.k}": ndcg_sum / n,
"avg_latency_s": elapsed_sum / n,
}
print(f" {r.name}: MRR={rows[r.name]['MRR']:.3f} "
f"Recall@{args.k}={rows[r.name][f'Recall@{args.k}']:.3f} "
f"nDCG@{args.k}={rows[r.name][f'nDCG@{args.k}']:.3f} "
f"avg={rows[r.name]['avg_latency_s']*1000:.0f}ms")
args.output.parent.mkdir(parents=True, exist_ok=True)
md = [f"# Retrieval eval — k={args.k}", "",
f"_{len(queries)} hand-curated queries, generated {time.strftime('%Y-%m-%d %H:%M:%S')}_", "",
"| Retriever | MRR | Recall@{k} | nDCG@{k} | avg latency |".replace("{k}", str(args.k)),
"| --- | ---: | ---: | ---: | ---: |"]
for name, m in rows.items():
md.append(f"| `{name}` | {m['MRR']:.3f} | {m[f'Recall@{args.k}']:.3f} "
f"| {m[f'nDCG@{args.k}']:.3f} | {m['avg_latency_s']*1000:.0f}ms |")
md += ["", "## Per-query results", "",
"| Retriever | Query | MRR | top-1 |", "| --- | --- | ---: | --- |"]
for r in per_query:
top1 = f"`{r['top1'][0]}/{r['top1'][1][:24]}...`" if r["top1"] else ""
md.append(f"| `{r['retriever']}` | {r['query'][:60]} | {r['mrr']:.3f} | {top1} |")
args.output.write_text("\n".join(md) + "\n")
print(f"wrote {args.output}")
return 0
if __name__ == "__main__": if __name__ == "__main__":
View File
+120
View File
@@ -0,0 +1,120 @@
"""Minimal HTTP reranker — `/v1/rerank` endpoint over a sentence-transformers CrossEncoder.
Matches the Cohere `/v1/rerank` request/response shape, which is what the
server's `_rerank()` helper expects. This is the dev-friendly fallback;
production replaces this with the llama.cpp + jina-reranker-v2-base GGUF
sidecar (see deploy/docker-compose.yml) without changing the client.
Request:
POST /v1/rerank
{"model": "...", "query": "...", "documents": ["text", ...], "top_n": 10}
Response:
{"model": "...", "results": [{"index": 0, "relevance_score": 0.93}, ...]}
Usage:
python -m scripts.rerank_server # localhost:8001
RERANK_MODEL=cross-encoder/ms-marco-MiniLM-L-12-v2 \\
RERANK_PORT=8001 python -m scripts.rerank_server
"""
from __future__ import annotations
import json
import logging
import os
import sys
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
# Module logger; basicConfig installs a timestamped root handler at import time.
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
# Cross-encoder model to load; override with the RERANK_MODEL environment variable.
MODEL_NAME = os.environ.get("RERANK_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2")
# Listen address; override with RERANK_PORT / RERANK_HOST.
PORT = int(os.environ.get("RERANK_PORT", "8001"))
HOST = os.environ.get("RERANK_HOST", "127.0.0.1")
# Truncate docs to this many chars before scoring. jina-reranker GGUF has a
# 1024-token per-pair cap that 400s the entire batch; ms-marco is more
# forgiving but we still cap to keep latency predictable.
MAX_DOC_CHARS = int(os.environ.get("RERANK_MAX_DOC_CHARS", "2000"))
# Cached model instance; stays None until _get_model() loads it.
_model = None
def _get_model():
    """Return the shared CrossEncoder, loading it on the first call.

    The sentence-transformers import is deferred to the first load, and the
    loaded instance is cached in the module-level ``_model``.
    """
    global _model
    if _model is not None:
        return _model
    from sentence_transformers import CrossEncoder
    log.info("loading %s", MODEL_NAME)
    _model = CrossEncoder(MODEL_NAME)
    log.info("loaded")
    return _model
def _rerank(query: str, documents: list[str], top_n: int | None) -> list[dict]:
model = _get_model()
pairs = [[query, (d or "")[:MAX_DOC_CHARS]] for d in documents]
scores = model.predict(pairs)
ranked = sorted(
({"index": i, "relevance_score": float(s)} for i, s in enumerate(scores)),
key=lambda r: -r["relevance_score"],
)
if top_n is not None:
ranked = ranked[:top_n]
return ranked
class Handler(BaseHTTPRequestHandler):
    """HTTP handler exposing a health check and the rerank endpoint.

    * ``GET /`` or ``GET /health`` -> ``{"status": "ok", "model": MODEL_NAME}``
    * ``POST /v1/rerank`` or ``POST /rerank`` ->
      ``{"model": MODEL_NAME, "results": [...]}``

    Malformed requests get a 400 JSON error, unknown paths a 404, and
    failures while scoring a 500.
    """

    # Shared 400 message for any body that does not match the request shape.
    _BAD_SHAPE = "expected {query: str, documents: list[str]}"

    def log_message(self, fmt, *args):
        """Send the base class's per-request log lines to the module logger."""
        log.info("%s - %s", self.address_string(), fmt % args)

    def _send_json(self, status: int, payload: dict) -> None:
        """Write ``payload`` as a JSON response with the given HTTP status."""
        body = json.dumps(payload).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):  # noqa: N802
        """Answer the health-check paths; everything else is a 404."""
        if self.path in ("/", "/health"):
            self._send_json(200, {"status": "ok", "model": MODEL_NAME})
            return
        self._send_json(404, {"error": "not found"})

    def do_POST(self):  # noqa: N802
        """Validate a rerank request, score it, and send the ranked results."""
        if self.path not in ("/v1/rerank", "/rerank"):
            self._send_json(404, {"error": "not found"})
            return
        # A non-numeric header must produce a 400, not an unhandled exception;
        # a negative length would make rfile.read() wait for end-of-stream.
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            length = -1
        if length < 0:
            self._send_json(400, {"error": "invalid Content-Length"})
            return
        try:
            req = json.loads(self.rfile.read(length).decode())
        except Exception as e:
            self._send_json(400, {"error": f"bad json: {e}"})
            return
        # Valid JSON is not necessarily an object (e.g. a bare list or number).
        if not isinstance(req, dict):
            self._send_json(400, {"error": self._BAD_SHAPE})
            return
        query = req.get("query")
        documents = req.get("documents")
        if not isinstance(query, str) or not isinstance(documents, list):
            self._send_json(400, {"error": self._BAD_SHAPE})
            return
        # null entries stay accepted (they are scored as empty text); any other
        # non-string entry is a client error rather than a scoring failure.
        if not all(d is None or isinstance(d, str) for d in documents):
            self._send_json(400, {"error": self._BAD_SHAPE})
            return
        top_n = req.get("top_n")
        # Unusable top_n values are ignored, as non-integers already were:
        # bool is an int subclass and a negative value would slice from the end.
        if not isinstance(top_n, int) or isinstance(top_n, bool) or top_n < 0:
            top_n = None
        try:
            results = _rerank(query, documents, top_n)
        except Exception as e:
            log.exception("rerank failed")
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, {"model": MODEL_NAME, "results": results})
def main() -> int:
    """Load the model, then serve rerank requests until interrupted.

    Returns:
        ``0`` once the server has been stopped with Ctrl-C.
    """
    _get_model()  # warm-load before accepting traffic
    # The server is a context manager: leaving the block closes the listening
    # socket, which the previous version never released.
    with ThreadingHTTPServer((HOST, PORT), Handler) as server:
        log.info("listening on http://%s:%d", HOST, PORT)
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            log.info("shutting down")
    return 0
if __name__ == "__main__":
sys.exit(main())