From fa448f94e1815b018046a221985b087417c6a6a6 Mon Sep 17 00:00:00 2001
From: Justin Paul <justin@jpaul.me>
Date: Fri, 22 May 2026 15:26:24 -0400
Subject: [PATCH] build out morpheus-docs MCP stack, mirroring hvm-docs through
 Phases 1-13
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Initial scaffold: the docs-mcp-template clone with all the
HVM-validated stack ported across, customized for Morpheus
Enterprise (PRODUCT_NAME=morpheus, server name morpheus-docs).

Bundles (live-discovered 2026-05-22; 1710 cataloged pages total):
* morpheus_user_manual_8_1_0  sd00007510en_us  568 pages (Feb 2026)
* morpheus_user_manual_8_1_1  sd00007621en_us  569 pages (Mar 2026)
* morpheus_user_manual_8_1_2  sd00007732en_us  569 pages (Apr 2026)
* morpheus_release_notes_8_1_0  sd00007496en_us  single-doc
* morpheus_release_notes_8_1_1  sd00007610en_us  single-doc
* morpheus_release_notes_8_1_2  sd00007733en_us  single-doc
* morpheus_quickspecs            a50009231enw     html-file (live
  curl_cffi against www.hpe.com; all 12+ Enterprise SKUs captured —
  S6E64..S6E73AAE for new/renewal/upgrade × 1/3/5-yr terms, plus
  services SKUs HA124A1#V38/V39 and H46SBA1).

No Deployment Guide or Qualification Matrix on HPE Support for
Morpheus Enterprise specifically — the only QM (sd00006551en_us)
covers HVM clusters managed by Morpheus and lives in hvm-docs.

Stack carried forward from hvm-docs:
* rag/{index,chunk,embeddings,bm25}.py — including the
  MAX_CHARS=4000 chunk-cap fix for table-dense content
* docs_mcp/{server,usage}.py — 11 MCP tools, BM25-default search,
  cross-encoder rerank, hybrid behind HYBRID_SEARCH=true,
  morpheus_api_lessons (renamed from hvm_api_lessons), env-gated
  submit_doc_bug
* docs_mcp/api_lessons.md — Morpheus-specific scaffold covering
  licensing model, HVM elevation path, REST vs Plugin API, with
  TODO markers for sections to flesh out from real ops experience
* scrape/{runner,quickspecs,changelog,bundles}.py — TOC + single-doc
  + html-file modes, curl_cffi Chrome120 for www.hpe.com edge bypass
* eval/{retrievers,run_eval}.py + queries.jsonl scaffold (4 placeholder
  queries; populate after first scrape)
* scripts/{rerank_server,usage_report,registry_gc}.py
* .gitea/workflows/{refresh,image-only}.yml — same Gitea Actions
  setup zerto-docs uses (push LAN, pull public-URL, GPU Ollama pool)
* deploy/docker-compose.yml — morpheus-docs-mcp service definition,
  shared jina-rerank sidecar, Watchtower-labeled
* Dockerfile, requirements.txt, requirements-rerank.txt

Verified locally: scrape produced 1599 .md pages (some TOC entries
are parent-only and yield no body), 6353 chunks all under the 4 KB
cap, MCP server boots and lists 11 tools cleanly.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .gitea/workflows/image-only.yml |  109 +--
 .gitea/workflows/refresh.yml    |  144 ++--
 bundles.json                    |  119 ++++
 deploy/docker-compose.yml       |   40 +-
 docs_mcp/api_lessons.md         |  148 ++++
 docs_mcp/server.py              | 1117 +++++++++++++++++++++++++++++--
 eval/queries.jsonl              |    4 +
 eval/retrievers.py              |  146 +++-
 eval/run_eval.py                |   90 ++-
 rag/chunk.py                    |   50 +-
 rag/embeddings.py               |   25 +-
 rag/index.py                    |    2 +-
 requirements-rerank.txt         |   10 +
 requirements.txt                |    8 +
 scrape/README.md                |   66 ++
 scrape/bundles.py               |  200 ++++++
 scrape/quickspecs.py            |  194 ++++++
 scrape/quickspecs/README.md     |   27 +
 scrape/runner.py                |  339 ++++++++++
 scripts/__init__.py             |    0
 scripts/registry_gc.py          |  111 +--
 scripts/rerank_server.py        |  120 ++++
 22 files changed, 2822 insertions(+), 247 deletions(-)
 create mode 100644 bundles.json
 create mode 100644 docs_mcp/api_lessons.md
 create mode 100644 eval/queries.jsonl
 create mode 100644 requirements-rerank.txt
 create mode 100644 scrape/bundles.py
 create mode 100644 scrape/quickspecs.py
 create mode 100644 scrape/quickspecs/README.md
 create mode 100644 scrape/runner.py
 create mode 100644 scripts/__init__.py
 create mode 100644 scripts/rerank_server.py

diff --git a/.gitea/workflows/image-only.yml b/.gitea/workflows/image-only.yml
index abe60b2..4e09574 100644
--- a/.gitea/workflows/image-only.yml
+++ b/.gitea/workflows/image-only.yml
@@ -14,21 +14,17 @@ on:
   workflow_dispatch:
 
 env:
-  REGISTRY_PUSH: <lan-host>:<port>
-  REGISTRY_PULL: <public-registry-hostname>
-  # Image name derives from the actual repo at runtime, so a clone
-  # doesn't need to find/replace anything. e.g. justin/my-product-docs.
-  # github.* context is Gitea Actions' inherited GitHub-Actions namespace
-  # — values come from the Gitea server, not github.com.
+  # PUSH goes to the LAN endpoint (HTTP) to bypass Cloudflare's 100 MB
+  # body cap. PULL uses the public hostname (HTTPS). Same Gitea registry.
+  REGISTRY_PUSH: 192.168.0.2:1234
+  REGISTRY_PULL: git.jpaul.io
   IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }}
-  OLLAMA_URL: http://<gpu-host>:11434
+  # Two GPU-pinned Ollama containers on the Gitea host — same infra
+  # zerto-docs uses. :11435 = Titan X, :11436 = 1080 Ti. Indexer
+  # round-robins per batch.
+  OLLAMA_URLS: http://192.168.0.2:11435,http://192.168.0.2:11436
   EMBED_MODEL: nomic-embed-text
-  # PRODUCT_NAME defaults to the repo name so a clone works without
-  # editing. Override here if you want a different identifier (e.g.
-  # repo "my-product-docs" → PRODUCT_NAME "myproduct"). Used as the
-  # Chroma collection name, BM25 db filename, and MCP server name —
-  # see docs_mcp/server.py.
-  PRODUCT_NAME: ${{ github.event.repository.name }}
+  PRODUCT_NAME: morpheus
 
 jobs:
   build:
@@ -39,8 +35,7 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
         with:
-          # Full history (not shallow) so the digest-history step can
-          # walk git log up to --history-days back.
+          # Full history so digest-history can walk git log.
           fetch-depth: 0
 
       - name: Set up Python
@@ -54,9 +49,8 @@ jobs:
           python -m pip install -q -r requirements.txt
 
       - name: Refresh digest history
-        # Cheap (a few seconds); doesn't touch corpus content.
-        # Without this step, a code-only deploy would ship an
-        # increasingly-stale digest history relative to git.
+        # Cheap (few seconds). Without this step, a code-only deploy
+        # would ship an increasingly-stale digest history.
         run: |
           mkdir -p corpus/.digest
           python -m scrape.changelog \
@@ -71,42 +65,69 @@ jobs:
       - name: Rebuild indexes from existing corpus
         run: python -m rag.index --rebuild
 
-      - name: Log in to registry (LAN endpoint)
-        run: echo "${{ secrets.REGISTRY_TOKEN }}" | docker login "${REGISTRY_PUSH}" -u "${{ github.repository_owner }}" --password-stdin
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          # LAN registry is HTTP only.
+          config-inline: |
+            [registry."192.168.0.2:1234"]
+              http = true
+              insecure = true
 
-      - name: Build & push image
+      - name: Configure registry credentials for buildx
+        env:
+          REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
+          REGISTRY_USER: ${{ github.actor }}
         run: |
-          SHA_TAG=$(echo "$GITHUB_SHA" | cut -c1-12)
-          DATE_TAG=$(date -u +%Y.%m.%d)
-          docker build \
-            -t "${REGISTRY_PUSH}/${IMAGE}:latest" \
-            -t "${REGISTRY_PUSH}/${IMAGE}:${SHA_TAG}" \
-            -t "${REGISTRY_PUSH}/${IMAGE}:${DATE_TAG}" \
-            .
-          docker push "${REGISTRY_PUSH}/${IMAGE}:latest"
-          docker push "${REGISTRY_PUSH}/${IMAGE}:${SHA_TAG}"
-          docker push "${REGISTRY_PUSH}/${IMAGE}:${DATE_TAG}"
+          mkdir -p ~/.docker
+          AUTH=$(printf '%s:%s' "$REGISTRY_USER" "$REGISTRY_TOKEN" | base64 -w0)
+          cat > ~/.docker/config.json <<EOF
+          {
+            "auths": {
+              "192.168.0.2:1234": {
+                "auth": "$AUTH"
+              }
+            }
+          }
+          EOF
+
+      - name: Compute tags
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: 192.168.0.2:1234/${{ github.repository_owner }}/${{ github.event.repository.name }}
+          tags: |
+            type=raw,value=latest
+            type=sha,prefix=,format=short
+            type=raw,value={{date 'YYYY.MM.DD'}}
+          labels: |
+            org.opencontainers.image.source=https://git.jpaul.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
+            org.opencontainers.image.url=https://git.jpaul.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
+
+      - name: Build & push (amd64)
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/amd64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
 
       - name: Link container package to this repo
-        # Gitea container packages are owned by a USER, not a repo —
-        # they don't auto-appear under the repo's Packages tab.
-        # This API call creates the association. One-time-effective:
-        # re-running returns 400 once linked, which we swallow.
-        # Endpoint requires Gitea 1.21+.
         env:
           GITEA_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
         run: |
           OWNER="${{ github.repository_owner }}"
           PKG="${{ github.event.repository.name }}"
-          BODY=$(mktemp)
-          CODE=$(curl -sS -o "$BODY" -w "%{http_code}" -X POST \
+          code=$(curl -s -o /tmp/link.out -w "%{http_code}" -X POST \
             -H "Authorization: token ${GITEA_TOKEN}" \
-            "https://${REGISTRY_PULL}/api/v1/packages/${OWNER}/container/${PKG}/-/link/${PKG}")
-          echo "link http=$CODE  body=$(cat "$BODY")"
-          case "$CODE" in
-            201) echo "linked package to ${OWNER}/${PKG}" ;;
-            400) echo "already linked (re-link returns 400) — ok" ;;
-            *)   echo "unexpected status $CODE"; exit 1 ;;
+            "https://git.jpaul.io/api/v1/packages/${OWNER}/container/${PKG}/-/link/${PKG}")
+          echo "link ${OWNER}/container/${PKG} -> ${PKG}: HTTP ${code}"
+          body=$(cat /tmp/link.out)
+          case "$code" in
+            201)      echo "OK — newly linked" ;;
+            400|409)  echo "OK — already linked: ${body}" ;;
+            *)        echo "unexpected: ${body}"; exit 1 ;;
           esac
 
       - name: Prune old container versions
diff --git a/.gitea/workflows/refresh.yml b/.gitea/workflows/refresh.yml
index ef7f504..caef74a 100644
--- a/.gitea/workflows/refresh.yml
+++ b/.gitea/workflows/refresh.yml
@@ -19,27 +19,25 @@ on:
         default: false
 
 env:
-  # If your registry sits behind Cloudflare with its 100 MB body cap,
-  # use a LAN endpoint for pushes (bypasses CF) and the public hostname
-  # for pulls (response bodies aren't capped).
-  REGISTRY_PUSH: <lan-host>:<port>
-  REGISTRY_PULL: <public-registry-hostname>
-  # Image name derives from the actual repo at runtime, so a clone
-  # doesn't need to find/replace anything. e.g. justin/my-product-docs.
-  # github.* context is Gitea Actions' inherited GitHub-Actions namespace
-  # — values come from the Gitea server, not github.com.
+  # PUSH goes to the LAN endpoint (HTTP) to bypass Cloudflare Tunnel's
+  # 100 MB body cap. PULL uses the public hostname (HTTPS). Same Gitea
+  # registry either way — package lands under the same owner/repo.
+  REGISTRY_PUSH: 192.168.0.2:1234
+  REGISTRY_PULL: git.jpaul.io
+
+  # Image name derives from the repo at runtime — clones don't need to
+  # edit this. github.* is the Gitea-Actions inherited namespace.
   IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }}
 
-  # Embedder. One URL per GPU; the indexer round-robins.
-  OLLAMA_URL: http://<gpu-host>:11434
+  # Two GPU-pinned Ollama containers on the Gitea host — same infra
+  # zerto-docs uses (deploy/ollama-rag.docker-compose.yml over there).
+  # :11435 owns the Titan X, :11436 owns the 1080 Ti; the indexer
+  # round-robins per batch so both cards run in parallel. The host's
+  # primary Ollama on :11434 is left alone for OpenWebUI etc.
+  OLLAMA_URLS: http://192.168.0.2:11435,http://192.168.0.2:11436
   EMBED_MODEL: nomic-embed-text
 
-  # PRODUCT_NAME defaults to the repo name so a clone works without
-  # editing. Override here if you want a different identifier (e.g.
-  # repo "my-product-docs" → PRODUCT_NAME "myproduct"). Used as the
-  # Chroma collection name, BM25 db filename, and MCP server name —
-  # see docs_mcp/server.py.
-  PRODUCT_NAME: ${{ github.event.repository.name }}
+  PRODUCT_NAME: morpheus
 
 jobs:
   refresh:
@@ -50,10 +48,12 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
         with:
-          # Full history — required for the digest-history step to
-          # walk git log. Default fetch-depth: 1 silently produces a
-          # 0-byte history file.
+          # Full history — required for digest-history. Default depth 1
+          # silently produces a 0-byte history file.
           fetch-depth: 0
+          # Set the credentials Gitea injects so we can push corpus
+          # commits back. Persist them across the run.
+          token: ${{ secrets.GITEA_TOKEN }}
 
       - name: Set up Python
         uses: actions/setup-python@v5
@@ -89,8 +89,8 @@ jobs:
       - name: Commit corpus changes (if any)
         id: commit
         run: |
-          git config user.name "<product>-docs-refresh"
-          git config user.email "actions@<your-domain>"
+          git config user.name "morpheus-docs-refresh"
+          git config user.email "actions@jpaul.io"
           git add bundles.json corpus
           if git diff --cached --quiet; then
             echo "no corpus changes — skipping reindex and image build"
@@ -132,49 +132,89 @@ jobs:
         if: steps.commit.outputs.changed == 'true' || inputs.force_build == true
         run: python -m rag.index --rebuild
 
-      # ---- Build & push image ------------------------------------
-      - name: Log in to registry (LAN endpoint)
+      # ---- Build & push image (LAN endpoint, buildx) -------------
+      - name: Set up Docker Buildx
         if: steps.commit.outputs.changed == 'true' || inputs.force_build == true
-        run: echo "${{ secrets.REGISTRY_TOKEN }}" | docker login "${REGISTRY_PUSH}" -u "${{ github.repository_owner }}" --password-stdin
+        uses: docker/setup-buildx-action@v3
+        with:
+          # LAN registry is HTTP only. Buildkit needs an explicit
+          # insecure-registry config or it tries to upgrade to HTTPS.
+          config-inline: |
+            [registry."192.168.0.2:1234"]
+              http = true
+              insecure = true
 
-      - name: Build & push image
+      - name: Configure registry credentials for buildx
+        # Can't use docker/login-action against the LAN endpoint —
+        # the host docker daemon errors on HTTP-vs-HTTPS. Buildx reads
+        # ~/.docker/config.json directly, so write the auth ourselves.
         if: steps.commit.outputs.changed == 'true' || inputs.force_build == true
-        # Runner shell is /bin/sh — use cut instead of ${VAR::N}.
-        # Three tags: :latest (Watchtower target), :<sha12>
-        # (rollback pin), :<YYYY.MM.DD> (human-readable).
+        env:
+          REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
+          REGISTRY_USER: ${{ github.actor }}
         run: |
-          SHA_TAG=$(echo "$GITHUB_SHA" | cut -c1-12)
-          DATE_TAG=$(date -u +%Y.%m.%d)
-          docker build \
-            -t "${REGISTRY_PUSH}/${IMAGE}:latest" \
-            -t "${REGISTRY_PUSH}/${IMAGE}:${SHA_TAG}" \
-            -t "${REGISTRY_PUSH}/${IMAGE}:${DATE_TAG}" \
-            .
-          docker push "${REGISTRY_PUSH}/${IMAGE}:latest"
-          docker push "${REGISTRY_PUSH}/${IMAGE}:${SHA_TAG}"
-          docker push "${REGISTRY_PUSH}/${IMAGE}:${DATE_TAG}"
+          mkdir -p ~/.docker
+          AUTH=$(printf '%s:%s' "$REGISTRY_USER" "$REGISTRY_TOKEN" | base64 -w0)
+          cat > ~/.docker/config.json <<EOF
+          {
+            "auths": {
+              "192.168.0.2:1234": {
+                "auth": "$AUTH"
+              }
+            }
+          }
+          EOF
+
+      - name: Compute tags
+        id: meta
+        if: steps.commit.outputs.changed == 'true' || inputs.force_build == true
+        uses: docker/metadata-action@v5
+        with:
+          # Tag with the LAN hostname so the push goes over LAN.
+          # docker-compose on the deploy host pulls via git.jpaul.io.
+          images: 192.168.0.2:1234/${{ github.repository_owner }}/${{ github.event.repository.name }}
+          tags: |
+            type=raw,value=latest
+            type=sha,prefix=,format=short
+            type=schedule,pattern={{date 'YYYY.MM.DD'}}
+            type=raw,value={{date 'YYYY.MM.DD'}}
+          # Override auto-derived labels with the PUBLIC URL so Gitea
+          # can auto-link the package back to this repo.
+          labels: |
+            org.opencontainers.image.source=https://git.jpaul.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
+            org.opencontainers.image.url=https://git.jpaul.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
+
+      - name: Build & push (amd64)
+        if: steps.commit.outputs.changed == 'true' || inputs.force_build == true
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/amd64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
 
       - name: Link container package to this repo
-        # Gitea container packages are owned by a USER, not a repo —
-        # they don't auto-appear under the repo's Packages tab.
-        # This API call creates the association. One-time-effective:
-        # re-running returns 400 once linked, which we swallow.
-        # Endpoint requires Gitea 1.21+.
+        # Idempotent linkage so the package shows under the repo's
+        # Packages tab. Gitea's auto-link from the source label is
+        # unreliable in this setup (the runner reports an internal
+        # server URL), so we link explicitly. 201 = newly linked,
+        # 400 or 409 = already linked (treated as success).
         if: steps.commit.outputs.changed == 'true' || inputs.force_build == true
         env:
           GITEA_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
         run: |
           OWNER="${{ github.repository_owner }}"
           PKG="${{ github.event.repository.name }}"
-          BODY=$(mktemp)
-          CODE=$(curl -sS -o "$BODY" -w "%{http_code}" -X POST \
+          code=$(curl -s -o /tmp/link.out -w "%{http_code}" -X POST \
             -H "Authorization: token ${GITEA_TOKEN}" \
-            "https://${REGISTRY_PULL}/api/v1/packages/${OWNER}/container/${PKG}/-/link/${PKG}")
-          echo "link http=$CODE  body=$(cat "$BODY")"
-          case "$CODE" in
-            201) echo "linked package to ${OWNER}/${PKG}" ;;
-            400) echo "already linked (re-link returns 400) — ok" ;;
-            *)   echo "unexpected status $CODE"; exit 1 ;;
+            "https://git.jpaul.io/api/v1/packages/${OWNER}/container/${PKG}/-/link/${PKG}")
+          echo "link ${OWNER}/container/${PKG} -> ${PKG}: HTTP ${code}"
+          body=$(cat /tmp/link.out)
+          case "$code" in
+            201)      echo "OK — newly linked" ;;
+            400|409)  echo "OK — already linked: ${body}" ;;
+            *)        echo "unexpected: ${body}"; exit 1 ;;
           esac
 
       # ---- Registry GC -------------------------------------------
diff --git a/bundles.json b/bundles.json
new file mode 100644
index 0000000..9aef51a
--- /dev/null
+++ b/bundles.json
@@ -0,0 +1,119 @@
+[
+  {
+    "slug": "morpheus_user_manual_8_1_0",
+    "doc_id": "sd00007510en_us",
+    "title": "HPE Morpheus Enterprise Software Documentation v8.1.0",
+    "version": "8.1.0",
+    "platform": null,
+    "product": "User Manual",
+    "language": "en-US",
+    "page_count": 568,
+    "mode": "toc",
+    "abstract": "",
+    "dates": {
+      "Published": "February 2026"
+    },
+    "landing_page": "GUID-709AAADB-A9C1-40B6-AD22-958EE7E6F312",
+    "source_url": "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007510en_us"
+  },
+  {
+    "slug": "morpheus_user_manual_8_1_1",
+    "doc_id": "sd00007621en_us",
+    "title": "HPE Morpheus Enterprise Software Documentation v8.1.1",
+    "version": "8.1.1",
+    "platform": null,
+    "product": "User Manual",
+    "language": "en-US",
+    "page_count": 569,
+    "mode": "toc",
+    "abstract": "",
+    "dates": {
+      "Published": "March 2026"
+    },
+    "landing_page": "GUID-709AAADB-A9C1-40B6-AD22-958EE7E6F312",
+    "source_url": "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007621en_us"
+  },
+  {
+    "slug": "morpheus_user_manual_8_1_2",
+    "doc_id": "sd00007732en_us",
+    "title": "HPE Morpheus Enterprise Software Documentation v8.1.2",
+    "version": "8.1.2",
+    "platform": null,
+    "product": "User Manual",
+    "language": "en-US",
+    "page_count": 569,
+    "mode": "toc",
+    "abstract": "",
+    "dates": {
+      "Published": "April 2026"
+    },
+    "landing_page": "GUID-709AAADB-A9C1-40B6-AD22-958EE7E6F312",
+    "source_url": "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007732en_us"
+  },
+  {
+    "slug": "morpheus_release_notes_8_1_0",
+    "doc_id": "sd00007496en_us",
+    "title": "v8.1.0 Release Notes",
+    "version": "8.1.0",
+    "platform": null,
+    "product": "Release Notes",
+    "language": "en-US",
+    "page_count": 1,
+    "mode": "single",
+    "abstract": "Release notes for HPE Morpheus Enterprise Software version v8.1.0",
+    "dates": {
+      "Published": "February 2026"
+    },
+    "landing_page": "sd00007496en_us",
+    "source_url": "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007496en_us"
+  },
+  {
+    "slug": "morpheus_release_notes_8_1_1",
+    "doc_id": "sd00007610en_us",
+    "title": "v8.1.1 Release Notes",
+    "version": "8.1.1",
+    "platform": null,
+    "product": "Release Notes",
+    "language": "en-US",
+    "page_count": 1,
+    "mode": "single",
+    "abstract": "Release notes for HPE Morpheus Enterprise Software version v8.1.1",
+    "dates": {
+      "Published": "March 2026"
+    },
+    "landing_page": "sd00007610en_us",
+    "source_url": "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007610en_us"
+  },
+  {
+    "slug": "morpheus_release_notes_8_1_2",
+    "doc_id": "sd00007733en_us",
+    "title": "v8.1.2 Release Notes",
+    "version": "8.1.2",
+    "platform": null,
+    "product": "Release Notes",
+    "language": "en-US",
+    "page_count": 1,
+    "mode": "single",
+    "abstract": "Release notes for HPE Morpheus Enterprise Software version v8.1.2",
+    "dates": {
+      "Published": "April 2026"
+    },
+    "landing_page": "sd00007733en_us",
+    "source_url": "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007733en_us"
+  },
+  {
+    "slug": "morpheus_quickspecs",
+    "doc_id": "a50009231enw",
+    "title": "HPE Morpheus Enterprise Software QuickSpecs",
+    "version": "v1",
+    "platform": null,
+    "product": "QuickSpecs",
+    "language": "en-US",
+    "page_count": 1,
+    "mode": "html-file",
+    "abstract": "",
+    "dates": {},
+    "landing_page": "a50009231enw",
+    "source_url": "https://www.hpe.com/psnow/doc/a50009231enw"
+  }
+]
diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml
index 0aa05a8..39691d6 100644
--- a/deploy/docker-compose.yml
+++ b/deploy/docker-compose.yml
@@ -1,6 +1,6 @@
 # Hosting stack for a docs MCP server.
 #
-# Replace <product> below with your product name on first deploy.
+# Configured for morpheus; replace the product name below when reusing this for another product.
 # Volumes: usage logs are mounted to a host path so they survive
 # Watchtower-driven container recreates.
 #
@@ -10,15 +10,15 @@
 services:
 
   # The MCP server. Watchtower auto-pulls on :latest changes.
-  <product>-docs-mcp:
-    image: <registry>/<owner>/<product>-docs-mcp:latest
-    container_name: <product>-docs-mcp
+  morpheus-docs-mcp:
+    image: git.jpaul.io/justin/morpheus-docs:latest
+    container_name: morpheus-docs-mcp
     restart: unless-stopped
     ports:
       - "8000:8000"
     environment:
-      PRODUCT_NAME: "<product>"
-      PRODUCT_DOCS_URL: "https://docs.example.com"
+      PRODUCT_NAME: "morpheus"
+      PRODUCT_DOCS_URL: "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007732en_us"
 
       # Streamable-HTTP transport. Stateless mode is required for
       # production: clients don't lose sessions when Watchtower
@@ -28,19 +28,21 @@ services:
       MCP_PORT: "8000"
 
       # If you run MetaMCP or another gateway in front and reach
-      # this container via its compose DNS name (e.g. <product>-docs-mcp:8000),
+      # this container via its compose DNS name (e.g. morpheus-docs-mcp:8000),
       # add that hostname here. "*" disables the rebind check entirely.
-      MCP_ALLOWED_HOSTS: "<product>-docs-mcp,localhost,127.0.0.1"
+      MCP_ALLOWED_HOSTS: "morpheus-docs-mcp,localhost,127.0.0.1"
 
       # Phase 6 — reranker sidecar (jina-reranker-v2-base via llama.cpp).
-      RERANK_URL: http://<product>-rerank:8080
+      RERANK_URL: http://hvm-rerank:8080
       RERANK_POOL: "200"
       RERANK_TIMEOUT: "30"
 
-      # Phase 8 — hybrid retrieval (BM25 + dense + RRF). Set true
-      # only after the eval harness shows the dense-only path
-      # missing technical-term queries that BM25 catches.
-      HYBRID_SEARCH: "true"
+      # Phase 8 — hybrid retrieval (BM25 + dense + RRF).
+      # Eval on the HVM corpus (eval/results/baseline.md, 2026-05-22) shows
+      # BM25-default + reranker beats hybrid on every metric (MRR 0.920 vs
+      # 0.875). Leaving HYBRID_SEARCH off so search_docs runs BM25-first +
+      # reranker; dense is the fallback when BM25 finds nothing.
+      HYBRID_SEARCH: "false"
 
       # Phase 10 — usage telemetry.
       USAGE_LOG_DIR: /app/var/logs
@@ -52,9 +54,9 @@ services:
       # DOC_BUG_API_URL: "https://docs-be.example.com/api/feedback"
     volumes:
       # Usage logs persist across container recreates.
-      - ./<product>-docs-mcp-logs:/app/var/logs
+      - ./morpheus-docs-mcp-logs:/app/var/logs
     depends_on:
-      - <product>-rerank
+      - hvm-rerank
     labels:
       # Watchtower polls *only* containers with this label set true.
       com.centurylinklabs.watchtower.enable: "true"
@@ -63,9 +65,13 @@ services:
 
   # Reranker sidecar — llama.cpp serving jina-reranker-v2-base.
   # Requires GPU access; adjust runtime/devices for your hardware.
-  <product>-rerank:
+  #
+  # For dev / CPU-only hosts, swap this service for scripts/rerank_server.py
+  # (sentence-transformers ms-marco-MiniLM-L-6-v2). Same /v1/rerank shape,
+  # ~500ms/batch on CPU vs ~50ms on GPU with the jina GGUF.
+  hvm-rerank:
     image: ghcr.io/ggml-org/llama.cpp:server-cuda
-    container_name: <product>-rerank
+    container_name: hvm-rerank
     restart: unless-stopped
     # Mount the GGUF model from the host. Download from huggingface
     # (gguf-org/jina-reranker-v2-base-multilingual-GGUF) first.
diff --git a/docs_mcp/api_lessons.md b/docs_mcp/api_lessons.md
new file mode 100644
index 0000000..df1b789
--- /dev/null
+++ b/docs_mcp/api_lessons.md
@@ -0,0 +1,148 @@
+# HPE Morpheus Enterprise — Lessons
+
+Notes and gotchas about running, integrating with, and licensing
+**HPE Morpheus Enterprise Software** that aren't obvious from the
+official docs alone. The official User Manual + Release Notes +
+QuickSpecs describe the product as designed; this file is what
+experienced operators actually learn.
+
+> Treat this as living context. Update it when you (or the LLM
+> driving this MCP) discover something non-obvious that the docs
+> don't say or don't make findable. Each section is an H2 so the
+> `morpheus_api_lessons(topic=...)` tool can return just the
+> relevant piece.
+
+## TL;DR
+
+- **Morpheus Enterprise is the full cloud-management platform.** HPE
+  Morpheus VM Essentials (HVM) is the VM-only subset; Morpheus
+  Enterprise is what you "elevate to" when you need multi-cloud,
+  containers, automation, policy, FinOps, ITSM integration, and
+  self-service catalogs. The relationship is one-way upgrade.
+- **Licensing is per physical CPU socket** on connected on-prem
+  clouds (bare metal, hypervisor hosts, Kubernetes worker nodes).
+  Public-cloud workloads (AWS / Azure / GCP / OCI) are factored at
+  **15 workloads per socket** equivalent.
+- **All license SKUs include Tech Care Essentials 24×7** as part
+  of the license cost. There is no separate purchase for support
+  on the license tier.
+- **`morpheus_quickspecs` is the source of truth for SKUs.** Don't
+  guess part numbers; query the QuickSpecs bundle.
+
+## Licensing and SKUs
+
+**Source of truth: the `morpheus_quickspecs` bundle.** Query it for
+the current SKU list — the catalog updates more often than this
+file does.
+
+Pricing model summary (from QuickSpecs v1, 2026):
+
+- **Per physical CPU socket** for connected on-prem clouds —
+  KVM/HVM hosts, VMware ESXi hosts, bare metal servers, Kubernetes
+  worker nodes. Count the **sockets**, not the cores; not the VMs.
+- **Public cloud workloads factor at 15:1** — one socket of license
+  covers up to 15 public-cloud workloads (instances) across AWS,
+  Azure, GCP, OCI.
+- **Term-based** licensing (not perpetual). 1, 3, and 5-year terms
+  on E-LTU SKUs.
+- **All include HPE Tech Care Essentials** (24×7 support, 15-minute
+  response for severity-1) bundled into the license cost.
+
+> The exact ratios and SKU names can change between QuickSpecs
+> revisions. Use the `morpheus_quickspecs` tool / bundle for current
+> values rather than memorizing.
+
+## Elevation from HVM
+
+The "elevate to Morpheus Enterprise" path is the canonical journey
+for customers who started on HVM and outgrew it:
+
+- **HVM clusters keep working unchanged after elevation.** You
+  don't redeploy the manager; you upgrade-in-place using a
+  Morpheus Enterprise license.
+- **What changes:** the manager UI unlocks the full Enterprise
+  feature set — public-cloud integrations, container/Kubernetes
+  management, blueprints/catalogs, automation workflows, policy
+  engine, FinOps cost dashboards, ITSM connectors (ServiceNow etc.),
+  and the full REST API surface.
+- **Existing HVM-tier work products survive the elevation:**
+  Instance backups, network pools, storage providers, user
+  accounts, integrations, scheduled jobs, etc.
+
+The HVM User Manual page `Elevating to HPE Morpheus Enterprise`
+(GUID-ECCA4FDD-37C8-45CE-A71F-C6E73B3BA713) walks the procedure.
+See also the sibling `hvm-docs` MCP's
+`hvm_user_manual_8_1_*` bundles.
+
+## API surface — Plugin vs REST
+
+Morpheus exposes two completely separate extensibility surfaces:
+
+- **REST API** at `https://<manager>/api/` — external automation
+  and integration. Bearer-token authentication; tokens issued from
+  the user profile → API tokens UI. Full Enterprise API surface
+  available (vs HVM-only managers which 404 on Enterprise-only
+  endpoints).
+- **Plugin API** — server-side extensions that load INTO the
+  manager process. Versioned independently of the platform
+  (Plugin API version listed in the Release Notes for each
+  Morpheus version). A plugin built for Plugin API 1.3.x may not
+  load on 1.4.x without changes.
+
+**TODO — fill in real operational lessons as we accumulate them.**
+
+## Multi-cloud onboarding
+
+**TODO.** Each cloud (AWS, Azure, GCP, OCI, VMware vSphere, KVM/HVM,
+OpenStack, Nutanix, etc.) has its own onboarding ritual: credentials,
+networking, IAM roles, regions, storage providers, image catalogs.
+Search the User Manual: `search_docs(query="Add AWS cloud
+integration")`, `search_docs(query="Azure subscription cost")`, etc.
+
+## Tenancy, RBAC, and groups
+
+**TODO.** Morpheus Enterprise tenancy is one of the more complex areas
+— tenants, roles, groups, account groups, persona-based access.
+Lessons specific to "what surprised me" go here.
+
+## Backups
+
+**TODO.** Morpheus Enterprise inherits the backup framework HVM
+introduced (Storage Buckets, Execution Schedules, Backup Jobs)
+and adds: cloud-native backup integrations (AWS Backup, Azure
+Backup), per-instance backup policies via the policy engine,
+ServiceNow-driven backup orchestration. Document the gotchas you
+hit.
+
+## Common operational gotchas
+
+**TODO.** This is where the "experienced operator hallway
+conversation" notes go. Examples to seed (delete or replace as you
+learn):
+
+- **Service plan vs Instance type** — same concept, different
+  contexts. A service plan is the sizing template ("small / medium
+  / large with these CPU/RAM"); an instance type is what you
+  provision FROM the plan. Operators conflate them.
+- **Cloud integration credentials are tenant-scoped, not
+  global.** Adding a credential at the master tenant doesn't
+  cascade — sub-tenants need their own (or the policy engine
+  granting access).
+- **Policy engine vs Logic library** — both live under
+  Library/Automation, both can gate provisioning. Policies are
+  preventive (block bad config), logic is generative (run scripts
+  on lifecycle events). Pick the right tool.
+
+## Adding to this doc
+
+Two ways:
+
+1. Manually edit `docs_mcp/api_lessons.md` in this repo and commit.
+   The next image build picks it up.
+2. Use `submit_doc_bug` for upstream issues, and append the
+   takeaway here once the docs team responds.
+
+The point of this doc is to surface the kind of context an
+experienced operator would mention in a hallway conversation but
+that doesn't quite fit anywhere in the formal product docs. Keep
+sections tight — one H2 = one topic the LLM can return on demand.
diff --git a/docs_mcp/server.py b/docs_mcp/server.py
index 28b1345..d86a0a3 100644
--- a/docs_mcp/server.py
+++ b/docs_mcp/server.py
@@ -18,6 +18,8 @@ stable across products — clients depend on them.
 """
 from __future__ import annotations
 
+import datetime as _dt
+import difflib
 import json
 import logging
 import os
@@ -35,8 +37,11 @@ log = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 # Product-specific configuration. Set these for each new build.
 # ---------------------------------------------------------------------------
-PRODUCT_NAME = os.environ.get("PRODUCT_NAME", "myproduct")
-PRODUCT_DOCS_URL = os.environ.get("PRODUCT_DOCS_URL", "https://docs.example.com")
+PRODUCT_NAME = os.environ.get("PRODUCT_NAME", "morpheus")
+PRODUCT_DOCS_URL = os.environ.get(
+    "PRODUCT_DOCS_URL",
+    "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007732en_us",
+)
 COLLECTION = f"{PRODUCT_NAME}_docs"
 
 # Paths inside the deployed container (and matching layout locally for dev).
@@ -45,6 +50,8 @@ CORPUS = ROOT / "corpus"
 CHROMA_DIR = ROOT / "chroma"
 BM25_DB = Path(os.environ.get("BM25_DB", str(ROOT / "bm25" / f"{PRODUCT_NAME}_docs.db")))
 BUNDLES_JSON = ROOT / "bundles.json"
+DIGEST_HISTORY_PATH = CORPUS / ".digest" / "history.jsonl"
+API_LESSONS_MD = Path(__file__).resolve().parent / "api_lessons.md"
 
 # ---------------------------------------------------------------------------
 # Feature flags (Phase 6 / 8 / 12 enable these as you ship each phase).
@@ -104,6 +111,15 @@ def _build_where(version: str | None, platform: str | None, bundle_id: str | Non
     return {"$and": conds}
 
 
+def _where_for_bm25(version: str | None, platform: str | None, bundle_id: str | None) -> dict | None:
+    """Flatten the optional facets into the equality-filter dict BM25Index.query takes.
+
+    Falsy facets are left out; returns None when no facet is set."""
+    facets = (("version", version), ("platform", platform), ("bundle_id", bundle_id))
+    selected = {field: value for field, value in facets if value}
+    return selected or None
+
+
 def _read_page(bundle_id: str, page_id: str) -> tuple[str, dict] | None:
     """Read a corpus page off disk. Returns (markdown_body, metadata_dict)."""
     md_path = CORPUS / bundle_id / (page_id + ".md")
@@ -113,6 +129,115 @@ def _read_page(bundle_id: str, page_id: str) -> tuple[str, dict] | None:
     return md_path.read_text(), json.loads(json_path.read_text())
 
 
+_CHROMA = None  # cached Chroma collection handle; set on the first _collection() call
+_BM25 = None  # cached BM25Index handle; set on the first successful _bm25() call
+
+
+def _collection():
+    """Return the shared Chroma collection, opening it on first use.
+
+    Imports are deferred into the function body, so they only run on a cache miss."""
+    global _CHROMA
+    if _CHROMA is not None:
+        return _CHROMA
+    import chromadb
+    from chromadb.config import Settings
+    from rag.embeddings import embedding_function
+
+    store = chromadb.PersistentClient(path=str(CHROMA_DIR), settings=Settings(anonymized_telemetry=False))
+    _CHROMA = store.get_collection(COLLECTION, embedding_function=embedding_function())
+    return _CHROMA
+
+
+def _bm25():
+    """Return the shared BM25Index, or None when the db file is absent or fails to load."""
+    global _BM25
+    # A cached handle wins; with no handle and no db file, _BM25 is still None here.
+    if _BM25 is not None or not BM25_DB.exists():
+        return _BM25
+    try:
+        from rag.bm25 import BM25Index
+        _BM25 = BM25Index(str(BM25_DB))
+    except Exception as err:  # defensive: hybrid must never block dense
+        log.warning("BM25 unavailable, falling back to dense-only: %s", err)
+        return None
+    return _BM25
+
+
+def _enrich_from_chroma(col, chunk_ids: list[str], fused: list | None) -> tuple[list[str], list[dict], list[float]]:
+    """Fetch document text + metadata for `chunk_ids` from Chroma, preserving their order.
+
+    Ids Chroma does not return are dropped. Each distance is `1 - fused_score`, looked up
+    by id so it stays aligned with its doc (0.0 when `fused` is None or lacks the id)."""
+    if not chunk_ids:
+        return [], [], []
+    g = col.get(ids=chunk_ids, include=["documents", "metadatas"])
+    by_id = {i: (d, m) for i, d, m in zip(g["ids"], g["documents"], g["metadatas"])}
+    kept = [i for i in chunk_ids if i in by_id]
+    score_of = {cid: score for cid, score, _src in (fused or [])}
+    dists = [1.0 - score_of[i] if i in score_of else 0.0 for i in kept]
+    return [by_id[i][0] for i in kept], [by_id[i][1] for i in kept], dists
+
+
+def _rerank(query: str, candidates: list[tuple[str, str]]) -> list[tuple[str, str]] | None:
+    """Re-order `candidates` by relevance via a POST to `{RERANK_URL}/v1/rerank`.
+
+    `candidates` is `[(chunk_id, text), ...]`. Only the first 2000 chars of each
+    text are sent, so we never blow past jina-reranker's 1024-token per-pair
+    cap (which 400s the entire batch). Callers keep the untruncated text; the
+    truncation applies to the rerank request only.
+
+    Returns None when reranking is off, the input is empty, the request fails,
+    or no usable index comes back — the caller then keeps retrieval order.
+    """
+    if not (RERANK_URL and candidates):
+        return None
+    try:
+        import httpx
+        snippets = [(text or "")[:2000] for _cid, text in candidates]
+        body = {"query": query, "documents": snippets, "top_n": len(candidates)}
+        with httpx.Client(timeout=RERANK_TIMEOUT) as client:
+            resp = client.post(f"{RERANK_URL}/v1/rerank", json=body)
+            resp.raise_for_status()
+            results = resp.json().get("results") or []
+        reordered = []
+        for item in results:
+            pos = item.get("index")
+            if isinstance(pos, int) and 0 <= pos < len(candidates):
+                reordered.append(candidates[pos])
+        return reordered or None
+    except Exception as err:
+        log.warning("rerank failed, keeping retrieval order: %s", err)
+        return None
+
+
+def _rrf_fuse(*ranked_lists: list[str], k: int = RRF_K) -> list[tuple[str, float, dict]]:
+    """Merge best-first id lists with Reciprocal Rank Fusion.
+
+    An id at 1-based `rank` in a list adds `1 / (k + rank)` to its score. Returns
+    `[(id, fused_score, {retriever_name: rank}), ...]`, highest score first."""
+    labels = ("dense", "bm25", "extra")
+    totals: dict[str, float] = {}
+    seen_at: dict[str, dict] = {}
+    for pos, ranking in enumerate(ranked_lists):
+        label = labels[pos] if pos < len(labels) else f"r{pos}"
+        for rank, cid in enumerate(ranking, start=1):
+            totals[cid] = totals.get(cid, 0.0) + 1.0 / (k + rank)
+            seen_at.setdefault(cid, {})[label] = rank
+    return [(cid, total, seen_at[cid]) for cid, total in sorted(totals.items(), key=lambda kv: kv[1], reverse=True)]
+
+
+def _source_url(bundle_id: str, page_id: str) -> str:
+    """Return the HPE Support URL for a page, or "" when the bundle is not in the catalog.
+
+    Only `GUID-` page ids get a `&page=` suffix; other ids link to the document root."""
+    entry = _bundles().get(bundle_id)
+    if not entry:
+        return ""
+    url = f"https://support.hpe.com/hpesc/public/docDisplay?docId={entry.get('doc_id', '')}"
+    return f"{url}&page={page_id}.html" if page_id.startswith("GUID-") else url
+
+
 # ===========================================================================
 # Tools
 # ===========================================================================
@@ -134,7 +259,7 @@ def search_docs(
     ] = None,
     k: Annotated[int, Field(description="Number of results to return.", ge=1, le=50)] = 10,
 ) -> str:
-    """Search the {product} docs corpus.
+    """Search the HPE Morpheus Enterprise (Morpheus) docs corpus.
 
     Returns the top-k most relevant chunks (with full source page URLs)
     given a natural-language query. Optional filters narrow the search
@@ -142,20 +267,130 @@ def search_docs(
     first if you need to discover the available facet values.
 
     Call this tool whenever the user asks anything that should be
-    answerable from the official product documentation.
+    answerable from the official product documentation — install,
+    upgrade, configuration, backups, networking, HVM clusters, the
+    Morpheus UI, or any 8.1.x release-notes question.
     """
     with TimedCall("search_docs", {
         "query": query, "version": version, "platform": platform,
         "bundle_id": bundle_id, "k": k,
     }) as _call:
-        # TODO Phase 2-3: query Chroma collection (see rag/index.py for
-        # how it was built). Render the top-k chunks as markdown with
-        # source URLs.
-        # TODO Phase 6: optional reranker via _rerank() if RERANK_URL set.
-        # TODO Phase 8: hybrid retrieval if HYBRID_SEARCH=true — run
-        # dense + BM25 in parallel, RRF-fuse, hand merged pool to rerank.
-        _call.set(hits_returned=0)
-        raise NotImplementedError("Phase 2/3: implement Chroma query + rendering")
+        try:
+            col = _collection()
+        except Exception as e:
+            log.exception("chroma collection unavailable")
+            _call.set(hits_returned=0, error=str(e))
+            return f"_(search backend unavailable: {e})_"
+
+        where = _build_where(version, platform, bundle_id)
+        bm25_where = _where_for_bm25(version, platform, bundle_id)
+        pool = max(k * 5, 50)
+
+        # Retrieval mode selection. Eval on this corpus (2026-05-22, 22 golden
+        # queries) showed BM25 MRR=0.88 vs dense MRR=0.54 vs hybrid MRR=0.69 —
+        # HPE structured docs use controlled vocabulary, so lexical match wins.
+        # Dense is kept as fallback when BM25 has no tokens to chew on (e.g.
+        # purely stopword queries). HYBRID_SEARCH=true forces RRF fusion.
+        bm = _bm25()
+        docs: list[str] = []
+        metas: list[dict] = []
+        dists: list[float] = []
+        retrieval_mode = "dense"
+        top1_source = "dense_only"
+
+        if HYBRID_SEARCH and bm is not None:
+            try:
+                dense_res = col.query(query_texts=[query], n_results=pool, where=where)
+                dense_ids = (dense_res.get("ids") or [[]])[0]
+                bm_hits = bm.query(query, n=pool, where=bm25_where)
+                bm_ids = [cid for cid, _s in bm_hits]
+                fused = _rrf_fuse(dense_ids, bm_ids)
+                docs, metas, dists = _enrich_from_chroma(col, [c for c, _, _ in fused[:k]], fused)
+                if fused:
+                    src0 = fused[0][2]
+                    top1_source = ("both" if {"dense", "bm25"} <= set(src0)
+                                   else "bm25_only" if "bm25" in src0
+                                   else "dense_only")
+                retrieval_mode = "hybrid"
+            except Exception as e:
+                log.warning("hybrid failed, falling back to BM25→dense: %s", e)
+
+        if not docs and bm is not None:
+            try:
+                bm_hits = bm.query(query, n=k, where=bm25_where)
+                if bm_hits:
+                    ids = [cid for cid, _s in bm_hits[:k]]
+                    docs, metas, _ = _enrich_from_chroma(col, ids, None)
+                    # FTS5 returns negative scores (lower=better). Map onto a
+                    # similarity-ish [0..1] just for display.
+                    dists = [max(0.0, min(1.0, 1.0 - abs(s) / 20.0)) for _id, s in bm_hits[:k]]
+                    retrieval_mode = "bm25"
+                    top1_source = "bm25_only"
+            except Exception as e:
+                log.warning("BM25 retrieval failed, falling back to dense: %s", e)
+
+        if not docs:
+            res = col.query(query_texts=[query], n_results=k, where=where)
+            docs = (res.get("documents") or [[]])[0]
+            metas = (res.get("metadatas") or [[]])[0]
+            dists = (res.get("distances") or [[]])[0]
+
+        reranker_fired = False
+        if RERANK_URL and docs:
+            # Pull a deeper pool to give the reranker something to chew on.
+            # We over-fetch up to RERANK_POOL chunks from whichever retriever
+            # already won, then ask the reranker to pick the final top-k.
+            pool_size = max(k, RERANK_POOL)
+            if len(docs) < pool_size:
+                if retrieval_mode == "bm25":
+                    extra = bm.query(query, n=pool_size, where=bm25_where) if bm else []
+                    extra_ids = [cid for cid, _s in extra]
+                else:
+                    extra_res = col.query(query_texts=[query], n_results=pool_size, where=where)
+                    extra_ids = (extra_res.get("ids") or [[]])[0]
+                if extra_ids:
+                    d2, m2, _ = _enrich_from_chroma(col, extra_ids, None)
+                    docs, metas = d2, m2
+                    dists = [0.0] * len(docs)
+            # Reranker scores chunk_ids — collapse to (id, text) tuples
+            pairs = list(zip(
+                [f"{m.get('bundle_id','')}::{m.get('page_id','')}::{m.get('ordinal',0)}" for m in metas],
+                docs,
+            ))
+            reranked = _rerank(query, pairs)
+            if reranked is not None:
+                # Re-sort docs/metas to match. `dists` become ascending rank fractions
+                # (best hit 0.0) because the renderer below prints score = 1 - dist.
+                by_cid = {p[0]: i for i, p in enumerate(pairs)}
+                order = [by_cid[cid] for cid, _t in reranked if cid in by_cid]
+                docs = [docs[i] for i in order][:k]
+                metas = [metas[i] for i in order][:k]
+                dists = [rank / len(reranked) for rank in range(len(docs))]
+                reranker_fired = True
+            else:
+                docs, metas, dists = docs[:k], metas[:k], dists[:k]
+
+        _call.set(hits_returned=len(docs), retrieval_mode=retrieval_mode,
+                  top1_source=top1_source, reranker_fired=reranker_fired)
+        if not docs:
+            return f"_No matches for `{query}`._"
+
+        out = [f"# {len(docs)} result(s) for `{query}`", ""]
+        for doc, meta, dist in zip(docs, metas, dists):
+            bid = meta.get("bundle_id", "")
+            pid = meta.get("page_id", "")
+            title = meta.get("title") or pid
+            ver = meta.get("version") or ""
+            url = _source_url(bid, pid)
+            header = f"## {title}"
+            if ver:
+                header += f"  _(v{ver})_"
+            out.append(header)
+            out.append(f"[{bid}/{pid}]({url}) · score={1 - dist:.3f}")
+            out.append("")
+            out.append(doc.strip())
+            out.append("")
+        return "\n".join(out)
 
 
 @mcp.tool()
@@ -175,9 +410,21 @@ def get_page(
             return f"Page not found: {bundle_id}/{page_id}"
         md, meta = data
         _call.set(found=True, page_chars=len(md))
-        # TODO: add a metadata header (title, version, source URL) above
-        # the body. Product-specific shape.
-        return md
+        title = meta.get("title") or page_id
+        ver = meta.get("version")
+        parent = meta.get("parent_title")
+        url = _source_url(bundle_id, page_id)
+        header = [f"# {title}"]
+        ctx = []
+        if ver:
+            ctx.append(f"version **{ver}**")
+        if parent:
+            ctx.append(f"in **{parent}**")
+        if ctx:
+            header.append("_" + " · ".join(ctx) + "_")
+        header.append(f"[source]({url})")
+        header.append("")
+        return "\n".join(header) + "\n" + md
 
 
 @mcp.tool()
@@ -193,45 +440,835 @@ def list_versions() -> str:
         versions = sorted({b.get("version") for b in cat.values() if b.get("version")})
         platforms = sorted({b.get("platform") for b in cat.values() if b.get("platform")})
         _call.set(versions=len(versions), platforms=len(platforms))
+        products = sorted({b.get("product") for b in cat.values() if b.get("product")})
         lines = [f"# Facets across {len(cat)} bundle(s)", ""]
         if versions:
-            lines.append("## Versions"); lines.append("")
-            for v in versions: lines.append(f"- `{v}`")
-            lines.append("")
+            lines += ["## Versions", ""] + [f"- `{v}`" for v in versions] + [""]
         if platforms:
-            lines.append("## Platforms"); lines.append("")
-            for p in platforms: lines.append(f"- `{p}`")
+            lines += ["## Platforms", ""] + [f"- `{p}`" for p in platforms] + [""]
+        if products:
+            lines += ["## Product / doc types", ""] + [f"- {p}" for p in products] + [""]
+        lines += ["## Bundles", ""]
+        for slug in sorted(cat):
+            b = cat[slug]
+            kind = b.get("product") or ""
+            ver = b.get("version")
+            pages = b.get("page_count", "?")
+            label = f"{kind} {ver}".strip() if ver else kind
+            lines.append(f"- `{slug}` — {label} ({pages} pages)")
         return "\n".join(lines)
 
 
-# ---------------------------------------------------------------------------
-# Stubs for later phases — keep the signatures in this file so refactors
-# don't lose the contracts. Implementations come per phase.
-# ---------------------------------------------------------------------------
+# ===========================================================================
+# Phase 9 — cross-version tools
+# ===========================================================================
 
-# @mcp.tool()  # Phase 9
-# def list_cluster(bundle_id: str, page_id: str) -> str: ...
+def _bundle_pages(bundle_id: str) -> set[str]:
+    """Return the page ids in a bundle: the stems of its `*.md` files under CORPUS.
+
+    A bundle directory that does not exist yields an empty set."""
+    bundle_dir = CORPUS / bundle_id
+    return {md.stem for md in bundle_dir.glob("*.md")} if bundle_dir.is_dir() else set()
 
-# @mcp.tool()  # Phase 9
-# def diff_versions(bundle_id: str, page_id: str, against_bundle_id: str, context: int = 3) -> str: ...
 
-# @mcp.tool()  # Phase 9
-# def bundle_changelog(bundle_id_new: str, bundle_id_old: str, min_churn: int = 5, max_changed: int = 50) -> str: ...
+def _diff_churn(a: str, b: str) -> tuple[int, int]:
+    """Count (added, removed) lines when markdown body `a` is rewritten into `b`.
+
+    The two file-header lines of the unified diff are dropped by position, not
+    by prefix, so content such as a removed markdown `---` rule still counts."""
+    diff = difflib.unified_diff(a.splitlines(), b.splitlines(), n=0)
+    next(diff, None)  # "--- " header (only emitted when the bodies differ)
+    next(diff, None)  # "+++ " header
+    added = removed = 0
+    for line in diff:  # remaining lines are "@@" hunk markers or "+"/"-" content
+        added += line.startswith("+")
+        removed += line.startswith("-")
+    return added, removed
 
-# @mcp.tool()  # Phase 13
-# def weekly_digest(days: int = 7, version: str | None = None, platform: str | None = None, ...) -> str: ...
 
-# @mcp.tool()  # Phase 9 (or 3 — useful early)
-# def corpus_status() -> str: ...
+@mcp.tool()
+def list_cluster(
+    bundle_id: Annotated[str, Field(description="Bundle slug of the source topic.")],
+    page_id: Annotated[str, Field(description="Page id (GUID-XXXX) of the source topic.")],
+) -> str:
+    """List cross-version peers of a topic in the Morpheus docs.
 
-# @mcp.tool()  # Phase 11
-# def myproduct_api_lessons(topic: str | None = None) -> str: ...
+    HPE re-mints the docId per product version but keeps page GUIDs stable,
+    so the scrape pipeline synthesizes `topic_cluster.clustered_topics`
+    from same-GUID overlap across versions; peers are read from the page's JSON metadata.
+    """
+    with TimedCall("list_cluster", {"bundle_id": bundle_id, "page_id": page_id}) as _call:
+        out = _read_page(bundle_id, page_id)
+        if out is None:
+            _call.set(found=False)
+            return f"Not found: {bundle_id}/{page_id}"
+        _, side = out  # markdown body is not needed; `side` is the page's metadata dict
+        cluster = side.get("topic_cluster") or {}
+        peers = cluster.get("clustered_topics") or []
+        _call.set(hits_returned=len(peers))
+        src_label = cluster.get("clustering_title") or side.get("title") or page_id
+        lines = [f"# Cluster for {bundle_id}/{page_id}  ({src_label})", ""]
+        if not peers:
+            lines.append("_No peer topics in cluster._")
+            return "\n".join(lines)
+        for p in peers:  # each peer must carry 'bundle_id' and 'page_id' keys
+            lines.append(f"- `{p['bundle_id']}/{p['page_id']}`  —  {p.get('clustering_title') or ''}")
+        return "\n".join(lines)
 
-# @mcp.tool()  # Phase 12
-# def find_doc_inconsistencies(scope_query: str, ...) -> str: ...
 
-# @mcp.tool()  # Phase 12
-# def submit_doc_bug(page_url: str, content: str, email: str | None = None, ...) -> str: ...
+@mcp.tool()
+def diff_versions(
+    bundle_id: Annotated[str, Field(description="Bundle slug of the source topic (the 'new' side).")],
+    page_id: Annotated[str, Field(description="Page id of the source topic.")],
+    against_bundle_id: Annotated[str, Field(description="Bundle slug to diff against. Must be in the source's cluster, or share the same page_id.")],
+    context: Annotated[int, Field(description="Lines of context around each hunk.", ge=0, le=10)] = 3,
+) -> str:
+    """Unified diff of one topic between two bundles (typically two Morpheus versions).
+
+    Two matching strategies, tried in order:
+
+      1. `topic_cluster` peer (synthesized from same-GUID overlap by the scraper).
+      2. Same `page_id` fallback (works because page GUIDs are stable across versions).
+    """
+    with TimedCall("diff_versions", {
+        "bundle_id": bundle_id, "page_id": page_id,
+        "against_bundle_id": against_bundle_id, "context": context,
+    }) as _call:
+        src = _read_page(bundle_id, page_id)
+        if src is None:
+            _call.set(matched_via=None, reason="source_not_found")
+            return f"Source not found: {bundle_id}/{page_id}"
+        src_md, side = src
+        cluster = side.get("topic_cluster") or {}
+        peers = {p["bundle_id"]: p for p in (cluster.get("clustered_topics") or [])}
+
+        peer = peers.get(against_bundle_id)
+        if peer is not None:
+            peer_page_id = peer["page_id"]
+            matched_via = "topic_cluster"
+        elif _read_page(against_bundle_id, page_id) is not None:
+            peer_page_id = page_id
+            matched_via = "filename"
+        else:
+            _call.set(matched_via=None, reason="no_peer")
+            valid = list(peers) or ["(no peers)"]
+            return (f"No match for {bundle_id}/{page_id} in {against_bundle_id}.\n"
+                    f"- No cluster peer. Available peers: {valid}\n"
+                    f"- No page {page_id!r} in {against_bundle_id} either.")
+
+        _call.set(matched_via=matched_via)
+        peer_data = _read_page(against_bundle_id, peer_page_id)
+        if peer_data is None:  # a cluster peer can point at a page that is not on disk
+            return f"Peer not found in corpus: {against_bundle_id}/{peer_page_id}"
+        peer_md, _ = peer_data
+        # Diff newline-free lines and re-join them: a final line without a trailing
+        # "\n" would otherwise fuse with the next diff line or the closing fence.
+        diff = difflib.unified_diff(peer_md.splitlines(), src_md.splitlines(),
+                                     fromfile=f"{against_bundle_id}/{peer_page_id}",
+                                     tofile=f"{bundle_id}/{page_id}", n=context, lineterm="")
+        body = "\n".join(diff)
+        header = f"_matched via {matched_via}_\n\n"
+        if not body.strip():
+            return header + f"No differences between {bundle_id}/{page_id} and {against_bundle_id}/{peer_page_id}."
+        return header + f"```diff\n{body}\n```"
+
+
+@mcp.tool()
+def bundle_changelog(
+    bundle_id_new: Annotated[str, Field(description="New-side bundle slug, e.g. 'morpheus_user_manual_8_1_2'.")],
+    bundle_id_old: Annotated[str, Field(description="Old-side bundle slug, e.g. 'morpheus_user_manual_8_1_1'.")],
+    min_churn: Annotated[int, Field(description="Min (added + removed) lines to flag a page as changed.", ge=1, le=1000)] = 5,
+    max_changed: Annotated[int, Field(description="Max changed pages to list (sorted by churn desc).", ge=1, le=500)] = 50,
+) -> str:
+    """High-level diff between two Morpheus doc bundles.
+
+    Lists pages added, removed, and changed between an old bundle and a
+    new one. Match is by page_id (which is the stable GUID — same GUID
+    across versions = same topic). Use after `list_versions` to discover
+    valid bundle slugs.
+    """
+    with TimedCall("bundle_changelog", {
+        "bundle_id_new": bundle_id_new, "bundle_id_old": bundle_id_old,
+        "min_churn": min_churn, "max_changed": max_changed,
+    }) as _call:
+        new_pages = _bundle_pages(bundle_id_new)
+        old_pages = _bundle_pages(bundle_id_old)
+        if not new_pages and not old_pages:
+            _call.set(reason="both_empty")
+            return f"Neither bundle has pages on disk: {bundle_id_new}, {bundle_id_old}"
+        if not new_pages:
+            return f"Bundle not found or empty: {bundle_id_new}"
+        if not old_pages:
+            return f"Bundle not found or empty: {bundle_id_old}"
+
+        added = sorted(new_pages - old_pages)
+        removed = sorted(old_pages - new_pages)
+        common = sorted(new_pages & old_pages)
+
+        changed: list[tuple[str, int, int]] = []  # (page_id, added_lines, removed_lines)
+        for pid in common:
+            n = _read_page(bundle_id_new, pid)
+            o = _read_page(bundle_id_old, pid)
+            if n is None or o is None:
+                continue
+            a_lines, r_lines = _diff_churn(o[0], n[0])  # old -> new, markdown bodies only
+            if a_lines + r_lines >= min_churn:
+                changed.append((pid, a_lines, r_lines))
+        changed.sort(key=lambda t: -(t[1] + t[2]))  # highest total churn first
+        _call.set(added=len(added), removed=len(removed),
+                  changed=len(changed), unchanged=len(common) - len(changed))
+
+        lines = [
+            f"# Bundle changelog: {bundle_id_new} vs {bundle_id_old}", "",
+            f"- pages in new: **{len(new_pages)}**",
+            f"- pages in old: **{len(old_pages)}**",
+            f"- common: **{len(common)}**",
+            f"- **added** (in new only): {len(added)}",
+            f"- **removed** (in old only): {len(removed)}",
+            f"- **changed** (≥{min_churn} lines): {len(changed)} of {len(common)} common",
+            f"- unchanged: {len(common) - len(changed)}", "",
+        ]
+        if added:
+            lines += [f"## Added pages ({len(added)})", *(f"- `{p}`" for p in added), ""]
+        if removed:
+            lines += [f"## Removed pages ({len(removed)})", *(f"- `{p}`" for p in removed), ""]
+        if changed:
+            shown = changed[:max_changed]
+            lines += [
+                f"## Changed pages — top {len(shown)} of {len(changed)} by churn", "",
+                "| page | +lines | -lines | total |", "|---|---|---|---|",
+            ]
+            for p, a, r in shown:
+                lines.append(f"| `{p}` | +{a} | -{r} | {a + r} |")
+            if len(changed) > max_changed:
+                lines.append(f"\n_({len(changed) - max_changed} more changed pages omitted; raise `max_changed` to see them.)_")
+            lines.append("\nInspect a specific page: `diff_versions(bundle_id_new, page_id, bundle_id_old)`.")
+        return "\n".join(lines)
+
+
+# ===========================================================================
+# Phase 13 — weekly digest from corpus/.digest/history.jsonl (built in CI)
+# ===========================================================================
+
+_digest_cache: list[dict] | None = None
+
+
+def _digest_history() -> list[dict]:
+    """Load (once) the digest history JSONL written by scrape.changelog at CI time.
+
+    Blank, malformed and non-object lines are skipped; read errors are logged, not raised."""
+    global _digest_cache
+    if _digest_cache is not None:
+        return _digest_cache
+    records: list[dict] = []
+    if not DIGEST_HISTORY_PATH.exists():
+        log.warning("digest history not found at %s — weekly_digest will return empty.", DIGEST_HISTORY_PATH)
+    else:
+        try:
+            with open(DIGEST_HISTORY_PATH, encoding="utf-8") as fh:  # don't rely on the locale default
+                for ln, line in enumerate(fh, start=1):
+                    try:
+                        rec = json.loads(line) if line.strip() else None
+                    except json.JSONDecodeError as e:
+                        log.warning("digest history: skipping malformed line %d: %s", ln, e)
+                        continue
+                    if isinstance(rec, dict):  # weekly_digest indexes each record by key
+                        records.append(rec)
+        except (OSError, UnicodeDecodeError) as e:
+            log.warning("digest history read failed: %s", e)
+    _digest_cache = records
+    return _digest_cache
+
+
+@mcp.tool()
+def weekly_digest(
+    days: Annotated[int, Field(description="How far back to summarize. 7=last week, 30=last month. Horizon ~120 days.", ge=1, le=120)] = 7,
+    version: Annotated[str | None, Field(description="OPTIONAL version filter, e.g. '8.1.2'.")] = None,
+    platform: Annotated[str | None, Field(description="OPTIONAL platform filter (HVM bundles don't set platform — leave None).")] = None,
+    max_bundles: Annotated[int, Field(description="Cap on per-bundle detail blocks.", ge=1, le=100)] = 25,
+    max_pages_per_bundle: Annotated[int, Field(description="Pages to list per bundle.", ge=1, le=50)] = 10,
+) -> str:
+    """Summarize what changed in the HVM docs over the past N days.
+
+    Call when the user asks *"what's new in HVM docs this week?"*,
+    *"what changed in 8.1.2?"*, or *"is there anything new since the
+    last release?"*. Reads the pre-baked digest history JSONL written
+    by CI from git log over corpus-touching commits.
+    """
+    with TimedCall("weekly_digest", {
+        "days": days, "version": version, "platform": platform,
+        "max_bundles": max_bundles, "max_pages_per_bundle": max_pages_per_bundle,
+    }) as _call:
+        records = _digest_history()
+        if not records:
+            _call.set(returned="empty_no_history", record_count=0)
+            return ("# Weekly digest\n\n"
+                    f"_No digest history on this image. `{DIGEST_HISTORY_PATH}` is "
+                    "missing — it's populated by the weekly refresh workflow._")
+
+        now = _dt.datetime.now(_dt.timezone.utc)
+        cutoff = now - _dt.timedelta(days=days)
+        filtered: list[dict] = []
+        for r in records:
+            try:
+                ts = _dt.datetime.fromisoformat(r["timestamp"])
+            except (KeyError, ValueError):
+                continue
+            if ts.tzinfo is None:
+                ts = ts.replace(tzinfo=_dt.timezone.utc)
+            if ts >= cutoff:
+                filtered.append({**r, "_ts": ts})
+
+        if not filtered:
+            _call.set(returned="empty_window", record_count=0)
+            covers = ""
+            if records:
+                oldest = min(records, key=lambda r: r.get("timestamp", ""))
+                newest = max(records, key=lambda r: r.get("timestamp", ""))
+                covers = (f"\n\n_(History on this image covers "
+                          f"{oldest.get('timestamp','?')[:10]} through "
+                          f"{newest.get('timestamp','?')[:10]}.)_")
+            return (f"# Weekly digest — last {days} day{'s' if days != 1 else ''}\n\n"
+                    f"_No corpus changes recorded in this window._" + covers)
+
+        cat = _bundles()
+        def _passes(bid: str) -> bool:
+            if not (version or platform):
+                return True
+            b = cat.get(bid)
+            if b is None:
+                return False
+            if version and b.get("version") != version:
+                return False
+            if platform and b.get("platform") != platform:
+                return False
+            return True
+
+        filtered.sort(key=lambda r: r["_ts"], reverse=True)
+        per_bundle_pages: dict[str, list[str]] = {}
+        new_bundles_set: set[str] = set()
+        drift_bundles_set: set[str] = set()
+        commits_in_window = 0
+        for r in filtered:
+            commits_in_window += 1
+            for bid in r.get("new_bundles", []):
+                if _passes(bid):
+                    new_bundles_set.add(bid)
+            for bid in r.get("json_only_bundles", []):
+                if _passes(bid):
+                    drift_bundles_set.add(bid)
+            for bid, pages in (r.get("content_bundles") or {}).items():
+                if not _passes(bid):
+                    continue
+                seen = set(per_bundle_pages.get(bid, []))
+                fresh = [p for p in pages if p not in seen]
+                if fresh:
+                    per_bundle_pages.setdefault(bid, []).extend(fresh)
+
+        total_md = sum(len(p) for p in per_bundle_pages.values())
+        bundles_ranked = sorted(per_bundle_pages.items(), key=lambda kv: (-len(kv[1]), kv[0]))
+        _call.set(returned="ok", record_count=commits_in_window,
+                  bundles_changed=len(per_bundle_pages),
+                  new_bundles=len(new_bundles_set))
+
+        ts_oldest = filtered[-1]["_ts"].date().isoformat()
+        ts_newest = filtered[0]["_ts"].date().isoformat()
+        lines = [
+            f"# HVM docs digest — last {days} day{'s' if days != 1 else ''}", "",
+            f"_Window: {ts_oldest} → {ts_newest}_  •  _Filters: version={version}, platform={platform}_", "",
+            "## Headline", "",
+            f"- **{total_md}** page change(s) across **{len(per_bundle_pages)}** bundle(s)",
+            f"- **{commits_in_window}** corpus-touching commit(s) in this window",
+            f"- **{len(new_bundles_set)}** bundle(s) newly added",
+            f"- **{len(drift_bundles_set)}** bundle(s) with sidecar-only drift", "",
+        ]
+        if not per_bundle_pages and not new_bundles_set:
+            lines.append(f"_No bundle changes matched the filter in this window._")
+            return "\n".join(lines)
+        if new_bundles_set:
+            lines += ["## New bundles added", ""]
+            for bid in sorted(new_bundles_set):
+                b = cat.get(bid, {})
+                t = b.get("title") or ""
+                tag = f"  *({b.get('version') or '?'})*" if b.get("version") else ""
+                lines.append(f"- `{bid}`{tag} {t}")
+            lines.append("")
+        if bundles_ranked:
+            top = bundles_ranked[:max_bundles]
+            remainder = len(bundles_ranked) - len(top)
+            lines += [f"## Bundles with content changes — top {len(top)}" +
+                      (f" of {len(bundles_ranked)}" if remainder else ""), ""]
+            for bid, pages in top:
+                b = cat.get(bid, {})
+                tag = f"  *({b.get('version') or ''})*" if b.get("version") else ""
+                lines.append(f"### `{bid}`{tag}")
+                if b.get("title"):
+                    lines.append(f"_{b['title']}_")
+                lines.append(f"{len(pages)} page change(s).")
+                for p in pages[:max_pages_per_bundle]:
+                    lines.append(f"- `{p}`")
+                if len(pages) > max_pages_per_bundle:
+                    lines.append(f"  _(+{len(pages) - max_pages_per_bundle} more)_")
+                lines.append("")
+        lines.append("\nInspect a specific page: `get_page(bundle_id, page_id)` or `diff_versions(...)`.")
+        return "\n".join(lines)
+
+
+@mcp.tool()
+def corpus_status() -> str:
+    """Return a markdown report on freshness + size of the knowledge base.
+
+    Combines: (1) image build time (bundles.json mtime, rendered in UTC),
+    (2) most-recent upstream Published date across bundles, (3) total
+    bundles / `*.md` pages / Chroma chunks, plus the 5 newest bundles.
+    """
+    lines: list[str] = ["# Corpus status", ""]
+    try:
+        ts = _dt.datetime.fromtimestamp(BUNDLES_JSON.stat().st_mtime, tz=_dt.timezone.utc).isoformat(timespec="seconds")
+        lines.append(f"- This image built at: **{ts}**")
+    except OSError:  # stat() failed — report "unknown" rather than abort the whole status
+        lines.append("- This image build time: _unknown_")
+
+    cat = _bundles()
+    latest_pub: str | None = None
+    per_bundle: list[tuple[str, str]] = []  # (slug, Published) for bundles that carry a date
+    for slug, b in cat.items():
+        pub = (b.get("dates") or {}).get("Published")
+        if pub:
+            if latest_pub is None or pub > latest_pub:  # plain string compare — assumes sortable (ISO-style) dates; TODO confirm
+                latest_pub = pub
+            per_bundle.append((slug, pub))
+    if latest_pub:
+        lines.append(f"- Most-recent upstream Published date (any bundle): **{latest_pub}**")
+    lines.append("")
+    try:
+        chunk_count = _collection().count()
+    except Exception:
+        chunk_count = -1  # sentinel: rendered below as "(query failed)"
+    pages_count = sum(1 for d in (CORPUS.iterdir() if CORPUS.exists() else [])  # one per *.md directly inside
+                       if d.is_dir() for _ in d.glob("*.md"))                   # each directory under CORPUS
+    lines += [
+        f"- Bundles indexed: **{len(cat)}**",
+        f"- Pages in corpus: **{pages_count}**",
+        f"- Chunks in Chroma: **{chunk_count}**" if chunk_count >= 0 else "- Chunks in Chroma: _(query failed)_",
+        "",
+    ]
+    if per_bundle:
+        per_bundle.sort(key=lambda kv: kv[1], reverse=True)  # newest Published first (same string ordering as above)
+        lines.append("## Most-recently-edited bundles (by HPE)")
+        for slug, when in per_bundle[:5]:  # top five only
+            b = cat.get(slug, {})
+            lines.append(f"- `{slug}` — {b.get('title') or slug}  (published {when})")
+    return "\n".join(lines)
+
+
+# ===========================================================================
+# Phase 11 — curated knowledge: morpheus_api_lessons
+# ===========================================================================
+
+def _split_lessons_sections(md: str) -> list[tuple[str, str]]:  # -> [(H2 title, section text incl. its heading line)]
+    sections: list[tuple[str, str]] = []
+    current_title: str | None = None  # None until the first "## " heading is seen
+    current_lines: list[str] = []
+    for line in md.splitlines(keepends=True):  # keepends so "".join() reproduces the text exactly
+        m = re.match(r"^##\s+(.+?)\s*$", line)  # H2 only: "###" fails the \s+ that must follow "##"
+        if m:
+            if current_lines:  # flush what was collected so far (text before the first H2 is "(prelude)")
+                sections.append((current_title or "(prelude)", "".join(current_lines)))
+            current_title = m.group(1).strip()
+            current_lines = [line]  # the heading line opens its own section
+        else:
+            current_lines.append(line)
+    if current_lines:  # flush the final section
+        sections.append((current_title or "(prelude)", "".join(current_lines)))
+    return sections
+
+
+@mcp.tool()
+def morpheus_api_lessons(
+    topic: Annotated[str | None, Field(description="Optional keyword filter — returns only H2 sections whose heading or body contains this substring. Examples: 'manager', 'agent upgrade', 'plugin api', 'worker', 'console keyboard'. Omit for the full doc.")] = None,
+) -> str:
+    """Curated lessons about HPE Morpheus Enterprise — non-obvious bits
+    that aren't in the official docs and gotchas learned from real
+    integration / operation work.
+
+    **Call this proactively whenever the user asks you to:**
+    - install, upgrade, or troubleshoot Morpheus Enterprise or an HVM cluster it manages
+    - integrate with Morpheus (REST API, Plugin API, automation, scripting)
+    - upgrade across versions (8.1.0 → 8.1.1 → 8.1.2)
+    - work with HVM Host agents
+    - configure backups, networking, or storage
+    - elevate from HVM to HPE Morpheus Enterprise
+
+    With ``topic=...`` you'll get just the matching H2 section(s), or the
+    full doc if nothing matches. With no argument you'll get the full doc —
+    usually right when starting a new task: the TL;DR at the top primes the rest.
+    """
+    with TimedCall("morpheus_api_lessons", {"topic": topic}) as _call:
+        try:
+            md = API_LESSONS_MD.read_text(encoding="utf-8")  # explicit: don't depend on the process locale
+        except OSError as e:
+            _call.set(error=str(e))
+            return f"Lessons doc not present at {API_LESSONS_MD}: {e}"
+        needle = (topic or "").strip().lower()  # case-insensitive; padded / whitespace-only topics are normalised
+        if not needle:  # no usable filter → whole document
+            _call.set(returned="full")
+            return md
+        sections = _split_lessons_sections(md)
+        kept: list[str] = []
+        for title, body in sections:
+            if needle in title.lower() or needle in body.lower():  # substring match on heading or body
+                kept.append(body)
+        if not kept:  # nothing matched: say so, but still hand back the full doc
+            _call.set(returned="empty", topic_matched=False)
+            return (f"_No sections matched topic={topic!r}. Returning the full document._\n\n" + md)
+        _call.set(returned="filtered", sections_matched=len(kept))
+        return f"_Filtered to {len(kept)} section(s) matching topic={topic!r}._\n\n" + "".join(kept)
+
+
+# ===========================================================================
+# Phase 12 — find_doc_inconsistencies + submit_doc_bug
+# ===========================================================================
+
+_REDIRECT_PHRASE_RE = re.compile(  # "see [the] X… for|topic|section|chapter|guide"; X… has no . ! ? or newline
+    r"\bsee\s+(?:the\s+)?[A-Z`\[][^.!?\n]{2,80}(?:for|topic|section|chapter|guide)\b",
+    re.IGNORECASE,  # NOTE(review): this also lets the [A-Z...] class match lowercase letters
+)
+_VERSION_SUFFIX_RE = re.compile(r"_(\d+_\d+_\d+)$")  # trailing "_<digits>_<digits>_<digits>"
+
+
+def _bundle_family(bundle_id: str) -> str:
+    """Strip a trailing `_X_Y_Z` version suffix from a bundle slug.
+
+    `morpheus_user_manual_8_1_0` → `morpheus_user_manual`
+    `morpheus_quickspecs`        → `morpheus_quickspecs` (no version)
+
+    Same-family bundles are version peers; cross-family pairs (User Manual
+    vs Release Notes) are intentionally different content.
+    """
+    return _VERSION_SUFFIX_RE.sub("", bundle_id)
+
+
+def _check_cross_version_drift(bundle_id: str, page_id: str, md: str, meta: dict) -> dict | None:  # finding dict, or None
+    cluster = (meta.get("topic_cluster") or {}).get("clustered_topics") or []  # peer entries from the page metadata
+    if not cluster:
+        return None
+    src_family = _bundle_family(bundle_id)
+    src_lines = max(1, len(md.splitlines()))  # floor at 1 so the percentage never divides by zero
+    in_band: list[tuple[int, str, str, int]] = []  # (churn, peer_bundle, peer_page, peer_lines)
+    out_band: list[tuple[int, str, str, int]] = []
+    for peer in cluster:
+        peer_bid = peer.get("bundle_id")
+        peer_pid = peer.get("page_id")
+        if not (peer_bid and peer_pid) or peer_bid == bundle_id:  # skip malformed entries and this same bundle
+            continue
+        if _bundle_family(peer_bid) != src_family:  # only version peers of the same family are compared
+            continue
+        peer_data = _read_page(peer_bid, peer_pid)
+        if peer_data is None:
+            continue
+        peer_md, _ = peer_data
+        added, removed = _diff_churn(md, peer_md)
+        churn = added + removed
+        peer_lines = max(1, len(peer_md.splitlines()))
+        denom = max(src_lines, peer_lines)  # measure churn against the longer of the two pages
+        pct = (churn * 100) // denom if denom else 0  # integer percent
+        tup = (churn, peer_bid, peer_pid, peer_lines)
+        if 10 <= pct <= 60:  # in-band drift → reported below with "high" confidence
+            in_band.append(tup)
+        elif churn >= 5:  # outside the band but at least 5 changed lines → "low" confidence
+            out_band.append(tup)
+    if in_band:
+        chosen = min(in_band, key=lambda t: t[0])  # report the least-churn peer
+        confidence = "high"
+    elif out_band:
+        chosen = min(out_band, key=lambda t: t[0])
+        confidence = "low"
+    else:
+        return None
+    churn, peer_bid, peer_pid, peer_lines = chosen
+    denom = max(src_lines, peer_lines)
+    churn_pct = min(100, (churn * 100) // denom) if denom else 0  # same ratio as above, capped at 100 for display
+    return {
+        "check": "cross_version_drift",
+        "bundle_id": bundle_id, "page_id": page_id,
+        "page_url": _source_url(bundle_id, page_id),
+        "peer_bundle_id": peer_bid, "peer_page_id": peer_pid,
+        "churn_lines": churn, "churn_pct_of_file": churn_pct,
+        "confidence": confidence,
+        "summary": (f"Drifts {churn} lines (~{churn_pct}% of file) vs peer "
+                    f"{peer_bid}/{peer_pid}. Inspect with "
+                    f"diff_versions({bundle_id!r}, {page_id!r}, {peer_bid!r})."),
+    }
+
+
+def _check_redirect_chain(bundle_id: str, page_id: str, md: str, meta: dict) -> dict | None:  # finding or None; `meta` is unused
+    body = re.sub(r"^#[^\n]*\n", "", md, count=1).strip()  # drop a heading on the very first line, if any
+    if "```" in body:  # pages containing a code fence are never flagged
+        return None
+    text_only = re.sub(r"[`\[\]()*_>#-]", "", body)  # strip markdown punctuation before measuring length
+    text_only = re.sub(r"\s+", " ", text_only).strip()
+    if len(text_only) > 600:  # only short pages qualify
+        return None
+    redirect_matches = list(_REDIRECT_PHRASE_RE.finditer(body))  # matched against the un-stripped body
+    if not redirect_matches:
+        return None
+    evidence = redirect_matches[0].group(0).strip()  # first match is the quoted evidence
+    return {
+        "check": "redirect_chain",
+        "bundle_id": bundle_id, "page_id": page_id,
+        "page_url": _source_url(bundle_id, page_id),
+        "body_chars": len(text_only),
+        "redirect_phrase": evidence[:200],
+        "confidence": "medium",
+        "summary": (f"Page is {len(text_only)} chars of body text with a "
+                    f'"see ... for ..." redirect: "{evidence[:120]}". '
+                    "Inspect with get_page to confirm."),
+    }
+
+
+@mcp.tool()
+def find_doc_inconsistencies(
+    scope_query: Annotated[str, Field(description="Natural-language scope describing what slice to scan. Used as a search to pick candidate pages. Examples: 'backup configuration', 'HVM cluster setup', 'VME manager installation'.")],
+    version: Annotated[str | None, Field(description="OPTIONAL version filter — e.g. '8.1.2'.")] = None,
+    platform: Annotated[str | None, Field(description="OPTIONAL platform filter (HVM bundles don't set platform — usually leave None).")] = None,
+    bundle_id: Annotated[str | None, Field(description="OPTIONAL specific bundle slug to restrict scanning to.")] = None,
+    max_pages: Annotated[int, Field(description="How many candidate pages to inspect.", ge=5, le=200)] = 30,
+    checks: Annotated[list[str] | None, Field(description="Which checks to run. Available: 'cross_version_drift', 'redirect_chain'. Defaults to all.")] = None,
+) -> str:
+    """Scan a scoped set of Morpheus docs pages for likely documentation bugs.
+
+    Surfaces concrete candidates for human review — NOT a stream of
+    bugs to auto-submit. Workflow:
+
+      1. Run this against a focused scope.
+      2. Review each finding; many will be false positives.
+      3. For real bugs, drill in with `get_page` / `diff_versions`.
+      4. Draft a bug report; show the operator; ask explicitly.
+      5. Only then call `submit_doc_bug`. One bug = one confirmation.
+
+    **Do NOT loop submissions.** Even on "submit them all", confirm each
+    one individually. HPE's docs queue is a shared resource.
+    """
+    with TimedCall("find_doc_inconsistencies", {
+        "scope_query": scope_query, "version": version, "platform": platform,
+        "bundle_id": bundle_id, "max_pages": max_pages, "checks": checks,
+    }) as _call:
+        all_checks = {"cross_version_drift", "redirect_chain"}
+        requested = all_checks if checks is None else {c for c in checks if c in all_checks}  # unknown names are dropped
+        if not requested:
+            _call.set(error="no_valid_checks")
+            return f"No valid checks requested. Available: {sorted(all_checks)}."
+        try:
+            col = _collection()
+        except Exception as e:
+            _call.set(error=f"collection: {e}")
+            return f"Couldn't open Chroma collection: {e}"
+        where = _build_where(version, platform, bundle_id)
+        try:
+            res = col.query(query_texts=[scope_query], n_results=max_pages * 3,  # over-fetch: hits are deduped to pages below
+                            where=where, include=["metadatas"])
+        except Exception as e:
+            _call.set(error=f"query: {e}")
+            return f"Scope query failed: {e}"
+        seen: set[tuple[str, str]] = set()
+        candidates: list[tuple[str, str]] = []
+        for meta in (res.get("metadatas") or [[]])[0]:  # collapse hits to unique (bundle, page), keeping result order
+            key = (meta.get("bundle_id") or "", meta.get("page_id") or "")
+            if not key[0] or not key[1] or key in seen:
+                continue
+            seen.add(key)
+            candidates.append(key)
+            if len(candidates) >= max_pages:
+                break
+        _call.set(pages_inspected=len(candidates), checks=sorted(requested))
+        if not candidates:
+            return f"No pages matched scope `{scope_query}`."
+        findings: dict[str, list[dict]] = {c: [] for c in requested}  # keyed by requested checks only
+        for bid, pid in candidates:
+            data = _read_page(bid, pid)
+            if data is None:  # _read_page had nothing for this candidate — skip it
+                continue
+            md, meta = data
+            if "cross_version_drift" in requested:
+                f = _check_cross_version_drift(bid, pid, md, meta)
+                if f:
+                    findings["cross_version_drift"].append(f)
+            if "redirect_chain" in requested:
+                f = _check_redirect_chain(bid, pid, md, meta)
+                if f:
+                    findings["redirect_chain"].append(f)
+        # Sort in place through .get(): an unrequested check never gains a key in `findings`.
+        findings.get("cross_version_drift", []).sort(  # high confidence first, then most churn
+            key=lambda f: (-(1 if f["confidence"] == "high" else 0), -f["churn_lines"]))
+        findings.get("redirect_chain", []).sort(  # shortest body first
+            key=lambda f: f["body_chars"])
+        total = sum(len(v) for v in findings.values())
+        _call.set(findings_total=total,
+                  findings_by_check={k: len(v) for k, v in findings.items()})
+        lines = [
+            f"# Doc inconsistency scan — {len(candidates)} pages inspected", "",
+            f"_Scope_: `{scope_query}`  •  _Filters_: version={version}, platform={platform}, bundle_id={bundle_id}  •  _Checks_: {sorted(requested)}", "",
+            f"**{total} candidate finding{'' if total == 1 else 's'}.** Review each individually. "
+            "For real bugs, follow up with `get_page` / `diff_versions`, draft the report, "
+            "show the operator, and only call `submit_doc_bug` after explicit confirmation.", "",
+        ]
+        if not total:
+            lines.append("_No findings in this scope._")
+            return "\n".join(lines)
+        for check in sorted(requested):  # one report section per requested check, in name order
+            items = findings.get(check, [])
+            lines += [f"## {check}  ({len(items)})", ""]
+            if not items:
+                lines.append("_No findings for this check._\n")
+                continue
+            for i, f in enumerate(items, 1):
+                lines.append(f"### {i}. `{f['bundle_id']}/{f['page_id']}`  *({f['confidence']} confidence)*")
+                lines.append(f"- URL: {f['page_url']}")
+                lines.append(f"- {f['summary']}")
+                if check == "cross_version_drift":
+                    lines.append(f"- Peer: `{f['peer_bundle_id']}/{f['peer_page_id']}`  •  churn: {f['churn_lines']} lines ({f['churn_pct_of_file']}% of file)")
+                elif check == "redirect_chain":
+                    lines.append(f"- Body length: {f['body_chars']} chars  •  Phrase: *\"{f['redirect_phrase']}\"*")
+                lines.append("")
+        lines += ["---",
+                  "_Reminder: `submit_doc_bug` has a real side effect. Draft → show → confirm → submit, one at a time. Do not loop._"]
+        return "\n".join(lines)
+
+
+# --- submit_doc_bug ----------------------------------------------------------
+# HPE Support DocPortal's "Was this helpful?" widget POSTs to an endpoint
+# we haven't sniffed yet. Until DOC_BUG_API_URL is set AND
+# DOC_BUG_SUBMIT_ENABLED=true, this tool refuses submission and tells the
+# operator to paste manually. When you sniff the endpoint, set both env
+# vars and verify the payload shape against the schema below.
+
+_DOC_BUG_ALLOWED_HOSTS = {"support.hpe.com"}  # page_url hostnames accepted by submit_doc_bug (exact match)
+_EMAIL_RE = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")  # loose "x@y.z" shape check, not full address validation
+
+
+@mcp.tool()
+def submit_doc_bug(
+    page_url: Annotated[str, Field(description="Full URL of the support.hpe.com page the bug is about. Must be a support.hpe.com URL.")],
+    content: Annotated[str, Field(description="Body of the bug report. Be specific: what the page says, what's wrong, what it should say. Cite exact passages. The docs team reads it verbatim.")],
+    email: Annotated[str | None, Field(description="OPTIONAL submitter email for follow-up. Omit if anonymous.")] = None,
+    rating: Annotated[int | None, Field(description="OPTIONAL star rating 1-5 (1-2 for serious bugs, 3 unclear, 4-5 only on explicit request).")] = None,
+    like: Annotated[bool | None, Field(description="OPTIONAL thumbs-up/down. False for bugs, True for positive feedback.")] = None,
+) -> str:
+    """Submit a documentation bug to HPE's docs feedback channel.
+
+    **⚠️  THIS TOOL HAS A REAL SIDE EFFECT (when enabled). It POSTs to
+    HPE's docs feedback endpoint and the submission lands in their queue.**
+
+    **MANDATORY operator-confirmation workflow:**
+
+    1. Draft the bug content yourself. Show the operator the exact text
+       you intend to submit + the page URL + any rating/email fields.
+    2. Ask explicitly: *"Submit this bug? (yes/no)"*
+    3. Only call submit_doc_bug AFTER they answer yes.
+    4. If they say *"submit them all"*, STILL confirm each one. This
+       tool MUST NOT be called in a loop without per-bug consent.
+
+    **Do not call this autonomously.** Don't preemptively submit while
+    exploring inconsistencies. Don't call inside an agent loop without
+    a human in the loop. Misuse will get this MCP blocked at HPE's WAF.
+
+    **What makes a good bug report:**
+    - Specific page URL. One bug = one page.
+    - Concrete quote of the problem text + version/platform context.
+    - Suggested correction when you have one.
+    - Avoid editorializing — factual bugs and broken links best.
+    """
+    with TimedCall("submit_doc_bug", {
+        "page_url": page_url, "content_len": len(content or ""),
+        "email_present": bool(email), "rating": rating, "like": like,
+    }) as _call:  # telemetry records only the content length and whether an email was given
+        if not DOC_BUG_SUBMIT_ENABLED:  # gate 1: feature flag
+            _call.set(error="disabled", outcome="refused_disabled")
+            return (
+                "submit_doc_bug is disabled on this MCP deployment "
+                "(DOC_BUG_SUBMIT_ENABLED is not set). The operator's draft is good — "
+                f"they can paste it into the feedback widget on {page_url} themselves.\n\n"
+                "_(For maintainers: sniff HPE's feedback endpoint, set DOC_BUG_API_URL "
+                "to the POST target, and DOC_BUG_SUBMIT_ENABLED=true to activate.)_"
+            )
+        if not DOC_BUG_API_URL:  # gate 2: a POST target must be configured
+            _call.set(error="no_endpoint", outcome="refused_disabled")
+            return ("submit_doc_bug is enabled but DOC_BUG_API_URL is empty. "
+                    f"Operator should paste manually at {page_url}.")
+        if not content or not content.strip():
+            _call.set(error="empty_content", outcome="refused_invalid")
+            return "Refused: empty `content`."
+        if len(content) > 10000:
+            _call.set(error="content_too_long", outcome="refused_invalid")
+            return f"Refused: `content` is {len(content)} chars (cap 10000)."
+        try:
+            from urllib.parse import urlparse
+            parsed = urlparse(page_url)
+        except Exception as e:
+            _call.set(error=f"url_parse: {e}", outcome="refused_invalid")
+            return f"Refused: couldn't parse page_url ({e})."
+        if parsed.scheme not in ("http", "https"):
+            _call.set(error="bad_scheme", outcome="refused_invalid")
+            return f"Refused: scheme must be http(s), got {parsed.scheme!r}."
+        if parsed.hostname not in _DOC_BUG_ALLOWED_HOSTS:  # exact host match; a missing host (None) is refused too
+            _call.set(error=f"bad_host: {parsed.hostname}", outcome="refused_invalid")
+            return (f"Refused: page_url host {parsed.hostname!r} isn't a "
+                    f"support.hpe.com URL. submit_doc_bug only accepts bugs against HPE Support pages.")
+        if email is not None and not _EMAIL_RE.match(email):
+            _call.set(error="bad_email", outcome="refused_invalid")
+            return f"Refused: email {email!r} doesn't look valid. Omit if anonymous."
+        if rating is not None and not (1 <= rating <= 5):
+            _call.set(error="bad_rating", outcome="refused_invalid")
+            return f"Refused: rating must be 1-5, got {rating}."
+
+        href = f"{parsed.scheme}://{parsed.hostname}{parsed.path}{('?' + parsed.query) if parsed.query else ''}"  # rebuilt: drops userinfo, port, fragment
+        payload: dict = {"content": content, "href": href}
+        if email:
+            payload["email"] = email
+        if rating is not None:
+            payload["rating"] = rating
+        if like is not None:
+            payload["like"] = like
+
+        try:
+            import httpx
+        except ImportError:
+            _call.set(error="httpx_missing", outcome="refused_runtime")
+            return "Refused: httpx not available."
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "User-Agent": "morpheus-docs-mcp submit_doc_bug",  # was still "hvm-docs-mcp" from the port
+            "Origin": "https://support.hpe.com",
+            "Referer": href,
+        }
+        try:
+            with httpx.Client(timeout=DOC_BUG_TIMEOUT) as c:
+                r = c.post(DOC_BUG_API_URL, json=payload, headers=headers)
+        except httpx.RequestError as e:
+            _call.set(error=f"transport: {e}", outcome="failed_transport")
+            return f"Submission failed (transport): {e}"
+
+        comment_id: object = None
+        body_summary = ""
+        try:
+            resp_json = r.json()
+            comment_id = (resp_json.get("commentId") or resp_json.get("id")) if isinstance(resp_json, dict) else None  # a JSON list/str has no .get() — don't crash after the POST already landed
+            body_summary = json.dumps(resp_json)[:300]
+        except (ValueError, json.JSONDecodeError):  # non-JSON body: fall back to raw text
+            body_summary = (r.text or "")[:300]
+        _call.set(http_status=r.status_code, comment_id=comment_id,
+                  outcome=("submitted" if r.is_success else "rejected_upstream"))
+        if r.is_success:
+            id_note = f" (commentId={comment_id})" if comment_id else ""
+            return f"Submitted. HTTP {r.status_code}{id_note}. HPE docs team will see this for {href}."
+        if r.status_code in (401, 403, 429):
+            return (f"Submission rejected upstream (HTTP {r.status_code}). "
+                    "Likely captcha/auth/rate-limit on anonymous POSTs. "
+                    f"Operator can paste manually at {href}.\n\nResponse (truncated): {body_summary}")
+        return f"Submission rejected upstream (HTTP {r.status_code}). Response (truncated): {body_summary}"
 
 
 # ===========================================================================
diff --git a/eval/queries.jsonl b/eval/queries.jsonl
new file mode 100644
index 0000000..98c50ce
--- /dev/null
+++ b/eval/queries.jsonl
@@ -0,0 +1,4 @@
+{"query": "what's the per-socket licensing model for Morpheus Enterprise", "expected": [{"bundle_id": "morpheus_quickspecs", "page_id": "a50009231enw"}], "tags": ["licensing", "skus"]}
+{"query": "add an AWS cloud integration", "expected": [], "tags": ["cloud", "TODO-populate-after-first-scrape"]}
+{"query": "Plugin API version compatibility", "expected": [], "tags": ["api", "TODO"]}
+{"query": "Morpheus Enterprise 8.1.2 what's new", "expected": [{"bundle_id": "morpheus_release_notes_8_1_2", "page_id": "sd00007733en_us"}], "tags": ["release-notes"]}
diff --git a/eval/retrievers.py b/eval/retrievers.py
index bc06a18..872cf31 100644
--- a/eval/retrievers.py
+++ b/eval/retrievers.py
@@ -10,7 +10,7 @@ to one entry; the highest-ranked chunk's position wins).
 """
 from __future__ import annotations
 
-from typing import Protocol, Iterable
+from typing import Iterable, Protocol
 
 
 class Retriever(Protocol):
@@ -21,12 +21,17 @@ class Retriever(Protocol):
         ...
 
 
-def _collapse_to_pages(chunk_ids: Iterable[tuple[str, str, str]], k: int) -> list[tuple[str, str]]:
-    """Take a stream of (bundle_id, page_id, chunk_ordinal) and return
-    the first k unique pages in their first-seen order."""
+def _split_chunk_id(chunk_id: str) -> tuple[str, str, int]:
+    """`bundle::page::ordinal` -> (bundle, page, int(ordinal))."""
+    bid, pid, ordinal = chunk_id.split("::")  # ValueError unless there are exactly three "::"-separated parts
+    return bid, pid, int(ordinal)  # int() also raises ValueError on a non-numeric ordinal
+
+
+def _collapse_to_pages(chunk_ids: Iterable[str], k: int) -> list[tuple[str, str]]:
     seen: set[tuple[str, str]] = set()
     out: list[tuple[str, str]] = []
-    for bid, pid, _ord in chunk_ids:
+    for cid in chunk_ids:
+        bid, pid, _ord = _split_chunk_id(cid)
         key = (bid, pid)
         if key in seen:
             continue
@@ -37,26 +42,111 @@ def _collapse_to_pages(chunk_ids: Iterable[tuple[str, str, str]], k: int) -> lis
     return out
 
 
-# TODO Phase 2/3 — implement these once Chroma + the bm25 module are
-# in place. Each one is small (15-30 LOC). The eval harness imports
-# from this module by class name.
-#
-# class DenseRetriever:
-#     name = "dense"
-#     def __init__(self, collection): self.col = collection
-#     def retrieve(self, query, k=10): ...
-#
-# class RerankedRetriever:
-#     name = "dense+rerank"
-#     def __init__(self, collection, rerank_url, pool=200): ...
-#     def retrieve(self, query, k=10): ...
-#
-# class BM25Retriever:
-#     name = "bm25"
-#     def __init__(self, bm25_index): ...
-#     def retrieve(self, query, k=10): ...
-#
-# class HybridRetriever:
-#     name = "bm25+dense+rrf"
-#     def __init__(self, dense, bm25, k_rrf=60): ...
-#     def retrieve(self, query, k=10): ...
+class DenseRetriever:
+    """Chroma cosine search via the live embedding function."""
+    name = "dense"
+
+    def __init__(self, collection, pool: int = 50):
+        self.col = collection
+        self.pool = pool  # results requested per query, before collapsing chunk ids to pages
+
+    def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
+        res = self.col.query(query_texts=[query], n_results=self.pool)
+        ids = (res.get("ids") or [[]])[0]  # ids for the single query sent; [] when "ids" is missing/empty
+        return _collapse_to_pages(ids, k)
+
+
+class BM25Retriever:
+    """SQLite FTS5 lexical search."""
+    name = "bm25"
+
+    def __init__(self, bm25_index, pool: int = 200):
+        self.bm = bm25_index
+        self.pool = pool  # hits requested per query, before collapsing chunk ids to pages
+
+    def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
+        hits = self.bm.query(query, n=self.pool)  # unpacked below as (chunk_id, score) pairs
+        return _collapse_to_pages((cid for cid, _score in hits), k)
+
+
+class HybridRetriever:
+    """Reciprocal Rank Fusion of dense + BM25 rankings."""
+    name = "hybrid_rrf"
+
+    def __init__(self, dense: DenseRetriever, bm25: BM25Retriever, k_rrf: int = 60, pool: int = 100):
+        self.dense = dense
+        self.bm25 = bm25
+        self.k_rrf = k_rrf  # RRF constant: each ranking contributes 1 / (k_rrf + rank)
+        self.pool = pool  # pages requested from each base retriever before fusing
+
+    def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
+        dense_pages = self.dense.retrieve(query, k=self.pool)
+        bm25_pages = self.bm25.retrieve(query, k=self.pool)
+        scores: dict[tuple[str, str], float] = {}  # (bundle_id, page_id) -> fused score
+        for rank, page in enumerate(dense_pages, start=1):  # ranks are 1-based
+            scores[page] = scores.get(page, 0.0) + 1.0 / (self.k_rrf + rank)
+        for rank, page in enumerate(bm25_pages, start=1):  # a page in both lists accumulates both terms
+            scores[page] = scores.get(page, 0.0) + 1.0 / (self.k_rrf + rank)
+        ranked = sorted(scores.items(), key=lambda kv: -kv[1])  # highest fused score first; ties keep insertion order
+        return [page for page, _s in ranked[:k]]
+
+
+def _rerank_pool(rerank_url: str, query: str, ids_and_texts: list[tuple[str, str]],
+                 timeout: float = 30.0) -> list[str] | None:
+    """POST to /v1/rerank, return ids in reranked order. None on failure."""
+    if not ids_and_texts:  # nothing to rerank — skip the HTTP call
+        return []
+    import httpx  # imported lazily: only needed when a rerank is actually attempted
+    try:
+        with httpx.Client(timeout=timeout) as c:
+            r = c.post(f"{rerank_url}/v1/rerank", json={
+                "query": query,
+                "documents": [(t or "")[:2000] for _i, t in ids_and_texts],  # each text truncated to 2000 chars
+                "top_n": len(ids_and_texts),  # ask for every candidate back
+            })
+            r.raise_for_status()
+            results = r.json().get("results") or []
+        return [ids_and_texts[item["index"]][0] for item in results  # map each result's index back to its id
+                if isinstance(item.get("index"), int)
+                and 0 <= item["index"] < len(ids_and_texts)]  # out-of-range / non-int indexes are dropped
+    except Exception:  # deliberately broad: any failure (HTTP, JSON, shape) is reported as None
+        return None
+
+
+class RerankedRetriever:
+    """Pull a candidate pool via a base retriever, then cross-encoder re-rank."""
+
+    def __init__(self, base: Retriever, collection, rerank_url: str, name_suffix: str = "rerank",
+                 pool: int = 50, timeout: float = 30.0):
+        self.base = base
+        self.col = collection
+        self.url = rerank_url
+        self.name = f"{base.name}+{name_suffix}"  # e.g. "bm25+rerank"
+        self.pool = pool  # pages requested from the base retriever before reranking
+        self.timeout = timeout
+
+    def retrieve(self, query: str, k: int = 10) -> list[tuple[str, str]]:
+        # Base returns deduplicated page-level tuples; rerank needs CHUNK-level
+        # texts to be informative. Pull each page's chunk 0 text from Chroma.
+        pages = self.base.retrieve(query, k=self.pool)
+        if not pages:
+            return []
+        chunk_ids = [f"{bid}::{pid}::0" for bid, pid in pages]  # chunk ordinal 0 stands in for the whole page
+        g = self.col.get(ids=chunk_ids, include=["documents"])
+        by_id = dict(zip(g["ids"], g["documents"]))
+        ids_and_texts = [(cid, by_id.get(cid, "")) for cid in chunk_ids]  # ids missing from the result get empty text
+        order = _rerank_pool(self.url, query, ids_and_texts, timeout=self.timeout)
+        if order is None:  # reranker failed → keep the base ordering
+            return pages[:k]
+        out: list[tuple[str, str]] = []
+        seen: set[tuple[str, str]] = set()
+        for cid in order:  # pages absent from `order` are not returned
+            bid, pid, _ = cid.split("::")
+            key = (bid, pid)
+            if key in seen:  # guards against the reranker repeating an index
+                continue
+            seen.add(key)
+            out.append(key)
+            if len(out) >= k:
+                break
+        return out
diff --git a/eval/run_eval.py b/eval/run_eval.py
index 9ba3aa6..8daa807 100644
--- a/eval/run_eval.py
+++ b/eval/run_eval.py
@@ -76,15 +76,87 @@ def main() -> int:
     queries = load_queries(args.queries)
     print(f"loaded {len(queries)} queries")
 
-    # TODO Phase 7: instantiate the retrievers you implemented in
-    # eval/retrievers.py and run each one against each query.
-    # Aggregate MRR / Recall@K / nDCG@K per retriever. Emit a
-    # markdown table to args.output. Commit the file alongside the
-    # PR that changes retrieval.
-    raise NotImplementedError(
-        "Wire up the retrievers in eval/retrievers.py first, then "
-        "fill in this evaluation loop. See PLAN.md Phase 7."
-    )
+    import os
+    import chromadb
+    from chromadb.config import Settings
+    from rag.embeddings import embedding_function
+    from rag.bm25 import BM25Index
+    from eval.retrievers import DenseRetriever, BM25Retriever, HybridRetriever
+
+    product = os.environ.get("PRODUCT_NAME", "morpheus")  # same fallback as rag/index.py
+    repo_root = Path(__file__).resolve().parent.parent
+    client = chromadb.PersistentClient(path=str(repo_root / "chroma"),
+                                       settings=Settings(anonymized_telemetry=False))
+    col = client.get_collection(f"{product}_docs", embedding_function=embedding_function())
+    bm = BM25Index(str(repo_root / "bm25" / f"{product}_docs.db"))
+
+    from eval.retrievers import RerankedRetriever
+
+    dense = DenseRetriever(col)
+    bm25 = BM25Retriever(bm)
+    hybrid = HybridRetriever(DenseRetriever(col, pool=100), BM25Retriever(bm, pool=100))
+
+    retrievers = [dense, bm25, hybrid]
+
+    rerank_url = os.environ.get("RERANK_URL", "").rstrip("/")
+    if rerank_url:
+        retrievers += [
+            RerankedRetriever(bm25, col, rerank_url, name_suffix="rerank", pool=50),
+            RerankedRetriever(hybrid, col, rerank_url, name_suffix="rerank", pool=50),
+        ]
+        print(f"reranker enabled: {rerank_url}")
+
+    rows: dict[str, dict[str, float]] = {}
+    per_query: list[dict] = []
+    for r in retrievers:
+        mrr_sum = recall_sum = ndcg_sum = 0.0
+        elapsed_sum = 0.0
+        for q in queries:
+            expected = [(e["bundle_id"], e["page_id"]) for e in q["expected"]]
+            t0 = time.time()
+            retrieved = r.retrieve(q["query"], k=max(args.k, 10))
+            elapsed = time.time() - t0
+            mrr = reciprocal_rank(retrieved, expected)
+            recall = recall_at_k(retrieved, expected, args.k)
+            ndcg = ndcg_at_k(retrieved, expected, args.k)
+            mrr_sum += mrr
+            recall_sum += recall
+            ndcg_sum += ndcg
+            elapsed_sum += elapsed
+            per_query.append({
+                "retriever": r.name, "query": q["query"],
+                "mrr": mrr, "recall@k": recall, "ndcg@k": ndcg,
+                "top1": list(retrieved[0]) if retrieved else None,
+                "elapsed_s": round(elapsed, 3),
+            })
+        n = len(queries)
+        rows[r.name] = {
+            "MRR": mrr_sum / n,
+            f"Recall@{args.k}": recall_sum / n,
+            f"nDCG@{args.k}": ndcg_sum / n,
+            "avg_latency_s": elapsed_sum / n,
+        }
+        print(f"  {r.name}: MRR={rows[r.name]['MRR']:.3f}  "
+              f"Recall@{args.k}={rows[r.name][f'Recall@{args.k}']:.3f}  "
+              f"nDCG@{args.k}={rows[r.name][f'nDCG@{args.k}']:.3f}  "
+              f"avg={rows[r.name]['avg_latency_s']*1000:.0f}ms")
+
+    args.output.parent.mkdir(parents=True, exist_ok=True)
+    md = [f"# Retrieval eval — k={args.k}", "",
+          f"_{len(queries)} hand-curated queries, generated {time.strftime('%Y-%m-%d %H:%M:%S')}_", "",
+          "| Retriever | MRR | Recall@{k} | nDCG@{k} | avg latency |".replace("{k}", str(args.k)),
+          "| --- | ---: | ---: | ---: | ---: |"]
+    for name, m in rows.items():
+        md.append(f"| `{name}` | {m['MRR']:.3f} | {m[f'Recall@{args.k}']:.3f} "
+                  f"| {m[f'nDCG@{args.k}']:.3f} | {m['avg_latency_s']*1000:.0f}ms |")
+    md += ["", "## Per-query results", "",
+           "| Retriever | Query | MRR | top-1 |", "| --- | --- | ---: | --- |"]
+    for r in per_query:
+        top1 = f"`{r['top1'][0]}/{r['top1'][1][:24]}...`" if r["top1"] else "—"
+        md.append(f"| `{r['retriever']}` | {r['query'][:60]} | {r['mrr']:.3f} | {top1} |")
+    args.output.write_text("\n".join(md) + "\n")
+    print(f"wrote {args.output}")
+    return 0
 
 
 if __name__ == "__main__":
diff --git a/rag/chunk.py b/rag/chunk.py
index b8d7317..c937c1f 100644
--- a/rag/chunk.py
+++ b/rag/chunk.py
@@ -31,6 +31,31 @@ from typing import Iterator
 CHARS_PER_TOKEN = 4
 TARGET_TOKENS = 500
 TARGET_CHARS = TARGET_TOKENS * CHARS_PER_TOKEN
+# Hard cap: nomic-embed-text's context is 2048 tokens. Anything larger
+# 400s the entire embed batch. 6000 chars works for prose but markdown
+# tables with lots of `|` separators tokenize ~1.4× denser; a 5839-char
+# table chunk from the HVM qualification matrix tokenized past 2048 and
+# crashed the rebuild. 4000 chars stays under 2048 tokens even for
+# dense table content while leaving headroom for the query side.
+MAX_CHARS = 4000
+
+
+def _hard_split(text: str) -> list[str]:
+    """Split an oversized block into non-empty pieces of at most MAX_CHARS, preferring line boundaries."""
+    if len(text) <= MAX_CHARS:
+        return [text]
+    out: list[str] = []
+    buf = ""
+    for line in text.splitlines(keepends=True):
+        if buf and len(buf) + len(line) > MAX_CHARS:
+            out.append(buf.rstrip())
+            buf = ""
+        # A single line longer than the cap is sliced so no piece can exceed MAX_CHARS.
+        while len(line) > MAX_CHARS:
+            out.append(line[:MAX_CHARS])
+            line = line[MAX_CHARS:]
+        buf += line
+    return [piece for piece in (*out, buf.rstrip()) if piece]
 
 
 def estimate_tokens(text: str) -> int:
@@ -104,23 +129,26 @@ def chunks_from_page(
 
     # ----- Body chunks: pack paragraphs up to TARGET_CHARS -------
     ordinal = 1
+
+    def emit(buf: list[str]) -> Iterator[dict]:
+        nonlocal ordinal
+        merged = "\n\n".join(buf)
+        for piece in _hard_split(merged):
+            yield {
+                "id":       f"{metadata['bundle_id']}::{page_id}::{ordinal}",
+                "text":     piece,
+                "metadata": {**metadata, "ordinal": ordinal},
+            }
+            ordinal += 1
+
     buf: list[str] = []
     buf_chars = 0
     for p in paragraphs:
         if buf_chars + len(p) > TARGET_CHARS and buf:
-            yield {
-                "id":       f"{metadata['bundle_id']}::{page_id}::{ordinal}",
-                "text":     "\n\n".join(buf),
-                "metadata": {**metadata, "ordinal": ordinal},
-            }
-            ordinal += 1
+            yield from emit(buf)
             buf = []
             buf_chars = 0
         buf.append(p)
         buf_chars += len(p)
     if buf:
-        yield {
-            "id":       f"{metadata['bundle_id']}::{page_id}::{ordinal}",
-            "text":     "\n\n".join(buf),
-            "metadata": {**metadata, "ordinal": ordinal},
-        }
+        yield from emit(buf)
diff --git a/rag/embeddings.py b/rag/embeddings.py
index 84d3bbd..a072f7a 100644
--- a/rag/embeddings.py
+++ b/rag/embeddings.py
@@ -3,8 +3,15 @@
 Swappable: implement the same `embedding_function()` interface returning
 a Chroma `EmbeddingFunction` and the rest of the pipeline doesn't care.
 
-Defaults (override via env):
-  OLLAMA_URL    one or more comma-separated URLs (load-balanced)
+Env-configurable (matches the zerto-docs-rag pattern so the same Gitea
+runner + GPU-pinned Ollama containers can serve every docs MCP build):
+
+  OLLAMA_URLS   comma-separated list, load-balanced round-robin per batch.
+                Preferred — set in the CI workflow to fan out across two
+                GPU-pinned Ollama containers on the Gitea host.
+  OLLAMA_URL    single endpoint, fallback when OLLAMA_URLS is unset.
+                Default http://192.168.0.2:11434 (the host where the GPUs
+                live in Justin's lab).
   EMBED_MODEL   model name; default 'nomic-embed-text'
   EMBED_DIM     expected embedding dim; default 768 (nomic-embed-text)
 """
@@ -19,8 +26,18 @@ from chromadb import EmbeddingFunction, Documents, Embeddings
 
 log = logging.getLogger(__name__)
 
-OLLAMA_URLS = [u.strip() for u in os.environ.get("OLLAMA_URL",
-               "http://localhost:11434").split(",") if u.strip()]
+DEFAULT_OLLAMA_URL = "http://192.168.0.2:11434"
+
+
+def _resolve_urls() -> list[str]:
+    raw = os.environ.get("OLLAMA_URLS", "").strip()
+    if raw:
+        return [u.strip().rstrip("/") for u in raw.split(",") if u.strip()]
+    single = os.environ.get("OLLAMA_URL", DEFAULT_OLLAMA_URL).strip().rstrip("/")
+    return [single]
+
+
+OLLAMA_URLS = _resolve_urls()
 EMBED_MODEL = os.environ.get("EMBED_MODEL", "nomic-embed-text")
 EMBED_DIM = int(os.environ.get("EMBED_DIM", "768"))
 
diff --git a/rag/index.py b/rag/index.py
index 8d1c74f..f9b5ce2 100644
--- a/rag/index.py
+++ b/rag/index.py
@@ -29,7 +29,7 @@ CHROMA_DIR = ROOT / "chroma"
 
 # Collection name — convention: <product>_docs. Override via env if needed.
 import os
-PRODUCT_NAME = os.environ.get("PRODUCT_NAME", "myproduct")
+PRODUCT_NAME = os.environ.get("PRODUCT_NAME", "morpheus")
 COLLECTION = f"{PRODUCT_NAME}_docs"
 
 
diff --git a/requirements-rerank.txt b/requirements-rerank.txt
new file mode 100644
index 0000000..2c5fc27
--- /dev/null
+++ b/requirements-rerank.txt
@@ -0,0 +1,10 @@
+# Dev/CPU reranker — only for running scripts/rerank_server.py locally.
+# Production uses the llama.cpp + jina-reranker GGUF sidecar (see
+# deploy/docker-compose.yml). Install with:
+#
+#   pip install -r requirements-rerank.txt
+#
+# This adds PyTorch (~2 GB) and the sentence-transformers cross-encoder
+# (cross-encoder/ms-marco-MiniLM-L-6-v2, ~22 MB). Keep out of the main
+# requirements.txt so the production image stays slim.
+sentence-transformers>=3.0
diff --git a/requirements.txt b/requirements.txt
index b9982a9..431949b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,10 +10,18 @@ ollama>=0.4.0      # if using Ollama-hosted embedder; swap if not
 # Scraping (Phase 1; adjust per product)
 beautifulsoup4>=4.12
 requests>=2.31
+curl_cffi>=0.7         # for HPE QuickSpecs scrape (Chrome TLS impersonation)
+markdownify>=0.11
 # playwright>=1.40  # uncomment if you need headless browser fallback
 
 # Evaluation
 numpy>=1.26
 
+# Reranker is a sidecar (see deploy/docker-compose.yml). The MCP server
+# only needs httpx (declared above) to call it. For the dev / CPU
+# fallback reranker (scripts/rerank_server.py), install
+# requirements-rerank.txt separately — it pulls in PyTorch which would
+# triple the production image size.
+
 # Dev / utility
 python-dateutil>=2.8
diff --git a/scrape/README.md b/scrape/README.md
index 44d6df3..e9f6c95 100644
--- a/scrape/README.md
+++ b/scrape/README.md
@@ -7,6 +7,72 @@ the upstream doc portal.
 See `PLAN.md` Phase 1 for the corpus layout the rest of the pipeline
 expects.
 
+---
+
+## Product context — HPE Morpheus Enterprise Software
+
+**This repo is for HPE Morpheus Enterprise**, the full cloud-management
+platform. It is a **different SKU** from HPE Morpheus VM Essentials
+(HVM), which has its own MCP at `../hvm-docs/`. Don't ingest HVM
+docs here; they're a separate, smaller product (the "VM-only" subset
+of Morpheus). The Morpheus VM Essentials Deployment Guide refers to
+Morpheus Enterprise as the "elevate to" target — that's the
+relationship.
+
+`PRODUCT_NAME=morpheus`. Tool will be named `morpheus_api_lessons`,
+collection `morpheus_docs`, etc.
+
+### Upstream portal
+
+HPE Support DocPortal (Tridion/SDL-derived, same surface as HVM and
+the Zerto docs). Anonymous JSON API, no auth required.
+
+| Endpoint | Returns |
+|---|---|
+| `GET https://support.hpe.com/hpesc/public/api/document/{docId}` | DITA-source HTML — title page / abstract OR (for short docs like Release Notes) the entire body |
+| `GET https://support.hpe.com/hpesc/public/api/document/{docId}/toc` | Nested JSON tree of `{topicName, topicLink, description, children}`. Empty/404 for single-doc Release Notes. |
+| `GET https://support.hpe.com/hpesc/public/api/document/{docId}/render?page=GUID-XXXX.html` | `{docId, page_html, doc_meta, page_meta}` — single page body |
+
+User-facing URL format:
+`https://support.hpe.com/hpesc/public/docDisplay?docId={docId}&page=GUID-XXXX.html`
+
+### Bundle IDs (confirmed 2026-05-22)
+
+**Morpheus Enterprise User Manual** — ~569 pages each, full nested TOC:
+
+| Version | docId |
+|---|---|
+| 8.1.0  | `sd00007510en_us` |
+| 8.1.1  | `sd00007621en_us` |
+| 8.1.2  | `sd00007732en_us` |
+
+**Morpheus Enterprise Release Notes** — short, single-doc-blob shape
+(no TOC; full body returned by the `/document/{docId}` endpoint
+itself; scraper needs a `--single-doc` mode for these):
+
+| Version | docId |
+|---|---|
+| 8.1.0  | `sd00007496en_us` |
+| 8.1.1  | `sd00007610en_us` |
+| 8.1.2  | `sd00007733en_us` |
+
+### Cross-version peers are free
+
+GUIDs are stable across versions (confirmed on HVM where 374/376/376
+pages had 100% GUID overlap between adjacent versions). Same-GUID =
+same-topic. Synthesize `topic_cluster.clustered_topics` by looking
+up the same GUID in the other bundle slugs — no fuzzy matching
+needed.
+
+### Reusable from hvm-docs
+
+`../hvm-docs/scrape/bundles.py` and `../hvm-docs/scrape/runner.py`
+solve the identical portal shape. Copy and adapt the BUNDLES list +
+PRODUCT_NAME; the fetch logic should drop in unchanged. Both the
+TOC-paginated path and the single-doc path are needed (the HVM
+build covers both because HVM Release Notes follow the same shape).
+
+
 ## What you write
 
 At minimum, two scripts:
diff --git a/scrape/bundles.py b/scrape/bundles.py
new file mode 100644
index 0000000..3c3ab87
--- /dev/null
+++ b/scrape/bundles.py
@@ -0,0 +1,200 @@
+"""Discover Morpheus Enterprise doc bundles on HPE Support DocPortal and write bundles.json.
+
+Mirrors hvm-docs/scrape/bundles.py — same portal, same API shape, same single-doc-blob
+treatment for Release Notes, but pointing at the Morpheus Enterprise docId range.
+
+For each bundle this script:
+  1. GETs /hpesc/public/api/document/{docId}        → abstract HTML
+  2. GETs /hpesc/public/api/document/{docId}/toc    → page tree (or 404 for single-doc)
+  3. Writes bundles.json at repo root with the schema PLAN.md Phase 1 documents.
+
+QuickSpecs is a special case: lives at www.hpe.com (not support.hpe.com), gets the
+html-file mode and is scraped via curl_cffi (see scrape/quickspecs.py).
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+import requests
+from bs4 import BeautifulSoup
+
+API = "https://support.hpe.com/hpesc/public/api/document"
+DOC_URL = "https://support.hpe.com/hpesc/public/docDisplay?docId={doc_id}"
+UA = "morpheus-docs-mcp/0.1 (+https://git.jpaul.io/justin/morpheus-docs; admin@jpaul.io)"
+ROOT = Path(__file__).resolve().parent.parent
+BUNDLES_JSON = ROOT / "bundles.json"
+
+
+@dataclass
+class BundleSpec:
+    slug: str
+    doc_id: str
+    title: str
+    version: str | None
+    product: str  # e.g. "User Manual", "Release Notes", "QuickSpecs"
+    mode: str    # "toc", "single", or "html-file"
+    platform: str | None = None
+    language: str = "en-US"
+    source_url: str | None = None   # overrides the default support.hpe.com URL
+
+
+# Declared bundles. Versions confirmed 2026-05-22 by probing the docId
+# range sd00006500..7740 for `Morpheus Enterprise` matches in the abstract.
+#
+# Notes:
+#   - Morpheus Enterprise has User Manuals dating back to 8.0.10
+#     (sd00006774en_us, Sep 2025) but we only ship the 8.1.x line for
+#     now. Add the 8.0.x bundles here if you need older versions in the
+#     corpus.
+#   - No dedicated Deployment Guide or Qualification Matrix for Morpheus
+#     Enterprise on HPE Support — the only QM (sd00006551en_us) covers
+#     HVM clusters managed by Morpheus, which lives in hvm-docs.
+#   - QuickSpecs lives on www.hpe.com (not support.hpe.com), uses the
+#     html-file scrape mode with curl_cffi Chrome impersonation.
+BUNDLES: list[BundleSpec] = [
+    BundleSpec("morpheus_user_manual_8_1_0",   "sd00007510en_us", "HPE Morpheus Enterprise Software Documentation", "8.1.0", "User Manual",   "toc"),
+    BundleSpec("morpheus_user_manual_8_1_1",   "sd00007621en_us", "HPE Morpheus Enterprise Software Documentation", "8.1.1", "User Manual",   "toc"),
+    BundleSpec("morpheus_user_manual_8_1_2",   "sd00007732en_us", "HPE Morpheus Enterprise Software Documentation", "8.1.2", "User Manual",   "toc"),
+    BundleSpec("morpheus_release_notes_8_1_0", "sd00007496en_us", "HPE Morpheus Enterprise Software Release Notes",  "8.1.0", "Release Notes", "single"),
+    BundleSpec("morpheus_release_notes_8_1_1", "sd00007610en_us", "HPE Morpheus Enterprise Software Release Notes",  "8.1.1", "Release Notes", "single"),
+    BundleSpec("morpheus_release_notes_8_1_2", "sd00007733en_us", "HPE Morpheus Enterprise Software Release Notes",  "8.1.2", "Release Notes", "single"),
+    BundleSpec("morpheus_quickspecs",          "a50009231enw",    "HPE Morpheus Enterprise Software QuickSpecs",
+               "v1", "QuickSpecs", "html-file",
+               source_url="https://www.hpe.com/psnow/doc/a50009231enw"),
+]
+
+
+def _session() -> requests.Session:
+    s = requests.Session()
+    s.headers.update({"User-Agent": UA, "Accept": "application/json, text/html"})
+    return s
+
+
+def _get(s: requests.Session, url: str, expect_json: bool = False, retries: int = 4) -> Any:
+    delay = 1.0
+    for attempt in range(retries):
+        r = s.get(url, timeout=30)
+        if r.status_code == 200:
+            return r.json() if expect_json else r.text
+        if r.status_code == 404:
+            return None
+        if r.status_code in (429, 500, 502, 503, 504):
+            time.sleep(delay)
+            delay *= 2
+            continue
+        r.raise_for_status()
+    raise RuntimeError(f"GET failed after {retries} retries: {url}")
+
+
+def _count_toc(toc: list[dict] | None) -> tuple[int, str | None]:
+    if not toc:
+        return 0, None
+    landing = None
+    n = 0
+
+    def walk(nodes: list[dict] | None, depth: int) -> None:
+        nonlocal n, landing
+        for node in nodes or []:
+            link = node.get("topicLink")
+            if link:
+                n += 1
+                m = re.search(r"page=(GUID-[A-F0-9-]+)\.html", link)
+                if m and landing is None:
+                    landing = m.group(1)
+            walk(node.get("children"), depth + 1)
+
+    walk(toc, 0)
+    return n, landing
+
+
+def _parse_abstract(html: str) -> dict[str, str]:
+    soup = BeautifulSoup(html, "html.parser")
+    out: dict[str, str] = {}
+    h1 = soup.select_one("h1.title.topictitle1")
+    if h1:
+        out["title"] = h1.get_text(" ", strip=True)
+    desc = soup.select_one("div.desc")
+    if desc:
+        out["abstract"] = desc.get_text(" ", strip=True)
+    pub = soup.select_one("div.publishedDate")
+    if pub:
+        out["published"] = pub.get_text(" ", strip=True).replace("Published:", "").strip()
+    return out
+
+
+def discover_bundle(s: requests.Session, spec: BundleSpec) -> dict[str, Any]:
+    # html-file bundles are static fixtures or live-fetched outside support.hpe.com.
+    if spec.mode == "html-file":
+        return {
+            "slug": spec.slug,
+            "doc_id": spec.doc_id,
+            "title": spec.title,
+            "version": spec.version,
+            "platform": spec.platform,
+            "product": spec.product,
+            "language": spec.language,
+            "page_count": 1,
+            "mode": "html-file",
+            "abstract": "",
+            "dates": {},
+            "landing_page": spec.doc_id,
+            "source_url": spec.source_url or f"https://www.hpe.com/psnow/doc/{spec.doc_id}",
+        }
+
+    abstract_html = _get(s, f"{API}/{spec.doc_id}", expect_json=False)
+    meta = _parse_abstract(abstract_html or "")
+
+    page_count: int
+    landing: str | None
+    if spec.mode == "toc":
+        toc = _get(s, f"{API}/{spec.doc_id}/toc", expect_json=True)
+        page_count, landing = _count_toc(toc)
+        if page_count == 0:
+            print(f"  ! {spec.slug}: TOC empty — falling back to single-doc mode", file=sys.stderr)
+            spec.mode = "single"
+            page_count, landing = 1, spec.doc_id
+    else:
+        page_count, landing = 1, spec.doc_id
+
+    return {
+        "slug": spec.slug,
+        "doc_id": spec.doc_id,
+        "title": meta.get("title") or spec.title,
+        "version": spec.version,
+        "platform": spec.platform,
+        "product": spec.product,
+        "language": spec.language,
+        "page_count": page_count,
+        "mode": spec.mode,
+        "abstract": meta.get("abstract", ""),
+        "dates": {"Published": meta.get("published", "")},
+        "landing_page": landing,
+        "source_url": spec.source_url or DOC_URL.format(doc_id=spec.doc_id),
+    }
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description="Build bundles.json from BUNDLES list.")
+    p.add_argument("--out", default=str(BUNDLES_JSON))
+    args = p.parse_args()
+
+    s = _session()
+    out: list[dict[str, Any]] = []
+    for spec in BUNDLES:
+        print(f"  • {spec.slug} ({spec.doc_id}) ...", file=sys.stderr)
+        out.append(discover_bundle(s, spec))
+
+    Path(args.out).write_text(json.dumps(out, indent=2) + "\n")
+    print(f"wrote {args.out}: {len(out)} bundles, {sum(b['page_count'] for b in out)} pages total", file=sys.stderr)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scrape/quickspecs.py b/scrape/quickspecs.py
new file mode 100644
index 0000000..12b72d6
--- /dev/null
+++ b/scrape/quickspecs.py
@@ -0,0 +1,194 @@
+"""Scrape HPE QuickSpecs collateral pages into corpus markdown.
+
+HPE QuickSpecs live at `https://www.hpe.com/us/en/collaterals/collateral.<doc_id>.html`
+with a server-rendered HTML body (confirmed 2026-05-22 by inspecting the
+captured DOM). The blocker for automated scraping is `www.hpe.com`'s
+edge bot defense, which drops connections from non-browser TLS
+fingerprints (curl, wget, Python-urllib, even WebFetch). Bypassed here
+by `curl_cffi` impersonating Chrome 120's JA3/JA4 fingerprint.
+
+Content extraction uses these stable CSS selectors found in the page:
+
+  .lr-right-rail hpe-highlights-container .collateral-content
+       — one per section ("Overview", "Standard Features", etc.)
+  h3.txto-title          — section title
+  div.txto-description   — section body
+  uc-table.uc-table-polaris   — SKU / version-history tables
+
+A committed HTML fixture at `scrape/quickspecs/<doc_id>.html` is used
+as a fallback when the live fetch fails (HPE edge churn, network
+issues). Keeping a current fixture in the repo also makes diffing
+QuickSpecs revisions easy.
+
+Usage (called by scrape.runner for bundles with mode="html-file"):
+
+    python -m scrape.quickspecs a50009231enw --bundle-id morpheus_quickspecs
+
+Or programmatically:
+
+    from scrape.quickspecs import scrape_quickspecs
+    scrape_quickspecs("a50009231enw", bundle_id="morpheus_quickspecs", title="...")
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import logging
+import sys
+from pathlib import Path
+
+from bs4 import BeautifulSoup, NavigableString
+from markdownify import markdownify as md
+
+log = logging.getLogger(__name__)
+
+ROOT = Path(__file__).resolve().parent.parent
+SOURCE_DIR = ROOT / "scrape" / "quickspecs"
+CORPUS_DIR = ROOT / "corpus"
+
+COLLATERAL_URL = "https://www.hpe.com/us/en/collaterals/collateral.{doc_id}.html"
+
+
+def fetch_live(doc_id: str, timeout: float = 30.0) -> str | None:
+    """GET the collateral page via curl_cffi (Chrome 120 TLS fingerprint).
+    Returns the HTML body on success, None on any failure."""
+    try:
+        from curl_cffi import requests as cc
+    except ImportError:
+        log.warning("curl_cffi not installed; can't fetch QuickSpecs live")
+        return None
+    try:
+        r = cc.get(COLLATERAL_URL.format(doc_id=doc_id),
+                   impersonate="chrome120", timeout=timeout)
+        if r.status_code != 200 or not r.text:
+            log.warning("QuickSpecs %s: http=%s bytes=%d", doc_id, r.status_code, len(r.text or ""))
+            return None
+        return r.text
+    except Exception as e:
+        log.warning("QuickSpecs %s live fetch failed: %s", doc_id, e)
+        return None
+
+
+def fetch_fixture(doc_id: str) -> str | None:
+    """Read the committed HTML fixture as fallback."""
+    p = SOURCE_DIR / f"{doc_id}.html"
+    if not p.exists():
+        return None
+    return p.read_text()
+
+
+def _extract_content_blocks(html: str) -> list[str]:
+    """Pull each section block (.collateral-content under .lr-right-rail).
+
+    The fixture format (just .quickspecs-content wrapper) and the live
+    format (.lr-right-rail with nested hpe-highlights-container) are
+    both supported. Returns a list of section HTML strings, in document
+    order.
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    # Live format: each <hpe-highlights-container> under .lr-right-rail has
+    # one or more .collateral-content blocks; concat them.
+    rail = soup.select_one(".lr-right-rail")
+    if rail is not None:
+        blocks = rail.select(".collateral-content")
+        return [str(b) for b in blocks]
+    # Fixture format: a single wrapper holding all the H2/H3 sections.
+    wrapper = soup.select_one(".quickspecs-content")
+    if wrapper is not None:
+        return [str(wrapper)]
+    # Last-resort: whole body.
+    body = soup.body or soup
+    return [str(body)]
+
+
+def parse_html(html: str) -> str:
+    """Convert QuickSpecs HTML to clean markdown.
+
+    Filters out the page chrome (nav, footer, recommendations carousel,
+    cookie banner, analytics blobs) by extracting only the content
+    blocks, then runs markdownify."""
+    blocks = _extract_content_blocks(html)
+    chunks: list[str] = []
+    for block in blocks:
+        soup = BeautifulSoup(block, "html.parser")
+        # Drop anchor placeholders that markdownify turns into noisy links
+        for a in soup.select('[hpe-left-rail-anchor]'):
+            a.decompose()
+        # Drop carousel / share / recommendation widgets if any leaked in.
+        for sel in ("esl-share", "hpe-recommendations", "hpe-sticky-bar",
+                    "esl-scrollbar", "esl-trigger", "video-overlay",
+                    "generic-modal-loader", "style", "script"):
+            for el in soup.select(sel):
+                el.decompose()
+        chunks.append(md(str(soup), heading_style="ATX", bullets="-",
+                          strip=["span", "div"]))
+    text = "\n\n".join(chunks)
+    # Collapse runs of blank lines markdownify likes to emit.
+    text = "\n".join(line.rstrip() for line in text.splitlines())
+    while "\n\n\n" in text:
+        text = text.replace("\n\n\n", "\n\n")
+    return text.strip() + "\n"
+
+
+def scrape_quickspecs(doc_id: str, bundle_id: str, title: str,
+                     version: str | None = None,
+                     product: str = "QuickSpecs",
+                     source_url: str | None = None,
+                     force: bool = False) -> bool:
+    """Live-fetch (or fall back to fixture), parse, write corpus files.
+
+    Returns True if files were written; False if skipped (already on disk
+    and --force not set) or if neither live fetch nor fixture produced HTML."""
+    bundle_dir = CORPUS_DIR / bundle_id
+    md_path = bundle_dir / f"{doc_id}.md"
+    json_path = bundle_dir / f"{doc_id}.json"
+    if not force and md_path.exists() and json_path.exists():
+        log.info("  %s/%s: already on disk (use --force to refresh)", bundle_id, doc_id)
+        return False
+
+    html = fetch_live(doc_id)
+    fetched_from = "live"
+    if html is None:
+        html = fetch_fixture(doc_id)
+        fetched_from = "fixture"
+    if html is None:
+        log.error("QuickSpecs %s: no live response and no fixture at %s",
+                  doc_id, SOURCE_DIR / f"{doc_id}.html")
+        return False
+
+    body_md = parse_html(html)
+    bundle_dir.mkdir(parents=True, exist_ok=True)
+    md_path.write_text(body_md)
+    sidecar = {
+        "bundle_id": bundle_id,
+        "page_id": doc_id,
+        "title": title,
+        "ordinal": 1,
+        "parent_title": None,
+        "doc_id": doc_id,
+        "version": version,
+        "product": product,
+        "source_url": source_url or f"https://www.hpe.com/psnow/doc/{doc_id}",
+        "fetched_from": fetched_from,
+    }
+    json_path.write_text(json.dumps(sidecar, indent=2) + "\n")
+    log.info("  %s/%s: %d bytes from %s", bundle_id, doc_id, len(body_md), fetched_from)
+    return True
+
+
+def main() -> int:
+    """CLI entry point: scrape one QuickSpecs doc; exit code is 0 only when corpus files were written."""
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
+    p = argparse.ArgumentParser()
+    p.add_argument("doc_id", help="QuickSpecs document id, e.g. a50009231enw")
+    p.add_argument("--bundle-id", default="morpheus_quickspecs")
+    p.add_argument("--title", default="HPE Morpheus Enterprise Software QuickSpecs")
+    p.add_argument("--version", default=None)
+    p.add_argument("--force", action="store_true")
+    args = p.parse_args()
+    ok = scrape_quickspecs(args.doc_id, args.bundle_id, args.title, args.version, force=args.force)
+    return 0 if ok else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scrape/quickspecs/README.md b/scrape/quickspecs/README.md
new file mode 100644
index 0000000..e301411
--- /dev/null
+++ b/scrape/quickspecs/README.md
@@ -0,0 +1,27 @@
+# scrape/quickspecs/
+
+Static HTML fixtures for HPE QuickSpecs documents, used as the fallback when
+the live `curl_cffi` fetch in `scrape/quickspecs.py` fails (www.hpe.com edge
+drops connections from datacenter IPs with non-browser User-Agents —
+verified 2026-05-22 with curl, wget, and Anthropic's WebFetch).
+
+## Workflow
+
+1. Operator visits `https://www.hpe.com/psnow/doc/<doc_id>` in a real
+   browser, opens DevTools → Elements → Copy the `<body>` HTML.
+2. Save it at `scrape/quickspecs/<doc_id>.html`.
+3. Add a bundle entry in `scrape/bundles.py` with `mode="html-file"`.
+4. `python -m scrape.runner --bundle morpheus_quickspecs --force` reads the
+   committed HTML and writes `corpus/morpheus_quickspecs/<doc_id>.{md,json}`.
+5. Re-index and ship.
+
+QuickSpecs only update every few months (HPE rebrand, new SKU added,
+feature change). When a new version drops, refresh the local HTML
+file and re-run the scrape.
+
+## Current fixtures
+
+- `a50004260enw.html` — HPE Morpheus VM Essentials Software QuickSpecs
+  (Version 4, 02-February-2026). SKUs: S5Q81AAE (1-yr), S5Q82AAE
+  (3-yr), S5Q83AAE (5-yr) — all "per Socket E-LTU" with Tech Care
+  Essentials included.
diff --git a/scrape/runner.py b/scrape/runner.py
new file mode 100644
index 0000000..c83f4b2
--- /dev/null
+++ b/scrape/runner.py
@@ -0,0 +1,339 @@
+"""Scrape Morpheus doc bundles into corpus/<slug>/<page_id>.{md,json}.
+
+Reads bundles.json (produced by scrape.bundles), then for each bundle:
+  - mode="toc":       walks the TOC tree, fetches each page via the render
+                      endpoint, converts page_html to markdown, writes
+                      <page_id>.md + <page_id>.json sidecar.
+  - mode="single":    fetches /document/{docId} directly, treats the whole
+                      body as one page with page_id = doc_id.
+  - mode="html-file": hands the bundle to scrape.quickspecs.scrape_quickspecs
+                      (QuickSpecs collateral).
+
+After all bundles are on disk, runs a finalize pass that synthesizes
+topic_cluster.clustered_topics for each page by looking up the same
+GUID in sibling bundles (HPE GUIDs are stable across versions — see
+reference_hpe_docs_portal_api.md).
+
+Usage:
+    python -m scrape.runner --all
+    python -m scrape.runner --bundle morpheus_user_manual_8_1_2
+    python -m scrape.runner --all --force        # re-download already-on-disk pages
+    python -m scrape.runner --finalize-only      # only redo the topic_cluster pass
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import requests
+from bs4 import BeautifulSoup
+from markdownify import markdownify as md
+
+# Base URL of the HPE Support Center document API; the TOC, render and
+# raw-document request URLs in this module are all built from it.
+API = "https://support.hpe.com/hpesc/public/api/document"
+# Page URL templates; the formatted result is stored as `source_url` in each sidecar.
+DOC_URL = "https://support.hpe.com/hpesc/public/docDisplay?docId={doc_id}&page={page_id}.html"
+DOC_URL_SINGLE = "https://support.hpe.com/hpesc/public/docDisplay?docId={doc_id}"
+# NOTE(review): the User-Agent still carries the hvm-docs name and repo URL —
+# confirm whether this repository should identify itself differently.
+UA = "hvm-docs-mcp/0.1 (+https://git.jpaul.io/justin/hvm-docs; admin@jpaul.io)"
+# Two levels above this file, i.e. the directory that contains scrape/.
+ROOT = Path(__file__).resolve().parent.parent
+CORPUS = ROOT / "corpus"
+BUNDLES_JSON = ROOT / "bundles.json"
+
+# Captures the page id from a TOC `topicLink` of the form `...page=GUID-<HEX>.html`.
+GUID_RE = re.compile(r"page=(GUID-[A-F0-9-]+)\.html")
+
+
+@dataclass
+class TocEntry:
+    """One page discovered while flattening a bundle's table of contents."""
+
+    # GUID captured from the node's topicLink; also the on-disk file stem.
+    page_id: str
+    # The node's topicName ("" when the TOC node carries none).
+    title: str
+    # 1-based position in depth-first TOC order.
+    ordinal: int
+    # Title of the nearest titled ancestor node, or None at the top level.
+    parent_title: str | None
+
+
+def _session() -> requests.Session:
+    """Build the HTTP session used for every request, with the scraper's default headers."""
+    default_headers = {
+        "User-Agent": UA,
+        "Accept": "application/json, text/html",
+    }
+    session = requests.Session()
+    session.headers.update(default_headers)
+    return session
+
+
+def _get(s: requests.Session, url: str, expect_json: bool = False, retries: int = 4) -> Any:
+    """GET *url*, retrying transient failures with exponential backoff.
+
+    Args:
+        s: session used to issue the request.
+        url: absolute URL to fetch.
+        expect_json: when true, return the decoded JSON body instead of text.
+        retries: maximum number of attempts.
+
+    Returns:
+        The response text (or decoded JSON) on HTTP 200, or None on HTTP 404.
+
+    Raises:
+        requests.HTTPError: on an error status that is not worth retrying.
+        RuntimeError: when every attempt failed, or the server answered with
+            an unexpected non-error status (anything other than 200).
+    """
+    delay = 1.0
+    last_error: Exception | None = None
+    for attempt in range(retries):
+        # Back off before every attempt except the first: 1s, 2s, 4s, ...
+        # Sleeping only *between* attempts avoids a pointless wait before
+        # the final failure is reported.
+        if attempt:
+            time.sleep(delay)
+            delay *= 2
+        try:
+            r = s.get(url, timeout=30)
+        except (requests.ConnectionError, requests.Timeout) as exc:
+            # Dropped connections and timeouts are as transient as a 503;
+            # retry them instead of aborting the whole scrape.
+            last_error = exc
+            continue
+        if r.status_code == 200:
+            return r.json() if expect_json else r.text
+        if r.status_code == 404:
+            return None
+        if r.status_code in (429, 500, 502, 503, 504):
+            last_error = None
+            continue
+        # Non-retryable: raises for 4xx/5xx ...
+        r.raise_for_status()
+        # ... and anything else (e.g. 204) is reported immediately rather
+        # than being re-requested in a tight loop with no backoff.
+        raise RuntimeError(f"unexpected HTTP {r.status_code}: {url}")
+    raise RuntimeError(f"GET failed after {retries} retries: {url}") from last_error
+
+
+def _flatten_toc(toc: list[dict]) -> list[TocEntry]:
+    """Flatten the nested TOC into a depth-first list of linkable pages.
+
+    Only nodes whose topicLink contains a GUID page reference produce an
+    entry; ordinals are assigned 1..N in the order entries are emitted.
+    Children inherit their parent's title as parent_title, or the nearest
+    titled ancestor's when the parent itself has no title.
+    """
+    entries: list[TocEntry] = []
+    # Explicit stack of (node, inherited parent title). Nodes are pushed in
+    # reverse so that popping visits them in document order.
+    pending: list[tuple[dict, str | None]] = [(node, None) for node in reversed(toc or [])]
+    while pending:
+        node, inherited = pending.pop()
+        name = node.get("topicName") or ""
+        hit = GUID_RE.search(node.get("topicLink") or "")
+        if hit:
+            entries.append(
+                TocEntry(
+                    page_id=hit.group(1),
+                    title=name,
+                    ordinal=len(entries) + 1,
+                    parent_title=inherited,
+                )
+            )
+        child_parent = name or inherited
+        for child in reversed(node.get("children") or []):
+            pending.append((child, child_parent))
+    return entries
+
+
+def _strip_dita_wrappers(html: str) -> str:
+    """Drop repeated boilerplate sections and return the page's <main> markup.
+
+    Parses *html*, removes every section headed "Notices", "Acknowledgments"
+    or "Abstract", then returns the serialized first <main> element when one
+    exists and the whole parsed document otherwise.
+
+    The boilerplate repeats across every doc; if we leave it in, every page
+    chunk inherits the same trademark text and pollutes retrieval."""
+    soup = BeautifulSoup(html, "html.parser")
+    # Drop the Notices/Acknowledgments/Abstract boilerplate by section heading.
+    # Every doc on the portal carries the same legal Notices and trademark
+    # Acknowledgments; if we leave them in, every chunk inherits the same
+    # text and pollutes retrieval. Abstract is one-line marketing.
+    boilerplate = {"Notices", "Acknowledgments", "Abstract"}
+    # Wrapped form: <article>/<section>/<div> whose first heading child is boilerplate.
+    for sec in soup.select("article, section, div"):
+        # recursive=False: only a direct-child heading marks the container itself.
+        h = sec.find(["h1", "h2"], recursive=False)
+        if h and h.get_text(strip=True) in boilerplate:
+            sec.decompose()
+    # Unwrapped form: bare <h1>/<h2>Boilerplate</h2> followed by its .desc/.body sibling.
+    for h in soup.find_all(["h1", "h2"]):
+        if h.get_text(strip=True) in boilerplate:
+            sib = h.find_next_sibling()
+            if sib and (sib.name in {"div", "section"}):
+                # Substring match against the space-joined class list, so any
+                # class name containing "desc", "body" or "notices" qualifies.
+                cls = " ".join(sib.get("class", []) or [])
+                if "desc" in cls or "body" in cls or "notices" in cls:
+                    sib.decompose()
+            # The heading goes even when no matching sibling was found.
+            h.decompose()
+    main = soup.find("main")
+    return str(main) if main else str(soup)
+
+
+def html_to_md(page_html: str) -> str:
+    """Convert one page of HTML to markdown that ends in exactly one newline."""
+    markdown = md(_strip_dita_wrappers(page_html), heading_style="ATX", bullets="-")
+    # Squeeze any run of three or more newlines down to a single blank line.
+    squeezed = re.sub(r"\n{3,}", "\n\n", markdown)
+    return f"{squeezed.strip()}\n"
+
+
+def fetch_toc_page(s: requests.Session, doc_id: str, page_id: str) -> str:
+    """Return the `page_html` of one TOC page, or "" when the payload or field is missing."""
+    render_url = f"{API}/{doc_id}/render?page={page_id}.html"
+    payload = _get(s, render_url, expect_json=True)
+    return (payload.get("page_html") or "") if payload else ""
+
+
+def fetch_single_doc(s: requests.Session, doc_id: str) -> tuple[str, str]:
+    """Fetch a single-doc bundle and return `(page_html, title)`.
+
+    The title comes from the document's `h1.title.topictitle1` heading and
+    falls back to *doc_id* when there is none. An empty or missing response
+    yields `("", "")`.
+    """
+    html = _get(s, f"{API}/{doc_id}")
+    if not html:
+        return "", ""
+    heading = BeautifulSoup(html, "html.parser").select_one("h1.title.topictitle1")
+    if heading:
+        return html, heading.get_text(" ", strip=True)
+    return html, doc_id
+
+
+def write_page(bundle_dir: Path, page_id: str, body_md: str, sidecar: dict[str, Any], force: bool) -> bool:
+    """Write `<page_id>.md` and its `<page_id>.json` sidecar into *bundle_dir*.
+
+    Args:
+        bundle_dir: target directory; created (with parents) when missing.
+        page_id: file stem shared by the markdown file and the sidecar.
+        body_md: markdown body of the page.
+        sidecar: JSON-serializable metadata for the page.
+        force: overwrite files that are already on disk.
+
+    Returns:
+        True when the files were written; False when both already existed
+        and *force* was not set.
+    """
+    bundle_dir.mkdir(parents=True, exist_ok=True)
+    md_path = bundle_dir / f"{page_id}.md"
+    json_path = bundle_dir / f"{page_id}.json"
+    if not force and md_path.exists() and json_path.exists():
+        return False
+    # Pin UTF-8: without an explicit encoding the platform's locale encoding
+    # is used, which can fail or corrupt non-ASCII documentation text.
+    md_path.write_text(body_md, encoding="utf-8")
+    json_path.write_text(json.dumps(sidecar, indent=2) + "\n", encoding="utf-8")
+    return True
+
+
+def scrape_toc_bundle(s: requests.Session, bundle: dict, force: bool, concurrency: int) -> int:
+    """Scrape every page listed in a TOC-mode bundle.
+
+    Args:
+        s: session shared by all worker threads.
+        bundle: bundle record; must provide "doc_id" and "slug", may provide
+            "version" and "product".
+        force: re-download and overwrite pages that are already on disk.
+        concurrency: number of worker threads (values below 1 are treated as 1).
+
+    Returns:
+        The number of pages written during this call.
+    """
+    doc_id = bundle["doc_id"]
+    slug = bundle["slug"]
+    bundle_dir = CORPUS / slug
+
+    toc = _get(s, f"{API}/{doc_id}/toc", expect_json=True) or []
+    entries = _flatten_toc(toc)
+    print(f"  {slug}: {len(entries)} pages", file=sys.stderr)
+    if not entries:
+        return 0
+
+    def do_one(entry: TocEntry) -> bool:
+        # Same on-disk test write_page() applies, done *before* the download:
+        # without --force a page that is already present costs no request.
+        if (
+            not force
+            and (bundle_dir / f"{entry.page_id}.md").exists()
+            and (bundle_dir / f"{entry.page_id}.json").exists()
+        ):
+            return False
+        page_html = fetch_toc_page(s, doc_id, entry.page_id)
+        if not page_html:
+            return False
+        body_md = html_to_md(page_html)
+        sidecar = {
+            "bundle_id": slug,
+            "page_id": entry.page_id,
+            "title": entry.title,
+            "ordinal": entry.ordinal,
+            "parent_title": entry.parent_title,
+            "doc_id": doc_id,
+            "version": bundle.get("version"),
+            "product": bundle.get("product"),
+            "source_url": DOC_URL.format(doc_id=doc_id, page_id=entry.page_id),
+            # topic_cluster filled in by finalize()
+        }
+        return write_page(bundle_dir, entry.page_id, body_md, sidecar, force)
+
+    # ThreadPoolExecutor rejects max_workers <= 0, so clamp the CLI value.
+    with ThreadPoolExecutor(max_workers=max(1, concurrency)) as pool:
+        futures = [pool.submit(do_one, e) for e in entries]
+        # fut.result() re-raises a worker's exception, so a failed page
+        # still aborts the bundle as before.
+        return sum(1 for fut in as_completed(futures) if fut.result())
+
+
+def scrape_single_bundle(s: requests.Session, bundle: dict, force: bool) -> int:
+    """Scrape a single-document bundle into one page whose page_id is the doc_id.
+
+    Args:
+        s: session used for the download.
+        bundle: bundle record; must provide "doc_id" and "slug", may provide
+            "title", "version" and "product".
+        force: re-download and overwrite a page that is already on disk.
+
+    Returns:
+        1 when the page was written, otherwise 0.
+    """
+    doc_id = bundle["doc_id"]
+    slug = bundle["slug"]
+    bundle_dir = CORPUS / slug
+
+    # Same on-disk test write_page() applies, done *before* the download:
+    # without --force an existing page costs no request.
+    if (
+        not force
+        and (bundle_dir / f"{doc_id}.md").exists()
+        and (bundle_dir / f"{doc_id}.json").exists()
+    ):
+        print(f"  {slug}: already on disk (single-doc)", file=sys.stderr)
+        return 0
+
+    html, title = fetch_single_doc(s, doc_id)
+    if not html:
+        print(f"  ! {slug}: empty body", file=sys.stderr)
+        return 0
+    body_md = html_to_md(html)
+    sidecar = {
+        "bundle_id": slug,
+        "page_id": doc_id,
+        "title": title or bundle["title"],
+        "ordinal": 1,
+        "parent_title": None,
+        "doc_id": doc_id,
+        "version": bundle.get("version"),
+        "product": bundle.get("product"),
+        "source_url": DOC_URL_SINGLE.format(doc_id=doc_id),
+    }
+    print(f"  {slug}: 1 page (single-doc)", file=sys.stderr)
+    return 1 if write_page(bundle_dir, doc_id, body_md, sidecar, force) else 0
+
+
+def finalize_clusters(bundles: list[dict]) -> int:
+    """Cross-link sibling pages across bundles by rewriting `topic_cluster`.
+
+    Pages whose page_id starts with "GUID-" are grouped by that id: the same
+    GUID in two bundles is the same underlying topic. All other pages are
+    grouped by their bundle's `product` (e.g. Release Notes 8.1.0/.1/.2).
+    Every page in a group of two or more gets a `topic_cluster` listing its
+    peers from *other* bundles, and its sidecar is rewritten.
+
+    Args:
+        bundles: bundle records; each must provide "slug" and may provide
+            "product". Bundles with no corpus directory are skipped.
+
+    Returns:
+        The number of sidecar files rewritten.
+    """
+    # GUID -> list[(slug, sidecar_path, sidecar_dict)]
+    guid_to_pages: dict[str, list[tuple[str, Path, dict]]] = {}
+    # product -> list[(slug, sidecar_path, sidecar_dict)] for non-GUID pages
+    product_to_pages: dict[str, list[tuple[str, Path, dict]]] = {}
+
+    for b in bundles:
+        slug = b["slug"]
+        bundle_dir = CORPUS / slug
+        if not bundle_dir.exists():
+            continue
+        # .get(): "product" is optional everywhere else in this module, so a
+        # bundle without one must not abort the whole finalize pass.
+        product = b.get("product")
+        for jp in bundle_dir.glob("*.json"):
+            data = json.loads(jp.read_text(encoding="utf-8"))
+            pid = data.get("page_id") if isinstance(data, dict) else None
+            if not isinstance(pid, str):
+                # Not a page sidecar; leave the file untouched.
+                continue
+            if pid.startswith("GUID-"):
+                guid_to_pages.setdefault(pid, []).append((slug, jp, data))
+            elif product is not None:
+                product_to_pages.setdefault(product, []).append((slug, jp, data))
+
+    updated = 0
+    # GUID groups first, then product groups; both use the same linking rule.
+    for groups in (guid_to_pages, product_to_pages):
+        for peers in groups.values():
+            if len(peers) < 2:
+                continue
+            for slug, jp, data in peers:
+                others = [
+                    {"bundle_id": s2, "page_id": d2["page_id"], "clustering_title": d2.get("title", "")}
+                    for s2, _, d2 in peers if s2 != slug
+                ]
+                data["topic_cluster"] = {"clustering_title": data.get("title", ""), "clustered_topics": others}
+                jp.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
+                updated += 1
+
+    return updated
+
+
+def main() -> int:
+    """Command-line entry point for the scraper.
+
+    Returns 0 on success and 2 when bundles.json is missing, no bundle was
+    selected, or none of the requested slugs matched.
+    """
+    parser = argparse.ArgumentParser(description="Scrape HVM bundles into corpus/.")
+    parser.add_argument("--all", action="store_true", help="scrape every bundle in bundles.json")
+    parser.add_argument("--bundle", action="append", help="scrape one bundle by slug (repeatable)")
+    parser.add_argument("--force", action="store_true", help="re-fetch pages already on disk")
+    parser.add_argument("--concurrency", type=int, default=6)
+    parser.add_argument("--finalize-only", action="store_true", help="only rebuild topic_cluster sidecar fields")
+    opts = parser.parse_args()
+
+    if not BUNDLES_JSON.exists():
+        print("bundles.json missing — run `python -m scrape.bundles` first", file=sys.stderr)
+        return 2
+
+    catalog = json.loads(BUNDLES_JSON.read_text())
+
+    # --finalize-only skips scraping entirely and works on the full catalog.
+    if opts.finalize_only:
+        relinked = finalize_clusters(catalog)
+        print(f"finalize: updated topic_cluster on {relinked} sidecars", file=sys.stderr)
+        return 0
+
+    if opts.bundle:
+        selected = [entry for entry in catalog if entry["slug"] in opts.bundle]
+        if not selected:
+            print(f"no bundles matched: {opts.bundle}", file=sys.stderr)
+            return 2
+    elif opts.all:
+        selected = catalog
+    else:
+        print("specify --all or --bundle <slug>", file=sys.stderr)
+        return 2
+
+    session = _session()
+    total = 0
+    for entry in selected:
+        mode = entry.get("mode")
+        if mode == "html-file":
+            # Live-scrape HPE collateral (QuickSpecs) via curl_cffi; falls back
+            # to scrape/quickspecs/<doc_id>.html fixture if the edge blocks us.
+            from scrape.quickspecs import scrape_quickspecs
+            ok = scrape_quickspecs(
+                doc_id=entry["doc_id"],
+                bundle_id=entry["slug"],
+                title=entry.get("title", entry["doc_id"]),
+                version=entry.get("version"),
+                product=entry.get("product", "QuickSpecs"),
+                source_url=entry.get("source_url"),
+                force=opts.force,
+            )
+            if ok:
+                total += 1
+        elif mode == "single":
+            total += scrape_single_bundle(session, entry, opts.force)
+        else:
+            # Any other (or missing) mode is scraped as a TOC bundle.
+            total += scrape_toc_bundle(session, entry, opts.force, opts.concurrency)
+    print(f"scraped {total} new/updated pages", file=sys.stderr)
+
+    # Always finalize after a scrape so sidecars are consistent. The catalog
+    # is re-read from disk, so every bundle takes part, not just `selected`.
+    relinked = finalize_clusters(json.loads(BUNDLES_JSON.read_text()))
+    print(f"finalize: updated topic_cluster on {relinked} sidecars", file=sys.stderr)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/registry_gc.py b/scripts/registry_gc.py
index 41bbc52..0e900ca 100644
--- a/scripts/registry_gc.py
+++ b/scripts/registry_gc.py
@@ -1,42 +1,58 @@
 """Gitea container-registry garbage collection.
 
-Lists package versions for one container package and deletes versions
-older than --keep-days. Always preserves:
+Lists tagged versions of one container package and deletes old ones.
+Always preserves:
 
-  - the :latest tag
-  - the --keep-latest most-recent date-tagged versions
-  - anything pushed in the last --keep-days days
+  - the `latest` tag (Watchtower's auto-deploy target)
+  - the `--keep-latest` most-recent date-tagged versions (YYYY.MM.DD)
+  - the `--keep-latest` most-recent short-SHA tags (rollback pins)
+  - anything pushed within `--keep-days` days
 
-The actual disk reclaim happens on Gitea's next package GC cron (admin
-site settings). This script just marks the versions for deletion.
+OCI blob-level versions (`sha256:...`) are never touched directly — those
+are managed by Gitea's internal package GC cron when their last tag
+goes away.
 
 Usage:
 
-    python scripts/registry_gc.py \\
-        --owner <user> \\
-        --package <product>-docs-mcp \\
+    GITEA_TOKEN=... python scripts/registry_gc.py \\
+        --owner justin \\
+        --package hvm-docs \\
         --keep-days 90 \\
         --keep-latest 5
 
-Auth: reads GITEA_TOKEN from env (set in the workflow as a secret).
+The Gitea endpoint shape (confirmed 2026-05-22 against git.jpaul.io):
+
+    GET    /api/v1/packages/{owner}/container/{package}
+           -> [{id, version, created_at, ...}, ...]
+    DELETE /api/v1/packages/{owner}/container/{package}/{version}
 """
 from __future__ import annotations
 
 import argparse
+import json
 import os
+import re
 import sys
 from datetime import datetime, timedelta, timezone
-from urllib.request import Request, urlopen
 from urllib.error import HTTPError
-import json
-
+from urllib.parse import quote
+from urllib.request import Request, urlopen
 
 GITEA_HOST = os.environ.get("GITEA_HOST", "https://git.jpaul.io")
+DATE_TAG = re.compile(r"^\d{4}\.\d{2}\.\d{2}$")
+SHA_TAG = re.compile(r"^[0-9a-f]{7,40}$")  # short or full git SHA
+BLOB_VER = re.compile(r"^sha256:")          # OCI blob versions — skip
 
 
 def api(token: str, method: str, path: str) -> object:
+    # Explicit User-Agent: git.jpaul.io is behind Cloudflare, whose default
+    # Bot Fight Mode 403s `Python-urllib/X.Y` with error 1010. Any
+    # recognizable browser/curl-style UA passes.
     req = Request(f"{GITEA_HOST}{path}",
-                  headers={"Authorization": f"token {token}"},
+                  headers={
+                      "Authorization": f"token {token}",
+                      "User-Agent": "hvm-docs-registry-gc/1.0",
+                  },
                   method=method)
     try:
         with urlopen(req, timeout=30) as r:
@@ -63,44 +79,57 @@ def main() -> int:
         return 1
 
     versions = api(token, "GET",
-                   f"/api/v1/packages/{args.owner}/container/{args.package}/versions") or []
+                   f"/api/v1/packages/{args.owner}/container/{args.package}") or []
     if not versions:
-        print(f"no versions found for {args.owner}/{args.package}")
+        print(f"no versions found for {args.owner}/container/{args.package}")
         return 0
 
     cutoff = datetime.now(timezone.utc) - timedelta(days=args.keep_days)
+    print(f"  {len(versions)} version(s); cutoff={cutoff.isoformat()}  "
+          f"keep_days={args.keep_days}  keep_latest={args.keep_latest}")
 
-    # Date-tagged versions (YYYY.MM.DD), newest first
-    date_tagged = []
-    for v in versions:
-        tags = v.get("tags") or []
-        for t in tags:
-            if len(t) == 10 and t[4] == "." and t[7] == ".":
-                date_tagged.append((t, v))
-                break
-    date_tagged.sort(key=lambda kv: kv[0], reverse=True)
-    keep_date_tags = {t for t, _ in date_tagged[:args.keep_latest]}
-
-    deleted = 0
-    for v in versions:
-        tags = v.get("tags") or []
-        if "latest" in tags:
-            continue
-        if any(t in keep_date_tags for t in tags):
-            continue
+    # Sort newest first by created_at.
+    def parsed_ts(v: dict) -> datetime:
         try:
-            created = datetime.fromisoformat(v["created_at"].replace("Z", "+00:00"))
+            return datetime.fromisoformat(v["created_at"].replace("Z", "+00:00"))
         except (KeyError, ValueError):
+            return datetime.min.replace(tzinfo=timezone.utc)
+
+    versions.sort(key=parsed_ts, reverse=True)
+
+    # Compute the keep-set: top-N date tags + top-N sha tags + always latest.
+    keep_dates: list[str] = []
+    keep_shas: list[str] = []
+    for v in versions:
+        ver = v.get("version") or ""
+        if DATE_TAG.match(ver) and len(keep_dates) < args.keep_latest:
+            keep_dates.append(ver)
+        elif SHA_TAG.match(ver) and len(keep_shas) < args.keep_latest:
+            keep_shas.append(ver)
+    keep = {"latest", *keep_dates, *keep_shas}
+    print(f"  keep tags: {sorted(keep)}")
+
+    deleted = skipped_blob = skipped_age = skipped_keep = 0
+    for v in versions:
+        ver = v.get("version") or ""
+        ts = parsed_ts(v)
+        if BLOB_VER.match(ver):
+            skipped_blob += 1
             continue
-        if created >= cutoff:
+        if ver in keep:
+            skipped_keep += 1
             continue
-        version_id = v.get("id")
-        print(f"  deleting v{version_id}  tags={tags}  created={v['created_at']}")
+        if ts >= cutoff:
+            skipped_age += 1
+            continue
+        print(f"  deleting {ver!r}  id={v.get('id')}  created={v.get('created_at')}")
         if not args.dry_run:
             api(token, "DELETE",
-                f"/api/v1/packages/{args.owner}/container/{args.package}/versions/{version_id}")
+                f"/api/v1/packages/{args.owner}/container/{args.package}/{quote(ver, safe='')}")
             deleted += 1
-    print(f"done: {deleted} version(s) deleted")
+
+    print(f"done: deleted={deleted}  kept_named={skipped_keep}  "
+          f"kept_recent={skipped_age}  skipped_blobs={skipped_blob}")
     return 0
 
 
diff --git a/scripts/rerank_server.py b/scripts/rerank_server.py
new file mode 100644
index 0000000..f7e08b5
--- /dev/null
+++ b/scripts/rerank_server.py
@@ -0,0 +1,120 @@
+"""Minimal HTTP reranker — `/v1/rerank` endpoint over a sentence-transformers CrossEncoder.
+
+Matches the Cohere `/v1/rerank` request/response shape, which is what the
+server's `_rerank()` helper expects. This is the dev-friendly fallback;
+production replaces this with the llama.cpp + jina-reranker-v2-base GGUF
+sidecar (see deploy/docker-compose.yml) without changing the client.
+
+Request:
+    POST /v1/rerank
+    {"model": "...", "query": "...", "documents": ["text", ...], "top_n": 10}
+
+Response:
+    {"model": "...", "results": [{"index": 0, "relevance_score": 0.93}, ...]}
+
+Usage:
+    python -m scripts.rerank_server                   # localhost:8001
+    RERANK_MODEL=cross-encoder/ms-marco-MiniLM-L-12-v2 \\
+    RERANK_PORT=8001 python -m scripts.rerank_server
+"""
+from __future__ import annotations
+
+import json
+import logging
+import os
+import sys
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+# Module logger; basicConfig runs at import time, so importing this module
+# configures root logging at INFO.
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO, format="%(asctime)s  %(message)s")
+
+# Runtime configuration, each overridable through the environment variable
+# named in the call; the second argument is the default.
+MODEL_NAME = os.environ.get("RERANK_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2")
+PORT = int(os.environ.get("RERANK_PORT", "8001"))
+HOST = os.environ.get("RERANK_HOST", "127.0.0.1")
+# Truncate docs to this many chars before scoring. jina-reranker GGUF has a
+# 1024-token per-pair cap that 400s the entire batch; ms-marco is more
+# forgiving but we still cap to keep latency predictable.
+MAX_DOC_CHARS = int(os.environ.get("RERANK_MAX_DOC_CHARS", "2000"))
+
+# Lazily created CrossEncoder instance; populated by _get_model().
+_model = None
+
+
+def _get_model():
+    """Return the process-wide CrossEncoder, creating it on first use."""
+    global _model
+    if _model is not None:
+        return _model
+    # Deferred import: sentence_transformers is only pulled in once a model
+    # actually has to be constructed.
+    from sentence_transformers import CrossEncoder
+    log.info("loading %s", MODEL_NAME)
+    _model = CrossEncoder(MODEL_NAME)
+    log.info("loaded")
+    return _model
+
+
+def _rerank(query: str, documents: list[str], top_n: int | None) -> list[dict]:
+    """Score *documents* against *query* and return them best-first.
+
+    Args:
+        query: the search query.
+        documents: candidate texts; falsy entries are scored as "" and every
+            text is truncated to MAX_DOC_CHARS characters before scoring.
+        top_n: keep only the best N results; None keeps all of them and a
+            negative value is treated as 0.
+
+    Returns:
+        A list of `{"index": i, "relevance_score": s}` dicts sorted by
+        descending score, where `index` is the position in *documents*.
+    """
+    # Nothing to score: answer without loading or calling the model, which
+    # would otherwise be handed an empty batch.
+    if not documents:
+        return []
+    model = _get_model()
+    pairs = [[query, (d or "")[:MAX_DOC_CHARS]] for d in documents]
+    scores = model.predict(pairs)
+    ranked = sorted(
+        ({"index": i, "relevance_score": float(s)} for i, s in enumerate(scores)),
+        key=lambda r: -r["relevance_score"],
+    )
+    if top_n is not None:
+        # Clamp at 0: a negative slice bound would silently drop results
+        # from the *end* of the ranking instead of limiting its length.
+        ranked = ranked[:max(top_n, 0)]
+    return ranked
+
+
+class Handler(BaseHTTPRequestHandler):
+    """HTTP handler exposing a health check (GET) and the rerank endpoint (POST).
+
+    Malformed requests are answered with a JSON 400 body; failures inside
+    the reranker are answered with a JSON 500 body.
+    """
+
+    def log_message(self, fmt, *args):
+        """Route the base class's access-log lines through the module logger."""
+        log.info("%s - %s", self.address_string(), fmt % args)
+
+    def _send_json(self, status: int, payload: dict) -> None:
+        """Send *payload* as a JSON response with the given HTTP status."""
+        body = json.dumps(payload).encode()
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def do_GET(self):  # noqa: N802
+        """Answer `/` and `/health` with a status document; 404 otherwise."""
+        if self.path in ("/", "/health"):
+            self._send_json(200, {"status": "ok", "model": MODEL_NAME})
+            return
+        self._send_json(404, {"error": "not found"})
+
+    def do_POST(self):  # noqa: N802
+        """Handle `POST /v1/rerank` (alias `/rerank`)."""
+        if self.path not in ("/v1/rerank", "/rerank"):
+            self._send_json(404, {"error": "not found"})
+            return
+        # A non-numeric Content-Length must yield a 400, not an exception in
+        # the handler; a negative one would make read() wait for EOF.
+        try:
+            length = int(self.headers.get("Content-Length", "0"))
+        except ValueError:
+            length = -1
+        if length < 0:
+            self._send_json(400, {"error": "bad Content-Length"})
+            return
+        try:
+            # UnicodeDecodeError and json.JSONDecodeError are both ValueErrors.
+            req = json.loads(self.rfile.read(length).decode())
+        except ValueError as e:
+            self._send_json(400, {"error": f"bad json: {e}"})
+            return
+        # Valid JSON that is not an object (a list, number, ...) has no
+        # .get(); reject it with the same 400 as a wrongly-typed field.
+        query = req.get("query") if isinstance(req, dict) else None
+        documents = req.get("documents") if isinstance(req, dict) else None
+        if not isinstance(query, str) or not isinstance(documents, list):
+            self._send_json(400, {"error": "expected {query: str, documents: list[str]}"})
+            return
+        top_n = req.get("top_n")
+        # bool is a subclass of int; `"top_n": true` must not mean top_n=1.
+        if isinstance(top_n, bool) or not isinstance(top_n, int):
+            top_n = None
+        try:
+            results = _rerank(query, documents, top_n)
+        except Exception as e:
+            # Boundary handler: log the traceback and report a 500 rather
+            # than dropping the connection.
+            log.exception("rerank failed")
+            self._send_json(500, {"error": str(e)})
+            return
+        self._send_json(200, {"model": MODEL_NAME, "results": results})
+
+
+def main() -> int:
+    """Load the model, then serve rerank requests until interrupted.
+
+    Returns:
+        0 after a clean shutdown via Ctrl-C.
+    """
+    _get_model()  # warm-load before accepting traffic
+    # Context manager: the listening socket is closed on every exit path,
+    # including an exception other than KeyboardInterrupt.
+    with ThreadingHTTPServer((HOST, PORT), Handler) as server:
+        log.info("listening on http://%s:%d", HOST, PORT)
+        try:
+            server.serve_forever()
+        except KeyboardInterrupt:
+            log.info("shutting down")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())