From fd376fab778b9067bd57d65a7ed633971779419f Mon Sep 17 00:00:00 2001 From: Justin Paul Date: Fri, 22 May 2026 13:07:15 -0400 Subject: [PATCH] ci+deploy: target git.jpaul.io registry, PRODUCT_NAME=hvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 4/5 — adapt the template workflows to Justin's self-hosted Gitea + act_runner setup (see reference_gitea_server memory): * PUSH via LAN endpoint 192.168.0.2:1234 (bypasses Cloudflare's 100 MB request-body cap on the Free plan); PULL via git.jpaul.io. * buildx with config-inline insecure-registry for the LAN endpoint — docker/login-action can't be used there (host daemon rejects HTTP). Auth is written into ~/.docker/config.json so buildx reads it directly. * docker/metadata-action labels org.opencontainers.image.source with the PUBLIC URL so Gitea auto-links the package; explicit POST to /api/v1/packages/.../-/link/{repo} as belt-and-suspenders (201 newly linked, 400 already linked, both treated as success). * deploy/docker-compose.yml: substitute placeholders, point image at git.jpaul.io/justin/hvm-docs:latest, set HYBRID_SEARCH=false to match the eval winner (bm25+rerank), keep the llama.cpp + jina GGUF reranker sidecar as the production target. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitea/workflows/image-only.yml | 106 ++++++++++++++---------- .gitea/workflows/refresh.yml | 142 ++++++++++++++++++++------------ deploy/docker-compose.yml | 40 +++++---- 3 files changed, 175 insertions(+), 113 deletions(-) diff --git a/.gitea/workflows/image-only.yml b/.gitea/workflows/image-only.yml index abe60b2..0422728 100644 --- a/.gitea/workflows/image-only.yml +++ b/.gitea/workflows/image-only.yml @@ -14,21 +14,14 @@ on: workflow_dispatch: env: - REGISTRY_PUSH: : - REGISTRY_PULL: - # Image name derives from the actual repo at runtime, so a clone - # doesn't need to find/replace anything. e.g. justin/my-product-docs. - # github.* context is Gitea Actions' inherited GitHub-Actions namespace - # — values come from the Gitea server, not github.com. + # PUSH goes to the LAN endpoint (HTTP) to bypass Cloudflare's 100 MB + # body cap. PULL uses the public hostname (HTTPS). Same Gitea registry. + REGISTRY_PUSH: 192.168.0.2:1234 + REGISTRY_PULL: git.jpaul.io IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }} - OLLAMA_URL: http://:11434 + OLLAMA_URL: http://192.168.0.126:11434 EMBED_MODEL: nomic-embed-text - # PRODUCT_NAME defaults to the repo name so a clone works without - # editing. Override here if you want a different identifier (e.g. - # repo "my-product-docs" → PRODUCT_NAME "myproduct"). Used as the - # Chroma collection name, BM25 db filename, and MCP server name — - # see docs_mcp/server.py. - PRODUCT_NAME: ${{ github.event.repository.name }} + PRODUCT_NAME: hvm jobs: build: @@ -39,8 +32,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 with: - # Full history (not shallow) so the digest-history step can - # walk git log up to --history-days back. + # Full history so digest-history can walk git log. fetch-depth: 0 - name: Set up Python @@ -54,9 +46,8 @@ jobs: python -m pip install -q -r requirements.txt - name: Refresh digest history - # Cheap (a few seconds); doesn't touch corpus content. - # Without this step, a code-only deploy would ship an - # increasingly-stale digest history relative to git. + # Cheap (few seconds). Without this step, a code-only deploy + # would ship an increasingly-stale digest history. run: | mkdir -p corpus/.digest python -m scrape.changelog \ @@ -71,42 +62,69 @@ jobs: - name: Rebuild indexes from existing corpus run: python -m rag.index --rebuild - - name: Log in to registry (LAN endpoint) - run: echo "${{ secrets.REGISTRY_TOKEN }}" | docker login "${REGISTRY_PUSH}" -u "${{ github.repository_owner }}" --password-stdin + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + # LAN registry is HTTP only. + config-inline: | + [registry."192.168.0.2:1234"] + http = true + insecure = true - - name: Build & push image + - name: Configure registry credentials for buildx + env: + REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }} + REGISTRY_USER: ${{ github.actor }} run: | - SHA_TAG=$(echo "$GITHUB_SHA" | cut -c1-12) - DATE_TAG=$(date -u +%Y.%m.%d) - docker build \ - -t "${REGISTRY_PUSH}/${IMAGE}:latest" \ - -t "${REGISTRY_PUSH}/${IMAGE}:${SHA_TAG}" \ - -t "${REGISTRY_PUSH}/${IMAGE}:${DATE_TAG}" \ - . - docker push "${REGISTRY_PUSH}/${IMAGE}:latest" - docker push "${REGISTRY_PUSH}/${IMAGE}:${SHA_TAG}" - docker push "${REGISTRY_PUSH}/${IMAGE}:${DATE_TAG}" + mkdir -p ~/.docker + AUTH=$(printf '%s:%s' "$REGISTRY_USER" "$REGISTRY_TOKEN" | base64 -w0) + cat > ~/.docker/config.json < ${PKG}: HTTP ${code}" + body=$(cat /tmp/link.out) + case "$code" in + 201) echo "OK — newly linked" ;; + 400|409) echo "OK — already linked: ${body}" ;; + *) echo "unexpected: ${body}"; exit 1 ;; esac - name: Prune old container versions diff --git a/.gitea/workflows/refresh.yml b/.gitea/workflows/refresh.yml index ef7f504..9a48be6 100644 --- a/.gitea/workflows/refresh.yml +++ b/.gitea/workflows/refresh.yml @@ -19,27 +19,23 @@ on: default: false env: - # If your registry sits behind Cloudflare with its 100 MB body cap, - # use a LAN endpoint for pushes (bypasses CF) and the public hostname - # for pulls (response bodies aren't capped). - REGISTRY_PUSH: : - REGISTRY_PULL: - # Image name derives from the actual repo at runtime, so a clone - # doesn't need to find/replace anything. e.g. justin/my-product-docs. - # github.* context is Gitea Actions' inherited GitHub-Actions namespace - # — values come from the Gitea server, not github.com. + # PUSH goes to the LAN endpoint (HTTP) to bypass Cloudflare Tunnel's + # 100 MB body cap. PULL uses the public hostname (HTTPS). Same Gitea + # registry either way — package lands under the same owner/repo. + REGISTRY_PUSH: 192.168.0.2:1234 + REGISTRY_PULL: git.jpaul.io + + # Image name derives from the repo at runtime — clones don't need to + # edit this. github.* is the Gitea-Actions inherited namespace. IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }} - # Embedder. One URL per GPU; the indexer round-robins. - OLLAMA_URL: http://:11434 + # Embedder. One URL per GPU; the indexer round-robins if you pass a + # comma-separated list. Adjust to wherever Ollama is reachable from + # the runner (gitea_default network can reach the host's bridge IP). + OLLAMA_URL: http://192.168.0.126:11434 EMBED_MODEL: nomic-embed-text - # PRODUCT_NAME defaults to the repo name so a clone works without - # editing. Override here if you want a different identifier (e.g. - # repo "my-product-docs" → PRODUCT_NAME "myproduct"). Used as the - # Chroma collection name, BM25 db filename, and MCP server name — - # see docs_mcp/server.py. - PRODUCT_NAME: ${{ github.event.repository.name }} + PRODUCT_NAME: hvm jobs: refresh: @@ -50,10 +46,12 @@ jobs: - name: Checkout uses: actions/checkout@v4 with: - # Full history — required for the digest-history step to - # walk git log. Default fetch-depth: 1 silently produces a - # 0-byte history file. + # Full history — required for digest-history. Default depth 1 + # silently produces a 0-byte history file. fetch-depth: 0 + # Set the credentials Gitea injects so we can push corpus + # commits back. Persist them across the run. + token: ${{ secrets.GITEA_TOKEN }} - name: Set up Python uses: actions/setup-python@v5 @@ -89,8 +87,8 @@ jobs: - name: Commit corpus changes (if any) id: commit run: | - git config user.name "-docs-refresh" - git config user.email "actions@" + git config user.name "hvm-docs-refresh" + git config user.email "actions@jpaul.io" git add bundles.json corpus if git diff --cached --quiet; then echo "no corpus changes — skipping reindex and image build" @@ -132,49 +130,89 @@ jobs: if: steps.commit.outputs.changed == 'true' || inputs.force_build == true run: python -m rag.index --rebuild - # ---- Build & push image ------------------------------------ - - name: Log in to registry (LAN endpoint) + # ---- Build & push image (LAN endpoint, buildx) ------------- + - name: Set up Docker Buildx if: steps.commit.outputs.changed == 'true' || inputs.force_build == true - run: echo "${{ secrets.REGISTRY_TOKEN }}" | docker login "${REGISTRY_PUSH}" -u "${{ github.repository_owner }}" --password-stdin + uses: docker/setup-buildx-action@v3 + with: + # LAN registry is HTTP only. Buildkit needs an explicit + # insecure-registry config or it tries to upgrade to HTTPS. + config-inline: | + [registry."192.168.0.2:1234"] + http = true + insecure = true - - name: Build & push image + - name: Configure registry credentials for buildx + # Can't use docker/login-action against the LAN endpoint — + # the host docker daemon errors on HTTP-vs-HTTPS. Buildx reads + # ~/.docker/config.json directly, so write the auth ourselves. if: steps.commit.outputs.changed == 'true' || inputs.force_build == true - # Runner shell is /bin/sh — use cut instead of ${VAR::N}. - # Three tags: :latest (Watchtower target), : - # (rollback pin), : (human-readable). + env: + REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }} + REGISTRY_USER: ${{ github.actor }} run: | - SHA_TAG=$(echo "$GITHUB_SHA" | cut -c1-12) - DATE_TAG=$(date -u +%Y.%m.%d) - docker build \ - -t "${REGISTRY_PUSH}/${IMAGE}:latest" \ - -t "${REGISTRY_PUSH}/${IMAGE}:${SHA_TAG}" \ - -t "${REGISTRY_PUSH}/${IMAGE}:${DATE_TAG}" \ - . - docker push "${REGISTRY_PUSH}/${IMAGE}:latest" - docker push "${REGISTRY_PUSH}/${IMAGE}:${SHA_TAG}" - docker push "${REGISTRY_PUSH}/${IMAGE}:${DATE_TAG}" + mkdir -p ~/.docker + AUTH=$(printf '%s:%s' "$REGISTRY_USER" "$REGISTRY_TOKEN" | base64 -w0) + cat > ~/.docker/config.json < ${PKG}: HTTP ${code}" + body=$(cat /tmp/link.out) + case "$code" in + 201) echo "OK — newly linked" ;; + 400|409) echo "OK — already linked: ${body}" ;; + *) echo "unexpected: ${body}"; exit 1 ;; esac # ---- Registry GC ------------------------------------------- diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml index 0aa05a8..b75a41c 100644 --- a/deploy/docker-compose.yml +++ b/deploy/docker-compose.yml @@ -1,6 +1,6 @@ # Hosting stack for a docs MCP server. # -# Replace below with your product name on first deploy. +# Replace hvm below with your product name on first deploy. # Volumes: usage logs are mounted to a host path so they survive # Watchtower-driven container recreates. # @@ -10,15 +10,15 @@ services: # The MCP server. Watchtower auto-pulls on :latest changes. - -docs-mcp: - image: //-docs-mcp:latest - container_name: -docs-mcp + hvm-docs-mcp: + image: git.jpaul.io/justin/hvm-docs:latest + container_name: hvm-docs-mcp restart: unless-stopped ports: - "8000:8000" environment: - PRODUCT_NAME: "" - PRODUCT_DOCS_URL: "https://docs.example.com" + PRODUCT_NAME: "hvm" + PRODUCT_DOCS_URL: "https://support.hpe.com/hpesc/public/docDisplay?docId=sd00007735en_us" # Streamable-HTTP transport. Stateless mode is required for # production: clients don't lose sessions when Watchtower @@ -28,19 +28,21 @@ services: MCP_PORT: "8000" # If you run MetaMCP or another gateway in front and reach - # this container via its compose DNS name (e.g. -docs-mcp:8000), + # this container via its compose DNS name (e.g. hvm-docs-mcp:8000), # add that hostname here. "*" disables the rebind check entirely. - MCP_ALLOWED_HOSTS: "-docs-mcp,localhost,127.0.0.1" + MCP_ALLOWED_HOSTS: "hvm-docs-mcp,localhost,127.0.0.1" # Phase 6 — reranker sidecar (jina-reranker-v2-base via llama.cpp). - RERANK_URL: http://-rerank:8080 + RERANK_URL: http://hvm-rerank:8080 RERANK_POOL: "200" RERANK_TIMEOUT: "30" - # Phase 8 — hybrid retrieval (BM25 + dense + RRF). Set true - # only after the eval harness shows the dense-only path - # missing technical-term queries that BM25 catches. - HYBRID_SEARCH: "true" + # Phase 8 — hybrid retrieval (BM25 + dense + RRF). + # Eval on the HVM corpus (eval/results/baseline.md, 2026-05-22) shows + # BM25-default + reranker beats hybrid on every metric (MRR 0.920 vs + # 0.875). Leaving HYBRID_SEARCH off so search_docs runs BM25-first + + # reranker; dense is the fallback when BM25 finds nothing. + HYBRID_SEARCH: "false" # Phase 10 — usage telemetry. USAGE_LOG_DIR: /app/var/logs @@ -52,9 +54,9 @@ services: # DOC_BUG_API_URL: "https://docs-be.example.com/api/feedback" volumes: # Usage logs persist across container recreates. - - ./-docs-mcp-logs:/app/var/logs + - ./hvm-docs-mcp-logs:/app/var/logs depends_on: - - -rerank + - hvm-rerank labels: # Watchtower polls *only* containers with this label set true. com.centurylinklabs.watchtower.enable: "true" @@ -63,9 +65,13 @@ services: # Reranker sidecar — llama.cpp serving jina-reranker-v2-base. # Requires GPU access; adjust runtime/devices for your hardware. - -rerank: + # + # For dev / CPU-only hosts, swap this service for scripts/rerank_server.py + # (sentence-transformers ms-marco-MiniLM-L-6-v2). Same /v1/rerank shape, + # ~500ms/batch on CPU vs ~50ms on GPU with the jina GGUF. + hvm-rerank: image: ghcr.io/ggml-org/llama.cpp:server-cuda - container_name: -rerank + container_name: hvm-rerank restart: unless-stopped # Mount the GGUF model from the host. Download from huggingface # (gguf-org/jina-reranker-v2-base-multilingual-GGUF) first.