name: Image rebuild (skip scrape)

# Fast path for code-only changes. Skips the scrape and goes straight
# to: rebuild indexes (from corpus already committed on main) + image
# build + push. Runtime ~10 min vs ~2-3 h for the full monthly refresh.
#
# Use when a PR only changes code/config — anything where the upstream
# seed catalogs haven't moved but we want the new Python in the
# running image.

on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - "docs_mcp/**"
      - "rag/**"
      - "scrape/**"
      - "requirements.txt"
      - "Dockerfile"
      - "sources.json"

# If multiple pushes land in quick succession, cancel the older one
# rather than queueing both — each run is non-trivial and the older
# commit's image just gets overwritten by the newer one anyway.
concurrency:
  group: image-only
  cancel-in-progress: true

# Workflow-wide environment, visible to every step's shell.
env:
  # Quoted so the host:port pair is always loaded as a string.
  REGISTRY_PUSH: "192.168.0.2:1234"
  REGISTRY_PULL: git.jpaul.io
  IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }}
  OLLAMA_URL: http://192.168.0.2:11434,http://192.168.0.2:11435,http://192.168.0.125:11434
  EMBED_MODEL: nomic-embed-text
  PRODUCT_NAME: crop_seed

jobs:
  build:
    runs-on: docker
    container:
      image: catthehacker/ubuntu:act-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install -q --upgrade pip
          python -m pip install -q -r requirements.txt

      # Fail fast when corpus/ is absent on this ref; otherwise report
      # its on-disk size and the number of .md / .json files found.
      - name: Verify committed corpus is present
        run: |
          test -d corpus || { echo "ERROR: corpus/ missing on this ref"; exit 1; }
          n_md=$(find corpus -name '*.md' | wc -l)
          n_json=$(find corpus -name '*.json' | wc -l)
          echo "corpus: $(du -sh corpus | cut -f1) on disk, ${n_md} .md / ${n_json} .json"

      - name: Rebuild indexes from committed corpus
        run: python -m rag.index --rebuild

      # The token and username reach the shell as environment variables
      # instead of being expanded into the script text, so characters
      # such as quotes, `$` or backticks in a value cannot change the
      # command that runs.
      - name: Log in to Gitea container registry
        env:
          REGISTRY_USER: ${{ github.repository_owner }}
          REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
        run: |
          printf '%s\n' "${REGISTRY_TOKEN}" \
            | docker login "${REGISTRY_PUSH}" -u "${REGISTRY_USER}" --password-stdin

      # One build, three tags: `latest`, the first 12 characters of the
      # commit SHA, and `corpus-<UTC date of this run>`.
      - name: Build & push image
        run: |
          SHA_TAG=$(echo "$GITHUB_SHA" | cut -c1-12)
          CORPUS_TAG="corpus-$(date -u +%Y.%m.%d)"
          docker build \
            -t "${REGISTRY_PUSH}/${IMAGE}:latest" \
            -t "${REGISTRY_PUSH}/${IMAGE}:${SHA_TAG}" \
            -t "${REGISTRY_PUSH}/${IMAGE}:${CORPUS_TAG}" \
            .
          docker push "${REGISTRY_PUSH}/${IMAGE}:latest"
          docker push "${REGISTRY_PUSH}/${IMAGE}:${SHA_TAG}"
          docker push "${REGISTRY_PUSH}/${IMAGE}:${CORPUS_TAG}"

      # POSTs to the package "link" endpoint on REGISTRY_PULL. HTTP 201
      # and 400 are both accepted (400 is treated as "already linked");
      # any other status fails the step.
      - name: Link container package to this repo
        env:
          GITEA_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
          OWNER: ${{ github.repository_owner }}
          PKG: ${{ github.event.repository.name }}
        run: |
          BODY=$(mktemp)
          # Remove the response scratch file however the step exits.
          trap 'rm -f "$BODY"' EXIT
          CODE=$(curl -sS -o "$BODY" -w "%{http_code}" -X POST \
            -H "Authorization: token ${GITEA_TOKEN}" \
            "https://${REGISTRY_PULL}/api/v1/packages/${OWNER}/container/${PKG}/-/link/${PKG}")
          echo "link http=$CODE body=$(cat "$BODY")"
          case "$CODE" in
            201) echo "linked package to ${OWNER}/${PKG}" ;;
            400) echo "already linked — ok" ;;
            *) echo "unexpected status $CODE"; exit 1 ;;
          esac

      - name: Prune old container versions
        # GC requires broader scope than REGISTRY_TOKEN's push perms
        # (HTTP 403 on /packages/.../versions). Non-critical —
        # housekeeping only. Don't fail the whole run.
        # TODO: issue separate PAT with admin:package scope and set
        # as PACKAGES_ADMIN_TOKEN.
        continue-on-error: true
        env:
          GITEA_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
          OWNER: ${{ github.repository_owner }}
          PKG: ${{ github.event.repository.name }}
        run: |
          python scripts/registry_gc.py \
            --owner "${OWNER}" \
            --package "${PKG}" \
            --keep-days 180 \
            --keep-latest 6