"""Gitea container-registry garbage collection. Prunes old container tags from a Gitea registry package. Always preserves: - The ``latest`` tag (Watchtower auto-pull target) - Any ``corpus-*`` tag (production pins; Drawbar may have them locked) - The ``--keep-latest`` most-recent OTHER tags (typically commit-sha pins) - Anything pushed within ``--keep-days`` days The actual disk reclaim happens on Gitea's next package GC cron (admin site settings). This script marks versions for deletion. Why this script doesn't use the Docker Registry v2 API: that API has tag listing + manifest delete by digest, but no per-tag created-at timestamp without an extra blob-fetch round-trip. Gitea's packages API gives us {tag, created_at} in one call, which is what the keep policy needs. The endpoint shape that actually works (matches Gitea 1.21+): GET /api/v1/packages/{owner}?type=container&q={name} → JSON array, ONE entry per tag, each with id + version=tag + created_at DELETE /api/v1/packages/{owner}/container/{name}/{tag} → 204 on success, 404 if already gone Auth: GITEA_TOKEN env var (PAT with delete:packages scope; the push-only PAT we use as REGISTRY_TOKEN may not be enough — if you see 403s, mint a separate PAT and pass it as GITEA_TOKEN here). Usage: python scripts/registry_gc.py \\ --owner justin \\ --package crop-chem-docs \\ --keep-days 180 \\ --keep-latest 6 [--dry-run] """ from __future__ import annotations import argparse import json import os import sys from datetime import datetime, timedelta, timezone from urllib.error import HTTPError from urllib.request import Request, urlopen GITEA_HOST = os.environ.get("GITEA_HOST", "https://git.jpaul.io") def api(token: str, method: str, path: str) -> object: # User-Agent matters: Cloudflare in front of git.jpaul.io returns # 403 to the default `Python-urllib/3.x` UA. Any non-Python UA # passes. Curl works, requests works, we just need to not look # like a vanilla urllib script. req = Request( f"{GITEA_HOST}{path}", headers={ "Authorization": f"token {token}", "User-Agent": "crop-chem-docs-registry-gc/0.1", }, method=method, ) try: with urlopen(req, timeout=30) as r: body = r.read() return json.loads(body) if body else None except HTTPError as e: if e.code == 404: return None raise def _parse_created(version: dict) -> datetime: """Gitea returns RFC3339 with offset like '2026-05-24T16:07:50-04:00'. Python 3.11+ handles this directly via fromisoformat.""" return datetime.fromisoformat(version["created_at"]) def main() -> int: p = argparse.ArgumentParser() p.add_argument("--owner", required=True) p.add_argument("--package", required=True) p.add_argument("--keep-days", type=int, default=180) p.add_argument("--keep-latest", type=int, default=6, help="Keep this many most-recent commit-sha (etc.) " "tags BEFORE applying --keep-days. corpus-* and " ":latest are kept regardless.") p.add_argument("--dry-run", action="store_true", help="Show what would be deleted without calling DELETE.") args = p.parse_args() token = os.environ.get("GITEA_TOKEN") if not token: print("GITEA_TOKEN env var not set", file=sys.stderr) return 1 # Gitea's q= is a substring match; filter to exact name so we don't # accidentally GC a sibling package that shares the prefix. versions = api( token, "GET", f"/api/v1/packages/{args.owner}?type=container&q={args.package}", ) or [] versions = [v for v in versions if v.get("name") == args.package] if not versions: print(f"no versions found for {args.owner}/{args.package} — nothing to GC") return 0 cutoff = datetime.now(timezone.utc) - timedelta(days=args.keep_days) versions.sort(key=_parse_created, reverse=True) # newest first keep: list[tuple[str, str]] = [] # (tag, reason) delete: list[dict] = [] other_kept = 0 for v in versions: tag = v.get("version", "") created = _parse_created(v) if tag == "latest": keep.append((tag, "always-keep (:latest)")) continue if tag.startswith("corpus-"): keep.append((tag, "production pin (corpus-*)")) continue if other_kept < args.keep_latest: other_kept += 1 keep.append((tag, f"keep-latest #{other_kept}/{args.keep_latest}")) continue if created >= cutoff: keep.append((tag, f"within --keep-days ({args.keep_days})")) continue delete.append(v) print(f"=== {args.owner}/{args.package}: {len(versions)} total tag(s) ===") for tag, reason in keep: print(f" KEEP {tag:<28} {reason}") for v in delete: print(f" DEL {v['version']:<28} created={v['created_at']}") if not delete: print("nothing to delete") return 0 if args.dry_run: print(f"--dry-run; would delete {len(delete)} tag(s)") return 0 failed = 0 for v in delete: tag = v["version"] try: api(token, "DELETE", f"/api/v1/packages/{args.owner}/container/{args.package}/{tag}") print(f" ✓ deleted {tag}") except HTTPError as e: print(f" ✗ failed {tag}: HTTP {e.code} {e.reason}", file=sys.stderr) failed += 1 print(f"done: deleted {len(delete) - failed} / {len(delete)} tag(s)") return 0 if failed == 0 else 1 if __name__ == "__main__": sys.exit(main())