"""Gitea container-registry garbage collection. Lists tagged versions of one container package and deletes old ones. Always preserves: - the `latest` tag (Watchtower's auto-deploy target) - the `--keep-latest` most-recent date-tagged versions (YYYY.MM.DD) - the `--keep-latest` most-recent short-SHA tags (rollback pins) - anything pushed within `--keep-days` days OCI blob-level versions (`sha256:...`) are never touched directly — those are managed by Gitea's internal package GC cron when their last tag goes away. Usage: GITEA_TOKEN=... python scripts/registry_gc.py \\ --owner justin \\ --package hvm-docs \\ --keep-days 90 \\ --keep-latest 5 The Gitea endpoint shape (confirmed 2026-05-22 against git.jpaul.io): GET /api/v1/packages/{owner}/container/{package} -> [{id, version, created_at, ...}, ...] DELETE /api/v1/packages/{owner}/container/{package}/{version} """ from __future__ import annotations import argparse import json import os import re import sys from datetime import datetime, timedelta, timezone from urllib.error import HTTPError from urllib.parse import quote from urllib.request import Request, urlopen GITEA_HOST = os.environ.get("GITEA_HOST", "https://git.jpaul.io") DATE_TAG = re.compile(r"^\d{4}\.\d{2}\.\d{2}$") SHA_TAG = re.compile(r"^[0-9a-f]{7,40}$") # short or full git SHA BLOB_VER = re.compile(r"^sha256:") # OCI blob versions — skip def api(token: str, method: str, path: str) -> object: # Explicit User-Agent: git.jpaul.io is behind Cloudflare, whose default # Bot Fight Mode 403s `Python-urllib/X.Y` with error 1010. Any # recognizable browser/curl-style UA passes. req = Request(f"{GITEA_HOST}{path}", headers={ "Authorization": f"token {token}", "User-Agent": "hvm-docs-registry-gc/1.0", }, method=method) try: with urlopen(req, timeout=30) as r: body = r.read() return json.loads(body) if body else None except HTTPError as e: if e.code == 404: return None raise def main() -> int: p = argparse.ArgumentParser() p.add_argument("--owner", required=True) p.add_argument("--package", required=True) p.add_argument("--keep-days", type=int, default=90) p.add_argument("--keep-latest", type=int, default=5) p.add_argument("--dry-run", action="store_true") args = p.parse_args() token = os.environ.get("GITEA_TOKEN") if not token: print("GITEA_TOKEN not set", file=sys.stderr) return 1 versions = api(token, "GET", f"/api/v1/packages/{args.owner}/container/{args.package}") or [] if not versions: print(f"no versions found for {args.owner}/container/{args.package}") return 0 cutoff = datetime.now(timezone.utc) - timedelta(days=args.keep_days) print(f" {len(versions)} version(s); cutoff={cutoff.isoformat()} " f"keep_days={args.keep_days} keep_latest={args.keep_latest}") # Sort newest first by created_at. def parsed_ts(v: dict) -> datetime: try: return datetime.fromisoformat(v["created_at"].replace("Z", "+00:00")) except (KeyError, ValueError): return datetime.min.replace(tzinfo=timezone.utc) versions.sort(key=parsed_ts, reverse=True) # Compute the keep-set: top-N date tags + top-N sha tags + always latest. keep_dates: list[str] = [] keep_shas: list[str] = [] for v in versions: ver = v.get("version") or "" if DATE_TAG.match(ver) and len(keep_dates) < args.keep_latest: keep_dates.append(ver) elif SHA_TAG.match(ver) and len(keep_shas) < args.keep_latest: keep_shas.append(ver) keep = {"latest", *keep_dates, *keep_shas} print(f" keep tags: {sorted(keep)}") deleted = skipped_blob = skipped_age = skipped_keep = 0 for v in versions: ver = v.get("version") or "" ts = parsed_ts(v) if BLOB_VER.match(ver): skipped_blob += 1 continue if ver in keep: skipped_keep += 1 continue if ts >= cutoff: skipped_age += 1 continue print(f" deleting {ver!r} id={v.get('id')} created={v.get('created_at')}") if not args.dry_run: api(token, "DELETE", f"/api/v1/packages/{args.owner}/container/{args.package}/{quote(ver, safe='')}") deleted += 1 print(f"done: deleted={deleted} kept_named={skipped_keep} " f"kept_recent={skipped_age} skipped_blobs={skipped_blob}") return 0 if __name__ == "__main__": sys.exit(main())