From 86524254135fb92f9a106f83f99e2902f47266ec Mon Sep 17 00:00:00 2001 From: Justin Paul Date: Tue, 9 Jun 2026 12:45:33 -0400 Subject: [PATCH] Fix #196: one-command operator backup (pg_dump + MinIO) Move backup from a documented procedure to `deploy/backup.sh`: dumps Postgres (pg_dump --clean --if-exists, gzipped) and archives the MinIO /data directory into a single timestamped bundle under backups/. Reads config from the compose .env with the same defaults the stack uses; optional BACKUP_RETAIN_DAYS prunes old bundles (cron-friendly). BACKUP.md documents usage + the restore procedure (kept manual/documented rather than an untested destructive script). Closes #196 Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Justin Paul --- deploy/BACKUP.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++ deploy/backup.sh | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 deploy/BACKUP.md create mode 100755 deploy/backup.sh diff --git a/deploy/BACKUP.md b/deploy/BACKUP.md new file mode 100644 index 0000000..c155ab7 --- /dev/null +++ b/deploy/BACKUP.md @@ -0,0 +1,55 @@ +# Backup & restore + +`backup.sh` produces a single bundle containing the Postgres database and the +MinIO object store. Run it from this `deploy/` directory on the host that runs +the stack. + +## Back up + +```bash +./backup.sh +# → backups/provenance-backup-20260609T140000Z.tar +``` + +The bundle contains: + +- `db.sql.gz` — `pg_dump --no-owner --clean --if-exists` of the database, gzipped. +- `minio-data.tar.gz` — the MinIO `/data` directory (objects + bucket metadata). +- `MANIFEST.txt` — what's inside and when it was made. + +Optional retention: `BACKUP_RETAIN_DAYS=30 ./backup.sh` also deletes bundles +older than 30 days. Schedule it from cron for unattended nightly runs, e.g.: + +```cron +15 3 * * * cd /path/to/provenance/deploy && BACKUP_RETAIN_DAYS=30 ./backup.sh +``` + +(Copy the resulting bundle off the host — a backup on the same disk isn't one.) 
+ +## Restore + +Restoring overwrites live data — stop the app first. + +```bash +ts=20260609T140000Z # the bundle you're restoring +mkdir -p /tmp/restore && tar xf backups/provenance-backup-$ts.tar -C /tmp/restore + +# 1. Database — the dump is --clean, so it drops & recreates objects. +docker compose stop backend worker +gunzip -c /tmp/restore/db.sql.gz \ + | docker compose exec -T postgres psql -U "${POSTGRES_USER:-provenance}" -d "${POSTGRES_DB:-provenance}" + +# 2. Objects — replace the MinIO data directory (dot-entries too; `*` skips them). +docker compose stop minio +docker compose run --rm --no-deps -T -v provenance_miniodata:/data minio \ + sh -c 'rm -rf /data/* /data/.[!.]* /data/..?* && tar xzf - -C /data' < /tmp/restore/minio-data.tar.gz +docker compose up -d + +rm -rf /tmp/restore +``` + +Notes: +- The MinIO `/data` archive is filesystem-level; restore into the **same** MinIO + major version it was taken from. +- Verify the volume name (`docker volume ls | grep miniodata`) — compose prefixes + it with the project name; adjust the `-v` mount accordingly. diff --git a/deploy/backup.sh b/deploy/backup.sh new file mode 100755 index 0000000..ac40fba --- /dev/null +++ b/deploy/backup.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# +# One-command backup of a Provenance deployment: the Postgres database and the +# MinIO object store, into a single timestamped bundle under ./backups/. +# +# ./backup.sh # write backups/provenance-backup-YYYYMMDDTHHMMSSZ.tar +# BACKUP_RETAIN_DAYS=30 ./backup.sh # also prune bundles older than 30 days +# +# Run it from the host where `docker compose` manages the stack (i.e. this +# deploy/ directory). Restore steps are in BACKUP.md. +set -euo pipefail + +cd "$(dirname "$0")" # the deploy/ directory (where docker-compose.yml lives) + +# Config comes from the compose .env (same file the stack uses); fall back to +# the compose defaults so a vanilla stack still backs up. +if [ -f .env ]; then set -a; . 
./.env; set +a; fi +PGUSER="${POSTGRES_USER:-provenance}" +PGDB="${POSTGRES_DB:-provenance}" + +dc() { docker compose "$@"; } + +ts="$(date -u +%Y%m%dT%H%M%SZ)" +work="backups/.work-$ts" +mkdir -p "$work" backups + +cleanup() { rm -rf "$work"; } +trap cleanup EXIT + +echo "→ Dumping Postgres database '$PGDB'…" +dc exec -T postgres pg_dump -U "$PGUSER" -d "$PGDB" --no-owner --clean --if-exists \ + | gzip > "$work/db.sql.gz" + +echo "→ Archiving MinIO object store…" +# Tar MinIO's data directory straight from the container (objects + bucket +# metadata). Restored by extracting back into the miniodata volume. +dc exec -T minio tar czf - -C /data . > "$work/minio-data.tar.gz" + +cat > "$work/MANIFEST.txt" <