"""Account-level data portability: export the signed-in user's owned trees as a zip
(JSON + media bytes), restore such a zip into a brand-new tree (non-destructive),
and delete the account.

The export format is a zip containing ``account.json`` plus ``media/`` blobs.
Restore always creates new trees and remaps ids, so it can't clobber existing data.
"""

import hashlib
import io
import json
import logging
import uuid
import zipfile
from datetime import UTC, date, datetime

from sqlalchemy import select, update
from sqlalchemy.ext.asyncio import AsyncSession

from app.integrations.objectstore.base import ObjectStore
from app.models.auth import Session as SessionModel
from app.models.enums import MembershipRole
from app.models.event import Event
from app.models.media import Media
from app.models.person import Name, Person
from app.models.place import Place
from app.models.relationship import Relationship
from app.models.source import Citation, Source
from app.models.tree import Tree, TreeMembership
from app.models.user import User
from app.services.audit import record_audit
from app.services.exceptions import Forbidden, NotFound

EXPORT_VERSION = 1

_DROP = {"created_at", "updated_at", "deleted_at", "tree_id"}
# Media columns rebuilt on import (storage is re-keyed, checksum recomputed).
_MEDIA_DROP = _DROP | {"uploader_id", "storage_key", "byte_size", "checksum_sha256"}
_DATE_FIELDS = {"date_start", "date_end"}

# Single message for every "this upload is not one of our exports" failure.
_INVALID_EXPORT = "not a valid Provenance export"

logger = logging.getLogger(__name__)


def _row(obj, drop: set[str]) -> dict:
    """Return ``obj``'s column values as a dict, skipping column names in ``drop``."""
    return {
        col: getattr(obj, col)
        for col in obj.__table__.columns.keys()  # noqa: SIM118
        if col not in drop
    }


async def _entities(session: AsyncSession, model, tree_id: uuid.UUID):
    """Load every row of ``model`` in the tree whose ``deleted_at`` is NULL."""
    stmt = select(model).where(model.tree_id == tree_id, model.deleted_at.is_(None))
    return list((await session.execute(stmt)).scalars().all())


def _safe_filename(name) -> str:
    """Reduce an untrusted filename to its final path component for a storage key.

    The name comes from an uploaded archive, so directory parts (``/`` or
    ``\\``) and the special names ``.`` / ``..`` must not reach the key.
    Falls back to ``"upload"`` when nothing usable remains.
    """
    base = str(name).replace("\\", "/").rsplit("/", 1)[-1]
    return "upload" if base in {"", ".", ".."} else base


async def export_account(session: AsyncSession, store: ObjectStore, *, user: User) -> bytes:
    """Build a zip of every tree the user owns: account.json + media blobs.

    Only trees with ``owner_id == user.id`` and no ``deleted_at`` are exported.
    Each media record carries a ``_file`` entry naming its blob inside the zip,
    or ``None`` when the blob could not be read from the object store.

    Returns:
        The bytes of the finished zip archive.
    """
    result = await session.execute(
        select(Tree).where(Tree.owner_id == user.id, Tree.deleted_at.is_(None))
    )
    trees = list(result.scalars().all())

    payload: dict = {
        "version": EXPORT_VERSION,
        "user": {"email": user.email, "display_name": user.display_name},
        "trees": [],
    }
    media_blobs: list[tuple[str, bytes]] = []

    for tree in trees:
        media_out = []
        for m in await _entities(session, Media, tree.id):
            ref = f"media/{m.id}"
            rec = _row(m, _MEDIA_DROP)
            rec["_file"] = ref
            media_out.append(rec)
            try:
                media_blobs.append((ref, await store.get_object(key=m.storage_key)))
            except Exception:  # noqa: BLE001 — a missing blob shouldn't abort the export
                # Still best-effort, but leave a trace so the gap is diagnosable.
                logger.warning(
                    "export: could not read blob for media %s; exporting metadata only",
                    m.id,
                    exc_info=True,
                )
                rec["_file"] = None

        payload["trees"].append({
            "tree": {
                "name": tree.name,
                "description": tree.description,
                "visibility": tree.visibility,
                "home_person_id": tree.home_person_id,
            },
            "places": [_row(p, _DROP) for p in await _entities(session, Place, tree.id)],
            "persons": [_row(p, _DROP) for p in await _entities(session, Person, tree.id)],
            "names": [_row(n, _DROP) for n in await _entities(session, Name, tree.id)],
            "relationships": [
                _row(r, _DROP) for r in await _entities(session, Relationship, tree.id)
            ],
            "events": [_row(e, _DROP) for e in await _entities(session, Event, tree.id)],
            "sources": [_row(s, _DROP) for s in await _entities(session, Source, tree.id)],
            "citations": [_row(c, _DROP) for c in await _entities(session, Citation, tree.id)],
            "media": media_out,
        })

    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
        # default=str stringifies values json can't encode natively (ids, dates).
        zf.writestr("account.json", json.dumps(payload, default=str, indent=2))
        for ref, blob in media_blobs:
            zf.writestr(ref, blob)
    return buf.getvalue()


def _as_uuid(v) -> uuid.UUID | None:
    """Parse ``v`` as a UUID; falsy input yields ``None``."""
    return uuid.UUID(v) if v else None


def _as_date(v) -> date | None:
    """Parse an ISO-format date string; falsy input yields ``None``."""
    return date.fromisoformat(v) if v else None


async def import_account(
    session: AsyncSession, store: ObjectStore, *, user: User, raw_zip: bytes
) -> dict:
    """Restore an exported zip into NEW trees owned by the user.

    Non-destructive: every record gets a fresh id; nothing existing is touched.
    Ids found in the export are remapped to the newly created ones. Records
    whose required parent is not in the remap (a name without its person, a
    relationship missing either person, a citation without its source) are
    skipped, as are media entries with no blob in the archive.

    Returns:
        Counts of created ``trees``, ``persons``, ``events`` and ``media``.

    Raises:
        NotFound: the upload is not a zip, has no readable ``account.json``,
            or its JSON does not have the expected top-level shape.
    """
    try:
        zf = zipfile.ZipFile(io.BytesIO(raw_zip))
    except zipfile.BadZipFile as e:
        raise NotFound(_INVALID_EXPORT) from e

    # Keep the archive open for the media reads below and close it on any exit.
    with zf:
        try:
            payload = json.loads(zf.read("account.json"))
        except (zipfile.BadZipFile, KeyError, ValueError) as e:
            # ValueError covers JSONDecodeError and the UnicodeDecodeError that
            # json.loads raises for undecodable bytes.
            raise NotFound(_INVALID_EXPORT) from e

        trees = payload.get("trees", []) if isinstance(payload, dict) else None
        if not isinstance(trees, list):
            raise NotFound(_INVALID_EXPORT)

        counts: dict[str, int] = {"trees": 0, "persons": 0, "events": 0, "media": 0}
        bucket_ready = False  # ensure_bucket() is awaited once, before the first upload

        for tdata in trees:
            t = tdata.get("tree", {})
            tree = Tree(
                owner_id=user.id,
                name=(t.get("name") or "Imported tree"),
                description=t.get("description"),
                visibility=t.get("visibility") or "private",
            )
            session.add(tree)
            await session.flush()  # tree.id is read immediately below
            session.add(
                TreeMembership(tree_id=tree.id, user_id=user.id, role=MembershipRole.owner)
            )
            counts["trees"] += 1

            # id remaps from the export's ids to the freshly created ones.
            pmap: dict[str, uuid.UUID] = {}
            rmap: dict[str, uuid.UUID] = {}
            smap: dict[str, uuid.UUID] = {}
            nmap: dict[str, uuid.UUID] = {}
            emap: dict[str, uuid.UUID] = {}
            plmap: dict[str, uuid.UUID] = {}

            for pl in tdata.get("places", []):
                obj = Place(
                    tree_id=tree.id,
                    name=pl.get("name") or "",
                    place_type=pl.get("place_type"),
                    latitude=pl.get("latitude"),
                    longitude=pl.get("longitude"),
                )
                session.add(obj)
                await session.flush()
                plmap[pl["id"]] = obj.id

            for p in tdata.get("persons", []):
                obj = Person(
                    tree_id=tree.id,
                    gender=p.get("gender"),
                    is_living=p.get("is_living"),
                    privacy=p.get("privacy") or "inherit",
                    notes=p.get("notes"),
                )
                session.add(obj)
                await session.flush()
                pmap[p["id"]] = obj.id
                counts["persons"] += 1

            for n in tdata.get("names", []):
                pid = pmap.get(n.get("person_id"))
                if pid is None:
                    continue
                obj = Name(
                    tree_id=tree.id,
                    person_id=pid,
                    name_type=n.get("name_type") or "birth",
                    given=n.get("given"),
                    surname=n.get("surname"),
                    prefix=n.get("prefix"),
                    suffix=n.get("suffix"),
                    nickname=n.get("nickname"),
                    display_name=n.get("display_name"),
                    is_primary=bool(n.get("is_primary")),
                    sort_order=n.get("sort_order") or 0,
                )
                session.add(obj)
                await session.flush()
                nmap[n["id"]] = obj.id

            for r in tdata.get("relationships", []):
                a = pmap.get(r.get("person_from_id"))
                b = pmap.get(r.get("person_to_id"))
                if a is None or b is None:
                    continue
                obj = Relationship(
                    tree_id=tree.id,
                    type=r.get("type"),
                    person_from_id=a,
                    person_to_id=b,
                    qualifier=r.get("qualifier"),
                    notes=r.get("notes"),
                )
                session.add(obj)
                await session.flush()
                rmap[r["id"]] = obj.id

            for e in tdata.get("events", []):
                obj = Event(
                    tree_id=tree.id,
                    event_type=e.get("event_type") or "other",
                    person_id=pmap.get(e.get("person_id")),
                    relationship_id=rmap.get(e.get("relationship_id")),
                    place_id=plmap.get(e.get("place_id")),
                    date_value=e.get("date_value"),
                    date_start=_as_date(e.get("date_start")),
                    date_end=_as_date(e.get("date_end")),
                    date_precision=e.get("date_precision"),
                    calendar=e.get("calendar") or "gregorian",
                    detail=e.get("detail"),
                    notes=e.get("notes"),
                )
                session.add(obj)
                await session.flush()
                emap[e["id"]] = obj.id
                counts["events"] += 1

            for s in tdata.get("sources", []):
                obj = Source(
                    tree_id=tree.id,
                    title=s.get("title") or "Untitled source",
                    author=s.get("author"),
                    source_type=s.get("source_type"),
                    repository=s.get("repository"),
                    url=s.get("url"),
                    citation_text=s.get("citation_text"),
                    publication_info=s.get("publication_info"),
                    quality_note=s.get("quality_note"),
                )
                session.add(obj)
                await session.flush()
                smap[s["id"]] = obj.id

            for c in tdata.get("citations", []):
                sid = smap.get(c.get("source_id"))
                if sid is None:
                    continue
                session.add(
                    Citation(
                        tree_id=tree.id,
                        source_id=sid,
                        person_id=pmap.get(c.get("person_id")),
                        event_id=emap.get(c.get("event_id")),
                        name_id=nmap.get(c.get("name_id")),
                        relationship_id=rmap.get(c.get("relationship_id")),
                        page=c.get("page"),
                        detail=c.get("detail"),
                        confidence=c.get("confidence"),
                    )
                )

            for m in tdata.get("media", []):
                ref = m.get("_file")
                if not ref:
                    continue
                try:
                    blob = zf.read(ref)
                except KeyError:
                    # Metadata references a blob the archive doesn't contain.
                    continue
                media_id = uuid.uuid4()
                filename = m.get("original_filename") or "upload"
                content_type = m.get("content_type") or "application/octet-stream"
                # The filename is attacker-controllable; keep only its last
                # path component so the key stays under {tree}/{media}/.
                key = f"{tree.id}/{media_id}/{_safe_filename(filename)}"
                if not bucket_ready:
                    await store.ensure_bucket()
                    bucket_ready = True
                await store.put_object(key=key, data=blob, content_type=content_type)
                session.add(
                    Media(
                        id=media_id,
                        tree_id=tree.id,
                        uploader_id=user.id,
                        storage_key=key,
                        original_filename=filename,
                        content_type=content_type,
                        byte_size=len(blob),
                        checksum_sha256=hashlib.sha256(blob).hexdigest(),
                        title=m.get("title"),
                        person_id=pmap.get(m.get("person_id")),
                        event_id=emap.get(m.get("event_id")),
                        source_id=smap.get(m.get("source_id")),
                    )
                )
                counts["media"] += 1

            # Remap the home person last, once persons exist.
            home = t.get("home_person_id")
            if home and home in pmap:
                tree.home_person_id = pmap[home]

            # NOTE(review): audited once per imported tree because the call
            # references tree.id — confirm against the intended audit granularity.
            # A copy is passed so later increments don't mutate this payload.
            record_audit(
                session,
                action="import",
                entity_type="Account",
                entity_id=tree.id,
                tree_id=tree.id,
                actor_user_id=user.id,
                after=dict(counts),
            )

        await session.commit()
    return counts


async def delete_account(session: AsyncSession, *, user: User, confirm_email: str) -> None:
    """Soft-delete the account: the user, the trees they own, and all their sessions.

    Requires the user to retype their email as a guard. Owned trees and the
    user get ``deleted_at`` set; the user's unrevoked sessions get ``revoked_at``
    set. All three use the same timestamp.

    Raises:
        Forbidden: ``confirm_email`` (trimmed, case-insensitive) does not match
            the user's email.
    """
    if confirm_email.strip().lower() != user.email.lower():
        raise Forbidden("email confirmation does not match")

    now = datetime.now(UTC)
    await session.execute(
        update(Tree)
        .where(Tree.owner_id == user.id, Tree.deleted_at.is_(None))
        .values(deleted_at=now)
    )
    await session.execute(
        update(SessionModel)
        .where(SessionModel.user_id == user.id, SessionModel.revoked_at.is_(None))
        .values(revoked_at=now)
    )
    user.deleted_at = now
    record_audit(
        session,
        action="delete",
        entity_type="User",
        entity_id=user.id,
        actor_user_id=user.id,
    )
    await session.commit()