Files
provenance/backend/app/services/account_service.py
T
justin e9b2436ce0 Account export / restore-into-new-tree / delete
New account_service + endpoints under /users/me:
- GET /me/export — zip of every owned tree (account.json + media blobs).
- POST /me/import — restore a backup into NEW trees (ids remapped, media
  re-uploaded); non-destructive, never touches existing data.
- DELETE /me — soft-delete the user, their owned trees, and revoke sessions;
  guarded by retyping the account email.

Settings page wires all three (export download, restore upload, delete with
typed-email confirmation). No migration — uses existing tables + soft-delete.

52 backend tests pass (export→restore round-trip + delete guards); frontend builds.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-07 11:26:04 -04:00

353 lines
12 KiB
Python

"""Account-level data portability: export the signed-in user's owned trees as a
zip (JSON + media bytes), restore such a zip into a brand-new tree
(non-destructive), and delete the account.
The export format is a zip containing ``account.json`` plus ``media/<id>`` blobs.
Restore always creates new trees and remaps ids, so it can't clobber existing
data.
"""
import hashlib
import io
import json
import uuid
import zipfile
from datetime import UTC, date, datetime
from sqlalchemy import select, update
from sqlalchemy.ext.asyncio import AsyncSession
from app.integrations.objectstore.base import ObjectStore
from app.models.auth import Session as SessionModel
from app.models.enums import MembershipRole
from app.models.event import Event
from app.models.media import Media
from app.models.person import Name, Person
from app.models.place import Place
from app.models.relationship import Relationship
from app.models.source import Citation, Source
from app.models.tree import Tree, TreeMembership
from app.models.user import User
from app.services.audit import record_audit
from app.services.exceptions import Forbidden, NotFound
EXPORT_VERSION = 1
_DROP = {"created_at", "updated_at", "deleted_at", "tree_id"}
# Media columns rebuilt on import (storage is re-keyed, checksum recomputed).
_MEDIA_DROP = _DROP | {"uploader_id", "storage_key", "byte_size", "checksum_sha256"}
_DATE_FIELDS = {"date_start", "date_end"}
def _row(obj, drop: set[str]) -> dict:
out: dict = {}
for col in obj.__table__.columns.keys(): # noqa: SIM118
if col in drop:
continue
out[col] = getattr(obj, col)
return out
async def _entities(session: AsyncSession, model, tree_id: uuid.UUID):
stmt = select(model).where(model.tree_id == tree_id, model.deleted_at.is_(None))
return list((await session.execute(stmt)).scalars().all())
async def export_account(session: AsyncSession, store: ObjectStore, *, user: User) -> bytes:
"""Build a zip of every tree the user owns: account.json + media blobs."""
trees = list(
(
await session.execute(
select(Tree).where(Tree.owner_id == user.id, Tree.deleted_at.is_(None))
)
).scalars().all()
)
payload: dict = {
"version": EXPORT_VERSION,
"user": {"email": user.email, "display_name": user.display_name},
"trees": [],
}
media_blobs: list[tuple[str, bytes]] = []
for tree in trees:
media_rows = await _entities(session, Media, tree.id)
media_out = []
for m in media_rows:
ref = f"media/{m.id}"
rec = _row(m, _MEDIA_DROP)
rec["_file"] = ref
media_out.append(rec)
try:
media_blobs.append((ref, await store.get_object(key=m.storage_key)))
except Exception: # noqa: BLE001 — a missing blob shouldn't abort the export
rec["_file"] = None
payload["trees"].append({
"tree": {
"name": tree.name,
"description": tree.description,
"visibility": tree.visibility,
"home_person_id": tree.home_person_id,
},
"places": [_row(p, _DROP) for p in await _entities(session, Place, tree.id)],
"persons": [_row(p, _DROP) for p in await _entities(session, Person, tree.id)],
"names": [_row(n, _DROP) for n in await _entities(session, Name, tree.id)],
"relationships": [
_row(r, _DROP) for r in await _entities(session, Relationship, tree.id)
],
"events": [_row(e, _DROP) for e in await _entities(session, Event, tree.id)],
"sources": [_row(s, _DROP) for s in await _entities(session, Source, tree.id)],
"citations": [_row(c, _DROP) for c in await _entities(session, Citation, tree.id)],
"media": media_out,
})
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
zf.writestr("account.json", json.dumps(payload, default=str, indent=2))
for ref, blob in media_blobs:
zf.writestr(ref, blob)
return buf.getvalue()
def _as_uuid(v) -> uuid.UUID | None:
return uuid.UUID(v) if v else None
def _as_date(v) -> date | None:
return date.fromisoformat(v) if v else None
async def import_account(
session: AsyncSession, store: ObjectStore, *, user: User, raw_zip: bytes
) -> dict:
"""Restore an exported zip into NEW trees owned by the user. Non-destructive:
every record gets a fresh id; nothing existing is touched."""
try:
zf = zipfile.ZipFile(io.BytesIO(raw_zip))
payload = json.loads(zf.read("account.json"))
except (zipfile.BadZipFile, KeyError, json.JSONDecodeError) as e:
raise NotFound("not a valid Provenance export") from e
counts: dict[str, int] = {"trees": 0, "persons": 0, "events": 0, "media": 0}
for tdata in payload.get("trees", []):
t = tdata.get("tree", {})
tree = Tree(
owner_id=user.id,
name=(t.get("name") or "Imported tree"),
description=t.get("description"),
visibility=t.get("visibility") or "private",
)
session.add(tree)
await session.flush()
session.add(
TreeMembership(tree_id=tree.id, user_id=user.id, role=MembershipRole.owner)
)
counts["trees"] += 1
# id remaps from the export's ids to the freshly created ones.
pmap: dict[str, uuid.UUID] = {}
rmap: dict[str, uuid.UUID] = {}
smap: dict[str, uuid.UUID] = {}
nmap: dict[str, uuid.UUID] = {}
emap: dict[str, uuid.UUID] = {}
plmap: dict[str, uuid.UUID] = {}
for pl in tdata.get("places", []):
obj = Place(
tree_id=tree.id,
name=pl.get("name") or "",
place_type=pl.get("place_type"),
latitude=pl.get("latitude"),
longitude=pl.get("longitude"),
)
session.add(obj)
await session.flush()
plmap[pl["id"]] = obj.id
for p in tdata.get("persons", []):
obj = Person(
tree_id=tree.id,
gender=p.get("gender"),
is_living=p.get("is_living"),
privacy=p.get("privacy") or "inherit",
notes=p.get("notes"),
)
session.add(obj)
await session.flush()
pmap[p["id"]] = obj.id
counts["persons"] += 1
for n in tdata.get("names", []):
pid = pmap.get(n.get("person_id"))
if pid is None:
continue
obj = Name(
tree_id=tree.id,
person_id=pid,
name_type=n.get("name_type") or "birth",
given=n.get("given"),
surname=n.get("surname"),
prefix=n.get("prefix"),
suffix=n.get("suffix"),
nickname=n.get("nickname"),
display_name=n.get("display_name"),
is_primary=bool(n.get("is_primary")),
sort_order=n.get("sort_order") or 0,
)
session.add(obj)
await session.flush()
nmap[n["id"]] = obj.id
for r in tdata.get("relationships", []):
a = pmap.get(r.get("person_from_id"))
b = pmap.get(r.get("person_to_id"))
if a is None or b is None:
continue
obj = Relationship(
tree_id=tree.id,
type=r.get("type"),
person_from_id=a,
person_to_id=b,
qualifier=r.get("qualifier"),
notes=r.get("notes"),
)
session.add(obj)
await session.flush()
rmap[r["id"]] = obj.id
for e in tdata.get("events", []):
obj = Event(
tree_id=tree.id,
event_type=e.get("event_type") or "other",
person_id=pmap.get(e.get("person_id")),
relationship_id=rmap.get(e.get("relationship_id")),
place_id=plmap.get(e.get("place_id")),
date_value=e.get("date_value"),
date_start=_as_date(e.get("date_start")),
date_end=_as_date(e.get("date_end")),
date_precision=e.get("date_precision"),
calendar=e.get("calendar") or "gregorian",
detail=e.get("detail"),
notes=e.get("notes"),
)
session.add(obj)
await session.flush()
emap[e["id"]] = obj.id
counts["events"] += 1
for s in tdata.get("sources", []):
obj = Source(
tree_id=tree.id,
title=s.get("title") or "Untitled source",
author=s.get("author"),
source_type=s.get("source_type"),
repository=s.get("repository"),
url=s.get("url"),
citation_text=s.get("citation_text"),
publication_info=s.get("publication_info"),
quality_note=s.get("quality_note"),
)
session.add(obj)
await session.flush()
smap[s["id"]] = obj.id
for c in tdata.get("citations", []):
sid = smap.get(c.get("source_id"))
if sid is None:
continue
session.add(
Citation(
tree_id=tree.id,
source_id=sid,
person_id=pmap.get(c.get("person_id")),
event_id=emap.get(c.get("event_id")),
name_id=nmap.get(c.get("name_id")),
relationship_id=rmap.get(c.get("relationship_id")),
page=c.get("page"),
detail=c.get("detail"),
confidence=c.get("confidence"),
)
)
for m in tdata.get("media", []):
ref = m.get("_file")
if not ref:
continue
try:
blob = zf.read(ref)
except KeyError:
continue
media_id = uuid.uuid4()
filename = m.get("original_filename") or "upload"
key = f"{tree.id}/{media_id}/{filename}"
await store.ensure_bucket()
await store.put_object(
key=key,
data=blob,
content_type=m.get("content_type") or "application/octet-stream",
)
session.add(
Media(
id=media_id,
tree_id=tree.id,
uploader_id=user.id,
storage_key=key,
original_filename=filename,
content_type=m.get("content_type") or "application/octet-stream",
byte_size=len(blob),
checksum_sha256=hashlib.sha256(blob).hexdigest(),
title=m.get("title"),
person_id=pmap.get(m.get("person_id")),
event_id=emap.get(m.get("event_id")),
source_id=smap.get(m.get("source_id")),
)
)
counts["media"] += 1
# Remap the home person last, once persons exist.
home = t.get("home_person_id")
if home and home in pmap:
tree.home_person_id = pmap[home]
record_audit(
session,
action="import",
entity_type="Account",
entity_id=tree.id,
tree_id=tree.id,
actor_user_id=user.id,
after=counts,
)
await session.commit()
return counts
async def delete_account(session: AsyncSession, *, user: User, confirm_email: str) -> None:
"""Soft-delete the account: the user, the trees they own, and all their
sessions. Requires the user to retype their email as a guard."""
if confirm_email.strip().lower() != user.email.lower():
raise Forbidden("email confirmation does not match")
now = datetime.now(UTC)
await session.execute(
update(Tree)
.where(Tree.owner_id == user.id, Tree.deleted_at.is_(None))
.values(deleted_at=now)
)
await session.execute(
update(SessionModel)
.where(SessionModel.user_id == user.id, SessionModel.revoked_at.is_(None))
.values(revoked_at=now)
)
user.deleted_at = now
record_audit(
session,
action="delete",
entity_type="User",
entity_id=user.id,
actor_user_id=user.id,
)
await session.commit()