e9b2436ce0
New account_service + endpoints under /users/me: - GET /me/export — zip of every owned tree (account.json + media blobs). - POST /me/import — restore a backup into NEW trees (ids remapped, media re-uploaded); non-destructive, never touches existing data. - DELETE /me — soft-delete the user, their owned trees, and revoke sessions; guarded by retyping the account email. Settings page wires all three (export download, restore upload, delete with typed-email confirmation). No migration — uses existing tables + soft-delete. 52 backend tests pass (export→restore round-trip + delete guards); frontend builds. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
353 lines
12 KiB
Python
353 lines
12 KiB
Python
"""Account-level data portability: export the signed-in user's owned trees as a
|
|
zip (JSON + media bytes), restore such a zip into a brand-new tree
|
|
(non-destructive), and delete the account.
|
|
|
|
The export format is a zip containing ``account.json`` plus ``media/<id>`` blobs.
|
|
Restore always creates new trees and remaps ids, so it can't clobber existing
|
|
data.
|
|
"""
|
|
|
|
import hashlib
|
|
import io
|
|
import json
|
|
import uuid
|
|
import zipfile
|
|
from datetime import UTC, date, datetime
|
|
|
|
from sqlalchemy import select, update
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.integrations.objectstore.base import ObjectStore
|
|
from app.models.auth import Session as SessionModel
|
|
from app.models.enums import MembershipRole
|
|
from app.models.event import Event
|
|
from app.models.media import Media
|
|
from app.models.person import Name, Person
|
|
from app.models.place import Place
|
|
from app.models.relationship import Relationship
|
|
from app.models.source import Citation, Source
|
|
from app.models.tree import Tree, TreeMembership
|
|
from app.models.user import User
|
|
from app.services.audit import record_audit
|
|
from app.services.exceptions import Forbidden, NotFound
|
|
|
|
# Format version written into account.json under the "version" key on export.
EXPORT_VERSION = 1
# Columns left out of every exported row. On import each record is attached to
# the newly created tree's id, and the timestamp columns are never set
# explicitly.
_DROP = {"created_at", "updated_at", "deleted_at", "tree_id"}
# Media columns rebuilt on import (storage is re-keyed, checksum recomputed).
_MEDIA_DROP = _DROP | {"uploader_id", "storage_key", "byte_size", "checksum_sha256"}
# NOTE(review): not referenced anywhere in the visible part of this module.
# Presumably names the Event columns that import parses with _as_date — confirm
# before relying on or removing it.
_DATE_FIELDS = {"date_start", "date_end"}
|
|
|
|
|
|
def _row(obj, drop: set[str]) -> dict:
    """Snapshot a mapped object's column values as a plain dict.

    Column names are taken from ``obj.__table__.columns`` in their declared
    order; any name present in ``drop`` is left out of the result.
    """
    column_names = obj.__table__.columns.keys()
    return {name: getattr(obj, name) for name in column_names if name not in drop}
|
|
|
|
|
|
async def _entities(session: AsyncSession, model, tree_id: uuid.UUID):
    """Return a list of all ``model`` rows in ``tree_id`` whose ``deleted_at`` is NULL."""
    in_tree = model.tree_id == tree_id
    not_deleted = model.deleted_at.is_(None)
    result = await session.execute(select(model).where(in_tree, not_deleted))
    return list(result.scalars().all())
|
|
|
|
|
|
async def export_account(session: AsyncSession, store: ObjectStore, *, user: User) -> bytes:
    """Serialize every live tree owned by ``user`` into an in-memory zip.

    The archive contains ``account.json`` (export version, the user's email and
    display name, and one entry per tree) plus one ``media/<id>`` member for
    each media blob that could be read from ``store``. A media row whose blob
    fetch raises is still listed in the JSON, with ``_file`` set to ``None``.

    Returns:
        The raw bytes of the finished zip archive.
    """
    owned_query = select(Tree).where(Tree.owner_id == user.id, Tree.deleted_at.is_(None))
    owned_trees = list((await session.execute(owned_query)).scalars().all())

    tree_entries: list = []
    blobs: list[tuple[str, bytes]] = []

    async def dump(model, tree_id) -> list:
        # Generic (non-media) entities: every column except the _DROP set.
        return [_row(item, _DROP) for item in await _entities(session, model, tree_id)]

    for tree in owned_trees:
        media_records = []
        for media in await _entities(session, Media, tree.id):
            member = f"media/{media.id}"
            record = _row(media, _MEDIA_DROP)
            try:
                data = await store.get_object(key=media.storage_key)
            except Exception:  # noqa: BLE001 — a missing blob shouldn't abort the export
                record["_file"] = None
            else:
                record["_file"] = member
                blobs.append((member, data))
            media_records.append(record)

        header = {
            "name": tree.name,
            "description": tree.description,
            "visibility": tree.visibility,
            "home_person_id": tree.home_person_id,
        }
        places = await dump(Place, tree.id)
        persons = await dump(Person, tree.id)
        names = await dump(Name, tree.id)
        relationships = await dump(Relationship, tree.id)
        events = await dump(Event, tree.id)
        sources = await dump(Source, tree.id)
        citations = await dump(Citation, tree.id)

        tree_entries.append({
            "tree": header,
            "places": places,
            "persons": persons,
            "names": names,
            "relationships": relationships,
            "events": events,
            "sources": sources,
            "citations": citations,
            "media": media_records,
        })

    payload: dict = {
        "version": EXPORT_VERSION,
        "user": {"email": user.email, "display_name": user.display_name},
        "trees": tree_entries,
    }

    archive = io.BytesIO()
    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as bundle:
        # default=str stringifies any value json cannot encode natively.
        bundle.writestr("account.json", json.dumps(payload, default=str, indent=2))
        for member, data in blobs:
            bundle.writestr(member, data)
    return archive.getvalue()
|
|
|
|
|
|
def _as_uuid(v) -> uuid.UUID | None:
|
|
return uuid.UUID(v) if v else None
|
|
|
|
|
|
def _as_date(v) -> date | None:
|
|
return date.fromisoformat(v) if v else None
|
|
|
|
|
|
async def import_account(
    session: AsyncSession, store: ObjectStore, *, user: User, raw_zip: bytes
) -> dict:
    """Restore an exported zip into NEW trees owned by the user.

    Non-destructive: every record gets a fresh id; nothing existing is touched.
    For each tree entry in ``account.json`` a new ``Tree`` (plus an owner
    ``TreeMembership``) is created, then places, persons, names, relationships,
    events, sources, citations and media are re-created with their
    cross-references remapped from the exported ids to the new ones. Records
    whose required parent cannot be remapped (a name without its person, a
    relationship without both persons, a citation without its source) are
    skipped, as are media entries whose blob is absent from the archive.

    Args:
        session: Database session; committed once after all trees are restored.
        store: Object store that receives the re-uploaded media blobs.
        user: Owner of the restored trees and uploader of the restored media.
        raw_zip: Bytes of an archive produced by the account export.

    Returns:
        Cumulative totals across all restored trees, keyed ``trees``,
        ``persons``, ``events`` and ``media``.

    Raises:
        NotFound: If ``raw_zip`` is not a readable zip, lacks ``account.json``,
            or ``account.json`` is not a decodable JSON object.
        KeyError: If a place/person/name/relationship/event/source record has
            no ``"id"`` field (record-level validation is not performed).
    """
    invalid_export = "not a valid Provenance export"
    try:
        zf = zipfile.ZipFile(io.BytesIO(raw_zip))
    except zipfile.BadZipFile as exc:
        raise NotFound(invalid_export) from exc

    counts: dict[str, int] = {"trees": 0, "persons": 0, "events": 0, "media": 0}
    # ensure_bucket() does not depend on the media item, so call it at most once
    # and only when there is actually something to upload.
    bucket_ready = False

    # The context manager guarantees the archive is closed on every exit path.
    with zf:
        try:
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised when the bytes are not valid UTF-8/16/32.
            payload = json.loads(zf.read("account.json"))
        except (zipfile.BadZipFile, KeyError, ValueError) as exc:
            raise NotFound(invalid_export) from exc
        if not isinstance(payload, dict):
            # Valid JSON, but not the object an export always has at the root.
            raise NotFound(invalid_export)

        # ``or`` fallbacks tolerate sections that are present but null.
        for tdata in payload.get("trees") or []:
            t = tdata.get("tree") or {}
            tree = Tree(
                owner_id=user.id,
                name=(t.get("name") or "Imported tree"),
                description=t.get("description"),
                visibility=t.get("visibility") or "private",
            )
            session.add(tree)
            # Flush before reading tree.id (presumably so a database-generated
            # id is populated); the same pattern is used for every mapped row.
            await session.flush()
            session.add(
                TreeMembership(tree_id=tree.id, user_id=user.id, role=MembershipRole.owner)
            )
            counts["trees"] += 1

            # id remaps from the export's ids to the freshly created ones.
            pmap: dict[str, uuid.UUID] = {}
            rmap: dict[str, uuid.UUID] = {}
            smap: dict[str, uuid.UUID] = {}
            nmap: dict[str, uuid.UUID] = {}
            emap: dict[str, uuid.UUID] = {}
            plmap: dict[str, uuid.UUID] = {}

            for pl in tdata.get("places") or []:
                obj = Place(
                    tree_id=tree.id,
                    name=pl.get("name") or "",
                    place_type=pl.get("place_type"),
                    latitude=pl.get("latitude"),
                    longitude=pl.get("longitude"),
                )
                session.add(obj)
                await session.flush()
                plmap[pl["id"]] = obj.id

            for p in tdata.get("persons") or []:
                obj = Person(
                    tree_id=tree.id,
                    gender=p.get("gender"),
                    is_living=p.get("is_living"),
                    privacy=p.get("privacy") or "inherit",
                    notes=p.get("notes"),
                )
                session.add(obj)
                await session.flush()
                pmap[p["id"]] = obj.id
                counts["persons"] += 1

            for n in tdata.get("names") or []:
                pid = pmap.get(n.get("person_id"))
                if pid is None:
                    # A name is meaningless without its person.
                    continue
                obj = Name(
                    tree_id=tree.id,
                    person_id=pid,
                    name_type=n.get("name_type") or "birth",
                    given=n.get("given"),
                    surname=n.get("surname"),
                    prefix=n.get("prefix"),
                    suffix=n.get("suffix"),
                    nickname=n.get("nickname"),
                    display_name=n.get("display_name"),
                    is_primary=bool(n.get("is_primary")),
                    sort_order=n.get("sort_order") or 0,
                )
                session.add(obj)
                await session.flush()
                nmap[n["id"]] = obj.id

            for r in tdata.get("relationships") or []:
                a = pmap.get(r.get("person_from_id"))
                b = pmap.get(r.get("person_to_id"))
                if a is None or b is None:
                    # Both endpoints must have been restored.
                    continue
                obj = Relationship(
                    tree_id=tree.id,
                    type=r.get("type"),
                    person_from_id=a,
                    person_to_id=b,
                    qualifier=r.get("qualifier"),
                    notes=r.get("notes"),
                )
                session.add(obj)
                await session.flush()
                rmap[r["id"]] = obj.id

            for e in tdata.get("events") or []:
                obj = Event(
                    tree_id=tree.id,
                    event_type=e.get("event_type") or "other",
                    person_id=pmap.get(e.get("person_id")),
                    relationship_id=rmap.get(e.get("relationship_id")),
                    place_id=plmap.get(e.get("place_id")),
                    date_value=e.get("date_value"),
                    date_start=_as_date(e.get("date_start")),
                    date_end=_as_date(e.get("date_end")),
                    date_precision=e.get("date_precision"),
                    calendar=e.get("calendar") or "gregorian",
                    detail=e.get("detail"),
                    notes=e.get("notes"),
                )
                session.add(obj)
                await session.flush()
                emap[e["id"]] = obj.id
                counts["events"] += 1

            for s in tdata.get("sources") or []:
                obj = Source(
                    tree_id=tree.id,
                    title=s.get("title") or "Untitled source",
                    author=s.get("author"),
                    source_type=s.get("source_type"),
                    repository=s.get("repository"),
                    url=s.get("url"),
                    citation_text=s.get("citation_text"),
                    publication_info=s.get("publication_info"),
                    quality_note=s.get("quality_note"),
                )
                session.add(obj)
                await session.flush()
                smap[s["id"]] = obj.id

            for c in tdata.get("citations") or []:
                sid = smap.get(c.get("source_id"))
                if sid is None:
                    # A citation must point at a restored source.
                    continue
                session.add(
                    Citation(
                        tree_id=tree.id,
                        source_id=sid,
                        person_id=pmap.get(c.get("person_id")),
                        event_id=emap.get(c.get("event_id")),
                        name_id=nmap.get(c.get("name_id")),
                        relationship_id=rmap.get(c.get("relationship_id")),
                        page=c.get("page"),
                        detail=c.get("detail"),
                        confidence=c.get("confidence"),
                    )
                )

            for m in tdata.get("media") or []:
                ref = m.get("_file")
                if not ref:
                    # The export recorded that this blob could not be read.
                    continue
                try:
                    blob = zf.read(ref)
                except KeyError:
                    # Listed in account.json but missing from the archive.
                    continue
                media_id = uuid.uuid4()
                filename = m.get("original_filename") or "upload"
                # The filename comes from an uploaded archive: keep only its
                # final path component in the storage key so "/" or ".."
                # segments cannot steer the key outside "<tree>/<media>/".
                key_name = str(filename).replace("\\", "/").rsplit("/", 1)[-1]
                if key_name in ("", ".", ".."):
                    key_name = "upload"
                key = f"{tree.id}/{media_id}/{key_name}"
                content_type = m.get("content_type") or "application/octet-stream"
                if not bucket_ready:
                    await store.ensure_bucket()
                    bucket_ready = True
                await store.put_object(key=key, data=blob, content_type=content_type)
                session.add(
                    Media(
                        id=media_id,
                        tree_id=tree.id,
                        uploader_id=user.id,
                        storage_key=key,
                        original_filename=filename,
                        content_type=content_type,
                        byte_size=len(blob),
                        checksum_sha256=hashlib.sha256(blob).hexdigest(),
                        title=m.get("title"),
                        person_id=pmap.get(m.get("person_id")),
                        event_id=emap.get(m.get("event_id")),
                        source_id=smap.get(m.get("source_id")),
                    )
                )
                counts["media"] += 1

            # Remap the home person last, once persons exist.
            home = t.get("home_person_id")
            if home and home in pmap:
                tree.home_person_id = pmap[home]

            record_audit(
                session,
                action="import",
                entity_type="Account",
                entity_id=tree.id,
                tree_id=tree.id,
                actor_user_id=user.id,
                # Pass a snapshot: ``counts`` keeps being mutated for later
                # trees, so handing over the live dict could let an earlier
                # audit entry change after it was recorded.
                after=dict(counts),
            )

    await session.commit()
    return counts
|
|
|
|
|
|
async def delete_account(session: AsyncSession, *, user: User, confirm_email: str) -> None:
    """Soft-delete the account after the user has retyped their email.

    Stamps ``deleted_at`` on every live tree the user owns, stamps
    ``revoked_at`` on every session of theirs that is not yet revoked, marks
    the user row itself as deleted, records an audit entry and commits.

    Raises:
        Forbidden: If ``confirm_email`` (trimmed, case-insensitive) does not
            equal the account email.
    """
    typed = confirm_email.strip().lower()
    if typed != user.email.lower():
        raise Forbidden("email confirmation does not match")

    # One timestamp shared by all three writes.
    stamp = datetime.now(UTC)

    retire_trees = (
        update(Tree)
        .where(Tree.owner_id == user.id, Tree.deleted_at.is_(None))
        .values(deleted_at=stamp)
    )
    revoke_sessions = (
        update(SessionModel)
        .where(SessionModel.user_id == user.id, SessionModel.revoked_at.is_(None))
        .values(revoked_at=stamp)
    )
    await session.execute(retire_trees)
    await session.execute(revoke_sessions)

    user.deleted_at = stamp
    record_audit(
        session,
        action="delete",
        entity_type="User",
        entity_id=user.id,
        actor_user_id=user.id,
    )
    await session.commit()
|