Close citation/source living-person leak; add on-demand tree purge

Two changes.

1. Privacy fix (NN#2/NN#3) — the citation and source list endpoints gated only
   on can_view_tree, so a non-member on a public/unlisted/site_members tree could
   enumerate citations and sources tied to a redacted living person, leaking that
   the person exists and has sourced facts (and possibly their name via a source
   title). #46 closed this for events/media/names/relationships but not
   citations/sources. Now citation_service.list_citations and
   source_service.{list_sources,get_source} delegate non-member reads to
   public_view_service, mirroring the #46 pattern:
   - citations: shown only when the cited fact resolves to FULL-visibility
     person(s) — covers the person_id, name_id, event_id (person or both-partner),
     and relationship_id (both-partner) target paths.
   - sources: shown only when they back at least one visible citation; a withheld
     source 404s (don't reveal it exists).
   Tests cover all four citation target types + source withholding + member-sees-all.

2. On-demand tree purge — owners can now permanently delete a soft-deleted tree
   immediately instead of waiting out the 30-day auto-purge window. POST /trees/{id}/purge
   (owner-only): the tree must already be in the trash, and the caller retypes its
   name to confirm. Media objects are deleted from storage, then a single
   DELETE on trees cascades all tree-owned rows via the tree_id ON DELETE CASCADE;
   the audit entry survives (tree_id SET NULL). Frontend adds a "Delete forever"
   button to the Recently-deleted list. No migration.

Suite: 102 passing.
Signed-off-by: Justin Paul <justin@jpaul.me>
This commit is contained in:
2026-06-10 22:38:59 -04:00
parent 7ed3ddd448
commit a6179037c2
12 changed files with 558 additions and 12 deletions
+17 -2
View File
@@ -2,8 +2,8 @@ import uuid
from fastapi import APIRouter, status
from app.api.deps import CurrentUser, SessionDep
from app.schemas.tree import TreeCreate, TreeRead, TreeUpdate
from app.api.deps import CurrentUser, ObjectStoreDep, SessionDep
from app.schemas.tree import TreeCreate, TreePurge, TreeRead, TreeUpdate
from app.services import tree_service
router = APIRouter(prefix="/trees", tags=["trees"])
@@ -57,3 +57,18 @@ async def delete_tree(tree_id: uuid.UUID, session: SessionDep, current: CurrentU
async def restore_tree(tree_id: uuid.UUID, session: SessionDep, current: CurrentUser) -> TreeRead:
    # Delegate to the tree service, then validate the result into the API read model.
    restored = await tree_service.restore_tree(session, tree_id=tree_id, actor=current)
    return TreeRead.model_validate(restored)
@router.post("/{tree_id}/purge", status_code=status.HTTP_204_NO_CONTENT)
async def purge_tree(
    tree_id: uuid.UUID,
    data: TreePurge,
    session: SessionDep,
    current: CurrentUser,
    store: ObjectStoreDep,
) -> None:
    """Permanently delete a soft-deleted tree and all its data — irreversible.
    Owner-only; the tree must be in the trash and `confirm_name` must match."""
    # Thin HTTP wrapper: every check and the delete itself happen in
    # tree_service.purge_tree; success is signalled by the 204 status alone.
    typed_name = data.confirm_name
    await tree_service.purge_tree(
        session,
        store,
        tree_id=tree_id,
        actor=current,
        confirm_name=typed_name,
    )
+5
View File
@@ -19,6 +19,11 @@ class TreeUpdate(BaseModel):
home_person_id: uuid.UUID | None = None
class TreePurge(BaseModel):
    # Request body of the tree purge endpoint.
    # Retype the tree's name to confirm a permanent, irreversible delete.
    confirm_name: str
class TreeRead(BaseModel):
model_config = ConfigDict(from_attributes=True)
+9
View File
@@ -105,6 +105,15 @@ async def list_citations(
indicators in a single round-trip."""
if not await privacy.can_view_tree(session, user_id=viewer_id, tree=tree):
raise Forbidden("not permitted to view this tree")
# Non-members get only citations whose cited fact resolves to a full-
# visibility person — a citation on a redacted living person's fact would
# otherwise leak that the person has that sourced fact.
if await privacy.get_membership_role(session, viewer_id, tree.id) is None:
from app.services import public_view_service
return await public_view_service.list_public_citations(
session, viewer_id=viewer_id, tree=tree
)
stmt = (
select(Citation)
.where(Citation.tree_id == tree.id, Citation.deleted_at.is_(None))
@@ -12,6 +12,8 @@ person's real name, dates, alternate names, or media. The rules:
living partner's timeline otherwise).
- names : only for FULL-visibility persons.
- media : NOT exposed yet (deferred — see docs/design/tree-visibility.md).
- citations : only when the cited fact resolves to FULL person(s).
- sources : only when they back at least one visible citation.
A tree that isn't viewable raises NotFound (never Forbidden) so the public
surface can't be used to probe whether a private tree exists.
@@ -27,6 +29,7 @@ from app.models.event import Event
from app.models.media import Media
from app.models.person import Name, Person
from app.models.relationship import Relationship
from app.models.source import Citation, Source
from app.models.tree import Tree
from app.services import privacy
from app.services.exceptions import NotFound
@@ -296,6 +299,95 @@ async def can_view_media(
return vis == Visibility.full
async def _full_person_ids(
    session: AsyncSession, *, viewer_id: uuid.UUID | None, tree: Tree
) -> set[uuid.UUID]:
    """Return the ids of the tree's persons whose visibility for `viewer_id`
    is `Visibility.full`."""
    everyone = await _persons(session, tree)
    visibility_by_id = await _visibility_map(
        session, viewer_id=viewer_id, tree=tree, persons=everyone
    )
    full_ids: set[uuid.UUID] = set()
    for person_id, level in visibility_by_id.items():
        if level == Visibility.full:
            full_ids.add(person_id)
    return full_ids
async def list_public_citations(
    session: AsyncSession, *, viewer_id: uuid.UUID | None, tree: Tree
) -> list[Citation]:
    """Return the tree's citations that are safe to show to `viewer_id`.

    A citation is kept only when the fact it cites resolves to FULL-visibility
    person(s): the person itself, the owner of a name, the person of an event,
    or BOTH partners of a relationship (directly, or through a relationship
    event). Anything else is dropped, because the citation's existence plus its
    page/detail would reveal that a redacted/hidden person has that sourced
    fact. Same FULL-only rule as events and names. Result is ordered by
    `created_at`.
    """
    full = await _full_person_ids(session, viewer_id=viewer_id, tree=tree)

    async def _live_rows_by_id(model):
        # Index the tree's non-deleted rows of `model` by primary key.
        query = select(model).where(model.tree_id == tree.id, model.deleted_at.is_(None))
        result = await session.execute(query)
        return {row.id: row for row in result.scalars().all()}

    names = await _live_rows_by_id(Name)
    rels = await _live_rows_by_id(Relationship)
    events = await _live_rows_by_id(Event)

    def _both_partners_full(relationship_id) -> bool:
        # A partnership is visible only when both of its ends are FULL.
        rel = rels.get(relationship_id)
        return rel is not None and rel.person_from_id in full and rel.person_to_id in full

    def _event_is_full(event_id) -> bool:
        # Person events follow the person; relationship events follow both partners.
        event = events.get(event_id)
        if event is None:
            return False
        if event.person_id is not None:
            return event.person_id in full
        if event.relationship_id is not None:
            return _both_partners_full(event.relationship_id)
        return False

    def _is_visible(citation: Citation) -> bool:
        # The first target column that is set decides; no target means hidden.
        if citation.person_id is not None:
            return citation.person_id in full
        if citation.name_id is not None:
            name = names.get(citation.name_id)
            return name is not None and name.person_id in full
        if citation.event_id is not None:
            return _event_is_full(citation.event_id)
        if citation.relationship_id is not None:
            return _both_partners_full(citation.relationship_id)
        return False

    citation_query = (
        select(Citation)
        .where(Citation.tree_id == tree.id, Citation.deleted_at.is_(None))
        .order_by(Citation.created_at)
    )
    all_citations = (await session.execute(citation_query)).scalars().all()
    return [citation for citation in all_citations if _is_visible(citation)]
async def list_public_sources(
    session: AsyncSession, *, viewer_id: uuid.UUID | None, tree: Tree
) -> list[Source]:
    """Return the tree's sources that back at least one citation visible to
    `viewer_id`, ordered by title.

    A source that only backs citations on redacted/hidden persons is withheld,
    since its title or notes could name that living person.
    """
    shown_citations = await list_public_citations(session, viewer_id=viewer_id, tree=tree)
    backed_source_ids = {citation.source_id for citation in shown_citations}

    source_query = (
        select(Source)
        .where(Source.tree_id == tree.id, Source.deleted_at.is_(None))
        .order_by(Source.title)
    )
    result = await session.execute(source_query)

    public_sources: list[Source] = []
    for source in result.scalars().all():
        if source.id in backed_source_ids:
            public_sources.append(source)
    return public_sources
async def get_public_source(
    session: AsyncSession, *, viewer_id: uuid.UUID | None, tree: Tree, source_id: uuid.UUID
) -> Source:
    """Return the publicly visible source with id `source_id`.

    Raises NotFound when the source is not among the tree's public sources.
    NotFound (404) is used rather than a 403 so that a withheld source's
    existence is not revealed.
    """
    candidates = await list_public_sources(session, viewer_id=viewer_id, tree=tree)
    found = next((source for source in candidates if source.id == source_id), None)
    if found is None:
        raise NotFound("source not found")
    return found
async def list_public_trees(
session: AsyncSession,
*,
+14
View File
@@ -61,6 +61,14 @@ async def create_source(
async def list_sources(session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tree) -> list[Source]:
if not await privacy.can_view_tree(session, user_id=viewer_id, tree=tree):
raise Forbidden("not permitted to view this tree")
# Non-members see only sources backing a visible citation (see citation
# redaction) — a source used solely for a redacted person could name them.
if await privacy.get_membership_role(session, viewer_id, tree.id) is None:
from app.services import public_view_service
return await public_view_service.list_public_sources(
session, viewer_id=viewer_id, tree=tree
)
stmt = (
select(Source)
.where(Source.tree_id == tree.id, Source.deleted_at.is_(None))
@@ -74,6 +82,12 @@ async def get_source(
) -> Source:
if not await privacy.can_view_tree(session, user_id=viewer_id, tree=tree):
raise Forbidden("not permitted to view this tree")
if await privacy.get_membership_role(session, viewer_id, tree.id) is None:
from app.services import public_view_service
return await public_view_service.get_public_source(
session, viewer_id=viewer_id, tree=tree, source_id=source_id
)
source = (
await session.execute(
select(Source).where(
+48 -2
View File
@@ -5,16 +5,18 @@ authorization basis) and an audit entry. Reads go through the privacy engine.
import uuid
from datetime import UTC, datetime
from sqlalchemy import select
from sqlalchemy import delete, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.integrations.objectstore.base import ObjectStore
from app.models.enums import MembershipRole, TreeVisibility
from app.models.media import Media
from app.models.tree import Tree, TreeMembership
from app.models.user import User
from app.repositories.base import BaseRepository
from app.services import privacy
from app.services.audit import record_audit
from app.services.exceptions import Forbidden, NotFound
from app.services.exceptions import Conflict, Forbidden, NotFound
async def create_tree(
@@ -128,6 +130,50 @@ async def restore_tree(session: AsyncSession, *, actor: User, tree_id: uuid.UUID
return tree
async def purge_tree(
    session: AsyncSession,
    store: ObjectStore,
    *,
    actor: User,
    tree_id: uuid.UUID,
    confirm_name: str,
) -> None:
    """Permanently delete a soft-deleted tree and ALL its data — irreversible.

    Owner-only. The tree must already be in the trash (soft-deleted) and the
    caller must retype its name. Tree-owned rows are removed by the `tree_id`
    ON DELETE CASCADE; we delete the media objects from storage first (the DB
    cascade drops the rows but not the bytes). Audit entries survive with their
    `tree_id` nulled (ON DELETE SET NULL), so the purge stays in the log.

    Storage deletion is best-effort: a failure for one object is logged and
    does not stop the purge.

    Raises:
        Conflict: the tree is not soft-deleted yet.
        Forbidden: `confirm_name` does not match the tree's name (both sides
            are compared after stripping surrounding whitespace).
        Whatever `_owned_tree` raises when the actor may not act on the tree.
    """
    import logging  # function-scope so the block brings its own dependency

    log = logging.getLogger(__name__)

    tree = await _owned_tree(session, actor=actor, tree_id=tree_id)
    if tree.deleted_at is None:
        raise Conflict("delete the tree first, then purge it from the trash")
    if confirm_name.strip() != (tree.name or "").strip():
        raise Forbidden("tree name confirmation does not match")

    # Collect the storage keys before the cascade removes the media rows.
    keys = list(
        (
            await session.execute(select(Media.storage_key).where(Media.tree_id == tree.id))
        ).scalars().all()
    )
    for key in keys:
        try:
            await store.delete_object(key=key)
        except Exception:  # noqa: BLE001 — best-effort; a missing object must not block the purge
            # Leave a trace instead of swallowing silently: once the rows are
            # gone, this log line is the only pointer to an orphaned object.
            log.warning(
                "purge_tree: could not delete media object %r of tree %s",
                key,
                tree.id,
                exc_info=True,
            )

    record_audit(
        session,
        action="purge",
        entity_type="Tree",
        entity_id=tree.id,
        tree_id=tree.id,
        actor_user_id=actor.id,
        before={"name": tree.name},
    )
    await session.execute(delete(Tree).where(Tree.id == tree.id))
    await session.commit()
async def list_deleted_trees_for_user(session: AsyncSession, *, user: User) -> list[Tree]:
stmt = (
select(Tree)
@@ -106,6 +106,142 @@ async def test_authed_nonmember_does_not_see_living_pii(client):
).status_code == 200
async def _setup_sources(client):
    """Create a public tree with one deceased and one living person, a birth
    event for each, and one source + person-level citation per person. The
    living person's source title deliberately names them.

    Returns (owner_headers, tree_id, old_id, young_id, old_source_id, young_source_id).
    """
    owner = auth(await register(client, "anmcs-owner@ex.com"))

    async def _create(path, payload):
        # POST as the owner and hand back the new object's id.
        response = await client.post(path, json=payload, headers=owner)
        return response.json()["id"]

    tid = await _create("/api/v1/trees", {"name": "PubCS", "visibility": "public"})
    base = f"/api/v1/trees/{tid}"

    old = await _create(
        f"{base}/persons", {"given": "Oldcs", "surname": "Gonecs", "is_living": False}
    )
    young = await _create(
        f"{base}/persons", {"given": "Youngcs", "surname": "Csleaksurname", "is_living": True}
    )

    births = ((old, "1851"), (young, "2004"))
    for person_id, year in births:
        await client.post(
            f"{base}/events",
            json={"event_type": "birth", "person_id": person_id, "date_value": year},
            headers=owner,
        )

    s_old = await _create(f"{base}/sources", {"title": "Oldsource record"})
    # This title names the living person, so it must never reach a non-member.
    s_young = await _create(f"{base}/sources", {"title": "Youngsource Csleaktitle"})

    links = ((s_old, old, "p.1"), (s_young, young, "p.2"))
    for source_id, person_id, page in links:
        await client.post(
            f"{base}/citations",
            json={"source_id": source_id, "person_id": person_id, "page": page},
            headers=owner,
        )
    return owner, tid, old, young, s_old, s_young
async def test_authed_nonmember_citation_source_redaction(client):
    """An authenticated non-member sees neither the citations on a redacted
    living person's facts nor the sources used only for them; members do."""
    owner, tid, old, young, s_old, s_young = await _setup_sources(client)
    stranger = auth(await register(client, "anmcs-stranger@ex.com"))
    citations_url = f"/api/v1/trees/{tid}/citations"
    sources_url = f"/api/v1/trees/{tid}/sources"

    stranger_citations = await client.get(citations_url, headers=stranger)
    cited_persons = {c.get("person_id") for c in stranger_citations.json()}
    assert old in cited_persons
    assert young not in cited_persons  # living person's citation dropped

    stranger_sources = await client.get(sources_url, headers=stranger)
    visible_source_ids = {s["id"] for s in stranger_sources.json()}
    assert s_old in visible_source_ids
    assert s_young not in visible_source_ids  # source used only for the living person withheld
    # The withheld title names the living person, so it must not appear anywhere in the body.
    assert "Csleaktitle" not in stranger_sources.text

    # Fetching the withheld source directly 404s (its existence stays hidden);
    # the visible one is served normally.
    withheld = await client.get(f"{sources_url}/{s_young}", headers=stranger)
    assert withheld.status_code == 404
    served = await client.get(f"{sources_url}/{s_old}", headers=stranger)
    assert served.status_code == 200

    # Members still see everything.
    member_citations = await client.get(citations_url, headers=owner)
    assert {old, young} <= {c.get("person_id") for c in member_citations.json()}
    member_sources = await client.get(sources_url, headers=owner)
    assert {s_old, s_young} <= {s["id"] for s in member_sources.json()}
async def test_citation_redaction_via_indirect_targets(client):
    """Citations that reach a living person through their event or name (rather
    than through person_id) are dropped for non-members as well."""
    owner = auth(await register(client, "anmind-owner@ex.com"))

    async def _create(path, payload):
        # POST as the owner and hand back the new object's id.
        response = await client.post(path, json=payload, headers=owner)
        return response.json()["id"]

    tid = await _create("/api/v1/trees", {"name": "PubInd", "visibility": "public"})
    base = f"/api/v1/trees/{tid}"
    young = await _create(
        f"{base}/persons", {"given": "Youngind", "surname": "Indsurname", "is_living": True}
    )
    ev = await _create(
        f"{base}/events", {"event_type": "birth", "person_id": young, "date_value": "2005"}
    )
    nm = await _create(
        f"{base}/persons/{young}/names", {"name_type": "alias", "given": "Indalias"}
    )
    s_ev = await _create(f"{base}/sources", {"title": "EvSrc"})
    s_nm = await _create(f"{base}/sources", {"title": "NmSrc"})

    # One citation per indirect path: via the event, then via the alternate name.
    await client.post(
        f"{base}/citations", json={"source_id": s_ev, "event_id": ev}, headers=owner
    )
    await client.post(
        f"{base}/citations", json={"source_id": s_nm, "name_id": nm}, headers=owner
    )

    stranger = auth(await register(client, "anmind-stranger@ex.com"))
    stranger_citations = (await client.get(f"{base}/citations", headers=stranger)).json()
    # Neither the event-citation nor the name-citation may surface.
    assert all(c.get("event_id") != ev for c in stranger_citations)
    assert all(c.get("name_id") != nm for c in stranger_citations)

    stranger_sources = (await client.get(f"{base}/sources", headers=stranger)).json()
    visible_source_ids = {s["id"] for s in stranger_sources}
    assert s_ev not in visible_source_ids
    assert s_nm not in visible_source_ids

    # Owner (member) still sees both citations.
    member_citations = (await client.get(f"{base}/citations", headers=owner)).json()
    assert any(c.get("event_id") == ev for c in member_citations)
    assert any(c.get("name_id") == nm for c in member_citations)
async def test_member_still_sees_everything(client):
owner, tid, old, young, om, ym = await _setup(client)
+78
View File
@@ -0,0 +1,78 @@
"""On-demand purge of a soft-deleted tree: permanent, owner-only, name-confirmed,
and cascades to all tree data."""
import uuid
from sqlalchemy import func, select
from app.models.person import Person
from app.models.tree import Tree
from tests.conftest import auth, register
async def _tree_with_person(client, owner):
    """Create a tree named "Purge Me" containing one person; return the tree id."""
    created = await client.post("/api/v1/trees", json={"name": "Purge Me"}, headers=owner)
    tid = created.json()["id"]
    person = {"given": "Doomed", "surname": "Soul"}
    await client.post(f"/api/v1/trees/{tid}/persons", json=person, headers=owner)
    return tid
async def test_purge_requires_soft_delete_first(client):
    """Purging a tree that was never trashed is rejected with 409."""
    owner = auth(await register(client, "purge-a@ex.com"))
    tid = await _tree_with_person(client, owner)

    # The tree is still live here: no soft-delete happened before the purge call.
    purge_url = f"/api/v1/trees/{tid}/purge"
    response = await client.post(purge_url, json={"confirm_name": "Purge Me"}, headers=owner)
    assert response.status_code == 409
async def test_purge_name_must_match(client):
    """A wrong confirmation name is rejected with 403 and destroys nothing."""
    owner = auth(await register(client, "purge-b@ex.com"))
    tid = await _tree_with_person(client, owner)
    await client.delete(f"/api/v1/trees/{tid}", headers=owner)  # soft-delete

    purge_url = f"/api/v1/trees/{tid}/purge"
    response = await client.post(purge_url, json={"confirm_name": "WRONG"}, headers=owner)
    assert response.status_code == 403

    # The tree must still be listed in the owner's trash.
    trash = await client.get("/api/v1/trees", params={"deleted": True}, headers=owner)
    trashed_ids = [t["id"] for t in trash.json()]
    assert tid in trashed_ids
async def test_purge_owner_only(client):
    """A user who does not own the tree cannot purge it, even with the right
    name, and the rejected attempt leaves the tree in the owner's trash."""
    owner = auth(await register(client, "purge-c@ex.com"))
    other = auth(await register(client, "purge-c2@ex.com"))
    tid = await _tree_with_person(client, owner)
    await client.delete(f"/api/v1/trees/{tid}", headers=owner)  # soft-delete

    r = await client.post(
        f"/api/v1/trees/{tid}/purge", json={"confirm_name": "Purge Me"}, headers=other
    )
    # Either status is acceptable: 403 (not allowed) or 404 (tree hidden from non-owners).
    assert r.status_code in (403, 404)

    # The status code alone would not catch a handler that purged and then
    # answered with an error, so confirm the tree is still in the trash.
    deleted = (await client.get("/api/v1/trees", params={"deleted": True}, headers=owner)).json()
    assert any(t["id"] == tid for t in deleted)
async def test_purge_removes_tree_and_cascades(client, db_session):
    """A confirmed purge returns 204, removes the tree from the trash, and
    leaves neither the tree row nor any of its person rows in the database."""
    owner = auth(await register(client, "purge-d@ex.com"))
    tid = await _tree_with_person(client, owner)
    await client.delete(f"/api/v1/trees/{tid}", headers=owner)  # soft-delete

    purge_url = f"/api/v1/trees/{tid}/purge"
    response = await client.post(purge_url, json={"confirm_name": "Purge Me"}, headers=owner)
    assert response.status_code == 204

    # Gone from the trash...
    trash = await client.get("/api/v1/trees", params={"deleted": True}, headers=owner)
    assert tid not in [t["id"] for t in trash.json()]

    # ...and cascaded: no tree row, no person rows.
    tuuid = uuid.UUID(tid)

    async def _row_count(model, column):
        # Count the rows of `model` whose `column` equals the purged tree's id.
        stmt = select(func.count()).select_from(model).where(column == tuuid)
        return (await db_session.execute(stmt)).scalar()

    assert await _row_count(Tree, Tree.id) == 0
    assert await _row_count(Person, Person.tree_id) == 0