Files
provenance/backend/tests/test_authed_nonmember_redaction.py
justin a6179037c2 Close citation/source living-person leak; add on-demand tree purge
Two changes.

1. Privacy fix (NN#2/NN#3) — the citation and source list endpoints gated only
   on can_view_tree, so a non-member on a public/unlisted/site_members tree could
   enumerate citations and sources tied to a redacted living person, leaking that
   the person exists and has sourced facts (and possibly their name via a source
   title). #46 closed this for events/media/names/relationships but not
   citations/sources. Now citation_service.list_citations and
   source_service.{list_sources,get_source} delegate non-member reads to
   public_view_service, mirroring the #46 pattern:
   - citations: shown only when the cited fact resolves to FULL-visibility
     person(s) — covers the person_id, name_id, event_id (person or both-partner),
     and relationship_id (both-partner) target paths.
   - sources: shown only when they back at least one visible citation; a withheld
     source 404s (don't reveal it exists).
   Tests cover all four citation target types + source withholding + member-sees-all.

2. On-demand tree purge — owners can permanently delete a soft-deleted tree now
   instead of waiting out the 30-day auto-purge window. POST /trees/{id}/purge
   (owner-only): the tree must already be in the trash, and the caller retypes its
   name to confirm. Media objects are deleted from storage, then a single
   DELETE on trees cascades all tree-owned rows via the tree_id ON DELETE CASCADE;
   the audit entry survives (tree_id SET NULL). Frontend adds a "Delete forever"
   button to the Recently-deleted list. No migration.

Suite: 102 passing.
Signed-off-by: Justin Paul <justin@jpaul.me>
2026-06-10 22:38:59 -04:00

258 lines
9.8 KiB
Python

"""Authed non-member reads must redact PER-PERSON, not just gate on the tree.
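A logged-in user who is NOT a member of a public tree previously saw living
people's dates, real alternate names, and media through the family-view
endpoints — only the person *list* was redacted. These tests assert that leak is
closed while members still see everything. The same per-person rule is checked
for the citation and source endpoints: citations on a living person's facts, and
sources used only for them, must be withheld from non-members.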
"""
from tests.conftest import auth, register
# Sentinel values attached to the living person created by ``_setup``. The
# non-member test greps raw response bodies for them, so each should be
# distinctive enough not to show up by accident.
LSURNAME: str = "Authleaksurname"  # surname of the living person
LALIAS: str = "Authleakalias"  # given name of the living person's alias
LYEAR: str = "2003"  # date_value of the living person's birth event
async def _setup(client):
    """Create a public tree with one deceased and one living person.

    Each person gets a birth event and one uploaded media file; the living
    person also gets an alias name. The living person carries the module
    sentinels (``LSURNAME``, ``LALIAS``, ``LYEAR``).

    Returns:
        ``(owner, tid, old, young, om, ym)`` — the owner's request headers,
        the tree id, the deceased and living person ids, and the media ids
        attached to the deceased and living person respectively.
    """
    owner = auth(await register(client, "anm-owner@ex.com"))

    tree_resp = await client.post(
        "/api/v1/trees", json={"name": "Pub", "visibility": "public"}, headers=owner
    )
    tid = tree_resp.json()["id"]

    # Deceased person first, then the living one whose data must be redacted.
    old_resp = await client.post(
        f"/api/v1/trees/{tid}/persons",
        json={"given": "Olde", "surname": "Gone", "is_living": False},
        headers=owner,
    )
    old = old_resp.json()["id"]

    young_resp = await client.post(
        f"/api/v1/trees/{tid}/persons",
        json={"given": "Youngauth", "surname": LSURNAME, "is_living": True},
        headers=owner,
    )
    young = young_resp.json()["id"]

    # One birth event per person.
    births = [(old, "1855"), (young, LYEAR)]
    for person_id, birth_year in births:
        event_payload = {
            "event_type": "birth",
            "person_id": person_id,
            "date_value": birth_year,
        }
        await client.post(
            f"/api/v1/trees/{tid}/events", json=event_payload, headers=owner
        )

    # Alternate name on the living person only.
    await client.post(
        f"/api/v1/trees/{tid}/persons/{young}/names",
        json={"name_type": "alias", "given": LALIAS},
        headers=owner,
    )

    # One media upload linked to each person.
    old_media_resp = await client.post(
        f"/api/v1/trees/{tid}/media",
        files={"file": ("o.txt", b"old-photo", "text/plain")},
        data={"person_id": old},
        headers=owner,
    )
    om = old_media_resp.json()["id"]

    young_media_resp = await client.post(
        f"/api/v1/trees/{tid}/media",
        files={"file": ("y.txt", b"young-photo", "text/plain")},
        data={"person_id": young},
        headers=owner,
    )
    ym = young_media_resp.json()["id"]

    return owner, tid, old, young, om, ym
async def test_authed_nonmember_does_not_see_living_pii(client):
    """A logged-in non-member of a public tree gets no living-person details.

    Uses the tree built by ``_setup`` and a freshly registered second user to
    check the event, name, relationship and media endpoints.
    """
    _owner, tid, old, young, om, ym = await _setup(client)
    stranger = auth(await register(client, "anm-stranger@ex.com"))

    # Tree-wide event list: the deceased person's event stays, the living one's goes.
    events_resp = await client.get(f"/api/v1/trees/{tid}/events", headers=stranger)
    event_person_ids = [e["person_id"] for e in events_resp.json()]
    assert old in event_person_ids
    assert young not in event_person_ids

    # Per-person endpoints for the living person return empty lists.
    names_resp = await client.get(
        f"/api/v1/trees/{tid}/persons/{young}/names", headers=stranger
    )
    assert names_resp.json() == []
    person_events_resp = await client.get(
        f"/api/v1/trees/{tid}/persons/{young}/events", headers=stranger
    )
    assert person_events_resp.json() == []

    # None of the sentinel strings may appear anywhere in these raw bodies.
    leak_paths = (
        f"/api/v1/trees/{tid}/events",
        f"/api/v1/trees/{tid}/relationships",
        f"/api/v1/trees/{tid}/persons/{young}/names",
        f"/api/v1/trees/{tid}/media",
    )
    for path in leak_paths:
        body = (await client.get(path, headers=stranger)).text
        for needle in (LSURNAME, LALIAS, LYEAR):
            assert needle not in body, path

    # Media: the living person's file is absent from the list and cannot be
    # downloaded; the deceased person's file is listed and downloadable.
    media_resp = await client.get(f"/api/v1/trees/{tid}/media", headers=stranger)
    media_ids = {m["id"] for m in media_resp.json()}
    assert om in media_ids
    assert ym not in media_ids

    hidden = await client.get(
        f"/api/v1/trees/{tid}/media/{ym}/content", headers=stranger
    )
    assert hidden.status_code == 404
    visible = await client.get(
        f"/api/v1/trees/{tid}/media/{om}/content", headers=stranger
    )
    assert visible.status_code == 200
async def _setup_sources(client):
    """Create a public tree whose two people each have one cited source.

    The deceased and the living person each get a birth event, and each is the
    ``person_id`` target of one citation pointing at its own source.

    Returns:
        ``(owner, tid, old, young, s_old, s_young)`` — the owner's request
        headers, the tree id, the deceased and living person ids, and the ids
        of the sources cited for the deceased and living person respectively.
    """
    owner = auth(await register(client, "anmcs-owner@ex.com"))

    tree_resp = await client.post(
        "/api/v1/trees", json={"name": "PubCS", "visibility": "public"}, headers=owner
    )
    tid = tree_resp.json()["id"]

    old_resp = await client.post(
        f"/api/v1/trees/{tid}/persons",
        json={"given": "Oldcs", "surname": "Gonecs", "is_living": False},
        headers=owner,
    )
    old = old_resp.json()["id"]

    young_resp = await client.post(
        f"/api/v1/trees/{tid}/persons",
        json={"given": "Youngcs", "surname": "Csleaksurname", "is_living": True},
        headers=owner,
    )
    young = young_resp.json()["id"]

    # One birth event per person.
    births = [(old, "1851"), (young, "2004")]
    for person_id, birth_year in births:
        event_payload = {
            "event_type": "birth",
            "person_id": person_id,
            "date_value": birth_year,
        }
        await client.post(
            f"/api/v1/trees/{tid}/events", json=event_payload, headers=owner
        )

    old_source_resp = await client.post(
        f"/api/v1/trees/{tid}/sources", json={"title": "Oldsource record"}, headers=owner
    )
    s_old = old_source_resp.json()["id"]

    # This title stands in for one that names the living person.
    young_source_resp = await client.post(
        f"/api/v1/trees/{tid}/sources",
        json={"title": "Youngsource Csleaktitle"},
        headers=owner,
    )
    s_young = young_source_resp.json()["id"]

    # Cite each source directly against its person.
    await client.post(
        f"/api/v1/trees/{tid}/citations",
        json={"source_id": s_old, "person_id": old, "page": "p.1"},
        headers=owner,
    )
    await client.post(
        f"/api/v1/trees/{tid}/citations",
        json={"source_id": s_young, "person_id": young, "page": "p.2"},
        headers=owner,
    )

    return owner, tid, old, young, s_old, s_young
async def test_authed_nonmember_citation_source_redaction(client):
    """Non-members see neither a living person's citations nor sources used only for them.

    The owner, as a member, must still see both citations and both sources.
    """
    owner, tid, old, young, s_old, s_young = await _setup_sources(client)
    stranger = auth(await register(client, "anmcs-stranger@ex.com"))

    # Citation list: the deceased person's citation stays, the living person's is dropped.
    cites_resp = await client.get(f"/api/v1/trees/{tid}/citations", headers=stranger)
    cited_people = {c.get("person_id") for c in cites_resp.json()}
    assert old in cited_people
    assert young not in cited_people

    # Source list: the source cited only for the living person is withheld,
    # and its title must not appear anywhere in the raw body.
    sources_resp = await client.get(f"/api/v1/trees/{tid}/sources", headers=stranger)
    visible_source_ids = {s["id"] for s in sources_resp.json()}
    assert s_old in visible_source_ids
    assert s_young not in visible_source_ids
    assert "Csleaktitle" not in sources_resp.text

    # Fetching the withheld source directly gives 404, so its existence is
    # not revealed; the visible one loads normally.
    withheld = await client.get(
        f"/api/v1/trees/{tid}/sources/{s_young}", headers=stranger
    )
    assert withheld.status_code == 404
    allowed = await client.get(
        f"/api/v1/trees/{tid}/sources/{s_old}", headers=stranger
    )
    assert allowed.status_code == 200

    # Member view is unredacted.
    member_cites_resp = await client.get(
        f"/api/v1/trees/{tid}/citations", headers=owner
    )
    member_cited = {c.get("person_id") for c in member_cites_resp.json()}
    assert member_cited >= {old, young}

    member_sources_resp = await client.get(
        f"/api/v1/trees/{tid}/sources", headers=owner
    )
    member_source_ids = {s["id"] for s in member_sources_resp.json()}
    assert member_source_ids >= {s_old, s_young}
async def test_citation_redaction_via_indirect_targets(client):
    """Citations targeting a living person *indirectly* (via their event or name,
    not person_id) must also be dropped for non-members.

    The owner, as a member, must still see both citations and both sources.
    """
    owner = auth(await register(client, "anmind-owner@ex.com"))
    tid = (
        await client.post(
            "/api/v1/trees", json={"name": "PubInd", "visibility": "public"}, headers=owner
        )
    ).json()["id"]
    young = (
        await client.post(
            f"/api/v1/trees/{tid}/persons",
            json={"given": "Youngind", "surname": "Indsurname", "is_living": True},
            headers=owner,
        )
    ).json()["id"]
    ev = (
        await client.post(
            f"/api/v1/trees/{tid}/events",
            json={"event_type": "birth", "person_id": young, "date_value": "2005"},
            headers=owner,
        )
    ).json()["id"]
    nm = (
        await client.post(
            f"/api/v1/trees/{tid}/persons/{young}/names",
            json={"name_type": "alias", "given": "Indalias"},
            headers=owner,
        )
    ).json()["id"]
    s_ev = (
        await client.post(
            f"/api/v1/trees/{tid}/sources", json={"title": "EvSrc"}, headers=owner
        )
    ).json()["id"]
    s_nm = (
        await client.post(
            f"/api/v1/trees/{tid}/sources", json={"title": "NmSrc"}, headers=owner
        )
    ).json()["id"]
    # Neither citation carries a person_id: one points at the living person's
    # event, the other at their alias name.
    await client.post(
        f"/api/v1/trees/{tid}/citations", json={"source_id": s_ev, "event_id": ev}, headers=owner
    )
    await client.post(
        f"/api/v1/trees/{tid}/citations", json={"source_id": s_nm, "name_id": nm}, headers=owner
    )

    stranger = auth(await register(client, "anmind-stranger@ex.com"))
    cites = (await client.get(f"/api/v1/trees/{tid}/citations", headers=stranger)).json()
    # Neither the event-citation nor the name-citation may surface.
    assert not any(c.get("event_id") == ev for c in cites)
    assert not any(c.get("name_id") == nm for c in cites)
    src_ids = {
        s["id"]
        for s in (await client.get(f"/api/v1/trees/{tid}/sources", headers=stranger)).json()
    }
    assert s_ev not in src_ids and s_nm not in src_ids

    # Owner (member) sees both citations and both sources.
    mc = (await client.get(f"/api/v1/trees/{tid}/citations", headers=owner)).json()
    assert any(c.get("event_id") == ev for c in mc) and any(c.get("name_id") == nm for c in mc)
    # Without this member-side check, the non-member "not in" assertion above
    # would also pass if the sources were hidden from everyone.
    member_src_ids = {
        s["id"]
        for s in (await client.get(f"/api/v1/trees/{tid}/sources", headers=owner)).json()
    }
    assert {s_ev, s_nm} <= member_src_ids
async def test_member_still_sees_everything(client):
    """The tree owner keeps the living person's events, names and media."""
    owner, tid, _old, young, _om, ym = await _setup(client)

    # The living person's event is present in the tree-wide list.
    events_resp = await client.get(f"/api/v1/trees/{tid}/events", headers=owner)
    event_person_ids = [e["person_id"] for e in events_resp.json()]
    assert young in event_person_ids

    # Their name list is not emptied for a member.
    names_resp = await client.get(
        f"/api/v1/trees/{tid}/persons/{young}/names", headers=owner
    )
    assert names_resp.json() != []

    # Their media is listed and downloadable.
    media_resp = await client.get(f"/api/v1/trees/{tid}/media", headers=owner)
    member_media = {m["id"] for m in media_resp.json()}
    assert ym in member_media

    content_resp = await client.get(
        f"/api/v1/trees/{tid}/media/{ym}/content", headers=owner
    )
    assert content_resp.status_code == 200