From 4788ae7723cdc17f47bc91157a70b2963e62f47c Mon Sep 17 00:00:00 2001 From: Justin Paul Date: Sun, 7 Jun 2026 07:55:13 -0400 Subject: [PATCH] Add fuzzy name search (pg_trgm) and living-person protection Fuzzy search: pg_trgm extension + trigram GIN indexes on name parts and a GET /trees/{id}/persons?q= search ranked by trigram similarity (finds Mueller for 'muller'), privacy-filtered. Living-person protection: the privacy engine now derives possibly-living status (explicit flag, else no death fact + birth within ~100y or unknown) and returns 'redacted' for non-members of public/unlisted trees; the service minimises those records ('Living person', no vitals). Members are unaffected. 31 tests pass. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Justin Paul --- backend/app/api/v1/persons.py | 12 ++- backend/app/models/person.py | 18 +++- backend/app/services/person_service.py | 90 ++++++++++++++++--- backend/app/services/privacy.py | 51 ++++++++++- .../9a2b1c7d4e10_pg_trgm_name_search.py | 33 +++++++ backend/tests/conftest.py | 2 + backend/tests/test_privacy_living.py | 36 ++++++++ backend/tests/test_search.py | 24 +++++ 8 files changed, 248 insertions(+), 18 deletions(-) create mode 100644 backend/migrations/versions/9a2b1c7d4e10_pg_trgm_name_search.py create mode 100644 backend/tests/test_privacy_living.py create mode 100644 backend/tests/test_search.py diff --git a/backend/app/api/v1/persons.py b/backend/app/api/v1/persons.py index 43f66a0..9beb92c 100644 --- a/backend/app/api/v1/persons.py +++ b/backend/app/api/v1/persons.py @@ -36,10 +36,18 @@ async def create_person( @router.get("/{tree_id}/persons", response_model=list[PersonRead]) async def list_persons( - tree_id: uuid.UUID, session: SessionDep, current: CurrentUser, deleted: bool = False + tree_id: uuid.UUID, + session: SessionDep, + current: CurrentUser, + deleted: bool = False, + q: str | None = None, ) -> list[PersonRead]: tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id) - if deleted: + if q: + persons = await person_service.search_persons( + session, viewer_id=current.id, tree=tree, query=q + ) + elif deleted: persons = await person_service.list_deleted_persons( session, viewer_id=current.id, tree=tree ) diff --git a/backend/app/models/person.py b/backend/app/models/person.py index d266482..d04dc2a 100644 --- a/backend/app/models/person.py +++ b/backend/app/models/person.py @@ -7,7 +7,7 @@ aliases) so name changes over time are first-class. import uuid -from sqlalchemy import Boolean, ForeignKey, Integer, String, Text, text +from sqlalchemy import Boolean, ForeignKey, Index, Integer, String, Text, text from sqlalchemy import Enum as SAEnum from sqlalchemy.orm import Mapped, mapped_column @@ -33,6 +33,22 @@ class Person(Base, UUIDPrimaryKey, TenantScoped, Timestamps, SoftDelete): class Name(Base, UUIDPrimaryKey, TenantScoped, Timestamps, SoftDelete): __tablename__ = "names" + # Trigram indexes for fuzzy name search (Mueller/Müller/Muller). Requires the + # pg_trgm extension (enabled in the accompanying migration). + __table_args__ = ( + Index( + "ix_names_given_trgm", + "given", + postgresql_using="gin", + postgresql_ops={"given": "gin_trgm_ops"}, + ), + Index( + "ix_names_surname_trgm", + "surname", + postgresql_using="gin", + postgresql_ops={"surname": "gin_trgm_ops"}, + ), + ) person_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("persons.id", ondelete="CASCADE"), index=True diff --git a/backend/app/services/person_service.py b/backend/app/services/person_service.py index 78ba7e5..03e82c0 100644 --- a/backend/app/services/person_service.py +++ b/backend/app/services/person_service.py @@ -6,7 +6,7 @@ person through the privacy engine. Each returned Person gets a transient import uuid from datetime import UTC, datetime -from sqlalchemy import select +from sqlalchemy import func, or_, select from sqlalchemy.ext.asyncio import AsyncSession from app.models.enums import PersonPrivacy @@ -25,6 +25,14 @@ def _format_name(name: Name) -> str | None: return joined or name.display_name +def _redact(person: Person) -> None: + """Minimise a possibly-living person for a non-member view (transient only — + never committed).""" + person.primary_name = "Living person" + person.gender = None + person.is_living = True + + async def _attach_primary_name(session: AsyncSession, person: Person) -> None: stmt = ( select(Name) @@ -104,12 +112,15 @@ async def get_person( if person is None: raise NotFound("person not found") # Run the single person through the privacy engine (redaction lands Phase 2). - if ( - await privacy.person_visibility(session, user_id=viewer_id, tree=tree, person=person) - == Visibility.hidden - ): + vis = await privacy.person_visibility( + session, user_id=viewer_id, tree=tree, person=person + ) + if vis == Visibility.hidden: raise NotFound("person not found") - await _attach_primary_name(session, person) + if vis == Visibility.redacted: + _redact(person) + else: + await _attach_primary_name(session, person) return person @@ -199,13 +210,66 @@ async def list_persons( visible: list[Person] = [] for person in persons: - if ( - await privacy.person_visibility( - session, user_id=viewer_id, tree=tree, person=person - ) - == Visibility.hidden - ): + vis = await privacy.person_visibility( + session, user_id=viewer_id, tree=tree, person=person + ) + if vis == Visibility.hidden: continue - await _attach_primary_name(session, person) + if vis == Visibility.redacted: + _redact(person) + else: + await _attach_primary_name(session, person) visible.append(person) return visible + + +async def search_persons( + session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tree, query: str, limit: int = 50 +) -> list[Person]: + if not await privacy.can_view_tree(session, user_id=viewer_id, tree=tree): + raise Forbidden("not permitted to view this tree") + q = query.strip() + if not q: + return [] + like = f"%{q}%" + score = func.greatest( + func.similarity(func.coalesce(Name.given, ""), q), + func.similarity(func.coalesce(Name.surname, ""), q), + ) + sub = ( + select(Name.person_id.label("pid"), func.max(score).label("score")) + .where( + Name.tree_id == tree.id, + Name.deleted_at.is_(None), + or_( + Name.given.op("%")(q), + Name.surname.op("%")(q), + Name.given.ilike(like), + Name.surname.ilike(like), + ), + ) + .group_by(Name.person_id) + .order_by(func.max(score).desc()) + .limit(limit) + .subquery() + ) + stmt = ( + select(Person) + .join(sub, sub.c.pid == Person.id) + .where(Person.tree_id == tree.id, Person.deleted_at.is_(None)) + .order_by(sub.c.score.desc()) + ) + persons = list((await session.execute(stmt)).scalars().all()) + out: list[Person] = [] + for person in persons: + vis = await privacy.person_visibility( + session, user_id=viewer_id, tree=tree, person=person + ) + if vis == Visibility.hidden: + continue + if vis == Visibility.redacted: + _redact(person) + else: + await _attach_primary_name(session, person) + out.append(person) + return out diff --git a/backend/app/services/privacy.py b/backend/app/services/privacy.py index e077af0..74df825 100644 --- a/backend/app/services/privacy.py +++ b/backend/app/services/privacy.py @@ -8,14 +8,20 @@ tree's visibility, the per-person override, and (Phase 2) living-person status. import enum import uuid +from datetime import UTC, datetime from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from app.models.enums import MembershipRole, PersonPrivacy, TreeVisibility +from app.models.event import Event from app.models.person import Person from app.models.tree import Tree, TreeMembership +# A person with no death fact whose birth is within this window (or unknown) is +# treated as possibly living and redacted from non-members (ARCHITECTURE §6). +LIVING_RECENCY_YEARS = 100 + class Visibility(enum.StrEnum): full = "full" @@ -48,15 +54,56 @@ async def can_edit_tree(session: AsyncSession, *, user_id: uuid.UUID | None, tre return role in (MembershipRole.owner, MembershipRole.editor) +async def is_possibly_living(session: AsyncSession, person: Person) -> bool: + """True if the person should be treated as living: explicit flag, or (absent + a death fact) a birth within the recency window or an unknown birth.""" + if person.is_living is True: + return True + if person.is_living is False: + return False + death = ( + await session.execute( + select(Event.id) + .where( + Event.person_id == person.id, + Event.event_type == "death", + Event.deleted_at.is_(None), + ) + .limit(1) + ) + ).scalar_one_or_none() + if death is not None: + return False + birth = ( + await session.execute( + select(Event.date_start) + .where( + Event.person_id == person.id, + Event.event_type == "birth", + Event.date_start.is_not(None), + Event.deleted_at.is_(None), + ) + .order_by(Event.date_start) + .limit(1) + ) + ).scalar_one_or_none() + if birth is None: + return True # unknown birth → treat as possibly living + return (datetime.now(UTC).year - birth.year) < LIVING_RECENCY_YEARS + + async def person_visibility( session: AsyncSession, *, user_id: uuid.UUID | None, tree: Tree, person: Person ) -> Visibility: if not await can_view_tree(session, user_id=user_id, tree=tree): return Visibility.hidden if await get_membership_role(session, user_id, tree.id) is not None: - return Visibility.full + return Visibility.full # members see everyone in their tree # Non-member viewing a public/unlisted tree: if person.privacy == PersonPrivacy.private: return Visibility.hidden - # TODO(Phase 2): redact living people for non-members (ARCHITECTURE §6). + if person.privacy == PersonPrivacy.public: + return Visibility.full # explicit per-person opt-in + if await is_possibly_living(session, person): + return Visibility.redacted # living people are protected by default return Visibility.full diff --git a/backend/migrations/versions/9a2b1c7d4e10_pg_trgm_name_search.py b/backend/migrations/versions/9a2b1c7d4e10_pg_trgm_name_search.py new file mode 100644 index 0000000..5d0d478 --- /dev/null +++ b/backend/migrations/versions/9a2b1c7d4e10_pg_trgm_name_search.py @@ -0,0 +1,33 @@ +"""pg_trgm extension + trigram name indexes for fuzzy search + +Revision ID: 9a2b1c7d4e10 +Revises: 7fc7024ef432 +Create Date: 2026-06-07 + +""" +from collections.abc import Sequence + +from alembic import op + +revision: str = "9a2b1c7d4e10" +down_revision: str | None = "7fc7024ef432" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm") + op.execute( + "CREATE INDEX IF NOT EXISTS ix_names_given_trgm " + "ON names USING gin (given gin_trgm_ops)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_names_surname_trgm " + "ON names USING gin (surname gin_trgm_ops)" + ) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS ix_names_surname_trgm") + op.execute("DROP INDEX IF EXISTS ix_names_given_trgm") + # Leave the pg_trgm extension in place; other features may rely on it. diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 7a61802..846bf0f 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -11,6 +11,7 @@ import os import pytest import pytest_asyncio from httpx import ASGITransport, AsyncClient +from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine import app.models # noqa: F401 — register all models on Base.metadata @@ -72,6 +73,7 @@ async def client(): engine = create_async_engine(TEST_DATABASE_URL) async with engine.begin() as conn: + await conn.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm")) await conn.run_sync(Base.metadata.drop_all) await conn.run_sync(Base.metadata.create_all) diff --git a/backend/tests/test_privacy_living.py b/backend/tests/test_privacy_living.py new file mode 100644 index 0000000..35dd14c --- /dev/null +++ b/backend/tests/test_privacy_living.py @@ -0,0 +1,36 @@ +"""Living-person protection: living people are redacted from non-members.""" + +from tests.conftest import auth, register + + +async def test_living_person_redacted_for_non_members(client): + owner = auth(await register(client, "pub-owner@example.com")) + tid = ( + await client.post( + "/api/v1/trees", json={"name": "Public", "visibility": "public"}, headers=owner + ) + ).json()["id"] + await client.post( + f"/api/v1/trees/{tid}/persons", + json={"given": "Old", "surname": "Ancestor", "is_living": False}, + headers=owner, + ) + await client.post( + f"/api/v1/trees/{tid}/persons", + json={"given": "Young", "surname": "Living", "is_living": True}, + headers=owner, + ) + + other = auth(await register(client, "pub-viewer@example.com")) + people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=other)).json() + names = {p["primary_name"] for p in people} + assert "Old Ancestor" in names # deceased is visible + assert "Living person" in names # living is redacted + assert "Young Living" not in names # the real living name is hidden + # The redacted person leaks no gender. + living = next(p for p in people if p["primary_name"] == "Living person") + assert living["gender"] is None + + # The owner (a member) sees real names. + owner_people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=owner)).json() + assert "Young Living" in {p["primary_name"] for p in owner_people} diff --git a/backend/tests/test_search.py b/backend/tests/test_search.py new file mode 100644 index 0000000..c04c5c4 --- /dev/null +++ b/backend/tests/test_search.py @@ -0,0 +1,24 @@ +"""Fuzzy name search (pg_trgm).""" + +from tests.conftest import auth, register + + +async def test_fuzzy_name_search(client): + h = auth(await register(client, "search@example.com")) + tid = (await client.post("/api/v1/trees", json={"name": "S"}, headers=h)).json()["id"] + for given, surname in [("Hans", "Mueller"), ("John", "Smith"), ("Anna", "Vogel")]: + await client.post( + f"/api/v1/trees/{tid}/persons", + json={"given": given, "surname": surname}, + headers=h, + ) + + # Trigram fuzziness: "muller" should find "Mueller" (not a substring match). + r = await client.get(f"/api/v1/trees/{tid}/persons", params={"q": "muller"}, headers=h) + assert r.status_code == 200 + names = [p["primary_name"] or "" for p in r.json()] + assert any("Mueller" in n for n in names) + + # Substring search still works. + r2 = await client.get(f"/api/v1/trees/{tid}/persons", params={"q": "smi"}, headers=h) + assert any("Smith" in (p["primary_name"] or "") for p in r2.json())