Fuzzy search (pg_trgm) + living-person protection #15
@@ -36,10 +36,18 @@ async def create_person(
|
|||||||
|
|
||||||
@router.get("/{tree_id}/persons", response_model=list[PersonRead])
|
@router.get("/{tree_id}/persons", response_model=list[PersonRead])
|
||||||
async def list_persons(
|
async def list_persons(
|
||||||
tree_id: uuid.UUID, session: SessionDep, current: CurrentUser, deleted: bool = False
|
tree_id: uuid.UUID,
|
||||||
|
session: SessionDep,
|
||||||
|
current: CurrentUser,
|
||||||
|
deleted: bool = False,
|
||||||
|
q: str | None = None,
|
||||||
) -> list[PersonRead]:
|
) -> list[PersonRead]:
|
||||||
tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
|
tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
|
||||||
if deleted:
|
if q:
|
||||||
|
persons = await person_service.search_persons(
|
||||||
|
session, viewer_id=current.id, tree=tree, query=q
|
||||||
|
)
|
||||||
|
elif deleted:
|
||||||
persons = await person_service.list_deleted_persons(
|
persons = await person_service.list_deleted_persons(
|
||||||
session, viewer_id=current.id, tree=tree
|
session, viewer_id=current.id, tree=tree
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ aliases) so name changes over time are first-class.
|
|||||||
|
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from sqlalchemy import Boolean, ForeignKey, Integer, String, Text, text
|
from sqlalchemy import Boolean, ForeignKey, Index, Integer, String, Text, text
|
||||||
from sqlalchemy import Enum as SAEnum
|
from sqlalchemy import Enum as SAEnum
|
||||||
from sqlalchemy.orm import Mapped, mapped_column
|
from sqlalchemy.orm import Mapped, mapped_column
|
||||||
|
|
||||||
@@ -33,6 +33,22 @@ class Person(Base, UUIDPrimaryKey, TenantScoped, Timestamps, SoftDelete):
|
|||||||
|
|
||||||
class Name(Base, UUIDPrimaryKey, TenantScoped, Timestamps, SoftDelete):
|
class Name(Base, UUIDPrimaryKey, TenantScoped, Timestamps, SoftDelete):
|
||||||
__tablename__ = "names"
|
__tablename__ = "names"
|
||||||
|
# Trigram indexes for fuzzy name search (Mueller/Müller/Muller). Requires the
|
||||||
|
# pg_trgm extension (enabled in the accompanying migration).
|
||||||
|
__table_args__ = (
|
||||||
|
Index(
|
||||||
|
"ix_names_given_trgm",
|
||||||
|
"given",
|
||||||
|
postgresql_using="gin",
|
||||||
|
postgresql_ops={"given": "gin_trgm_ops"},
|
||||||
|
),
|
||||||
|
Index(
|
||||||
|
"ix_names_surname_trgm",
|
||||||
|
"surname",
|
||||||
|
postgresql_using="gin",
|
||||||
|
postgresql_ops={"surname": "gin_trgm_ops"},
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
person_id: Mapped[uuid.UUID] = mapped_column(
|
person_id: Mapped[uuid.UUID] = mapped_column(
|
||||||
ForeignKey("persons.id", ondelete="CASCADE"), index=True
|
ForeignKey("persons.id", ondelete="CASCADE"), index=True
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ person through the privacy engine. Each returned Person gets a transient
|
|||||||
import uuid
|
import uuid
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
from sqlalchemy import select
|
from sqlalchemy import func, or_, select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.models.enums import PersonPrivacy
|
from app.models.enums import PersonPrivacy
|
||||||
@@ -25,6 +25,14 @@ def _format_name(name: Name) -> str | None:
|
|||||||
return joined or name.display_name
|
return joined or name.display_name
|
||||||
|
|
||||||
|
|
||||||
|
def _redact(person: Person) -> None:
|
||||||
|
"""Minimise a possibly-living person for a non-member view (transient only —
|
||||||
|
never committed)."""
|
||||||
|
person.primary_name = "Living person"
|
||||||
|
person.gender = None
|
||||||
|
person.is_living = True
|
||||||
|
|
||||||
|
|
||||||
async def _attach_primary_name(session: AsyncSession, person: Person) -> None:
|
async def _attach_primary_name(session: AsyncSession, person: Person) -> None:
|
||||||
stmt = (
|
stmt = (
|
||||||
select(Name)
|
select(Name)
|
||||||
@@ -104,12 +112,15 @@ async def get_person(
|
|||||||
if person is None:
|
if person is None:
|
||||||
raise NotFound("person not found")
|
raise NotFound("person not found")
|
||||||
# Run the single person through the privacy engine (redaction lands Phase 2).
|
# Run the single person through the privacy engine (redaction lands Phase 2).
|
||||||
if (
|
vis = await privacy.person_visibility(
|
||||||
await privacy.person_visibility(session, user_id=viewer_id, tree=tree, person=person)
|
session, user_id=viewer_id, tree=tree, person=person
|
||||||
== Visibility.hidden
|
)
|
||||||
):
|
if vis == Visibility.hidden:
|
||||||
raise NotFound("person not found")
|
raise NotFound("person not found")
|
||||||
await _attach_primary_name(session, person)
|
if vis == Visibility.redacted:
|
||||||
|
_redact(person)
|
||||||
|
else:
|
||||||
|
await _attach_primary_name(session, person)
|
||||||
return person
|
return person
|
||||||
|
|
||||||
|
|
||||||
@@ -199,13 +210,66 @@ async def list_persons(
|
|||||||
|
|
||||||
visible: list[Person] = []
|
visible: list[Person] = []
|
||||||
for person in persons:
|
for person in persons:
|
||||||
if (
|
vis = await privacy.person_visibility(
|
||||||
await privacy.person_visibility(
|
session, user_id=viewer_id, tree=tree, person=person
|
||||||
session, user_id=viewer_id, tree=tree, person=person
|
)
|
||||||
)
|
if vis == Visibility.hidden:
|
||||||
== Visibility.hidden
|
|
||||||
):
|
|
||||||
continue
|
continue
|
||||||
await _attach_primary_name(session, person)
|
if vis == Visibility.redacted:
|
||||||
|
_redact(person)
|
||||||
|
else:
|
||||||
|
await _attach_primary_name(session, person)
|
||||||
visible.append(person)
|
visible.append(person)
|
||||||
return visible
|
return visible
|
||||||
|
|
||||||
|
|
||||||
|
async def search_persons(
|
||||||
|
session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tree, query: str, limit: int = 50
|
||||||
|
) -> list[Person]:
|
||||||
|
if not await privacy.can_view_tree(session, user_id=viewer_id, tree=tree):
|
||||||
|
raise Forbidden("not permitted to view this tree")
|
||||||
|
q = query.strip()
|
||||||
|
if not q:
|
||||||
|
return []
|
||||||
|
like = f"%{q}%"
|
||||||
|
score = func.greatest(
|
||||||
|
func.similarity(func.coalesce(Name.given, ""), q),
|
||||||
|
func.similarity(func.coalesce(Name.surname, ""), q),
|
||||||
|
)
|
||||||
|
sub = (
|
||||||
|
select(Name.person_id.label("pid"), func.max(score).label("score"))
|
||||||
|
.where(
|
||||||
|
Name.tree_id == tree.id,
|
||||||
|
Name.deleted_at.is_(None),
|
||||||
|
or_(
|
||||||
|
Name.given.op("%")(q),
|
||||||
|
Name.surname.op("%")(q),
|
||||||
|
Name.given.ilike(like),
|
||||||
|
Name.surname.ilike(like),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.group_by(Name.person_id)
|
||||||
|
.order_by(func.max(score).desc())
|
||||||
|
.limit(limit)
|
||||||
|
.subquery()
|
||||||
|
)
|
||||||
|
stmt = (
|
||||||
|
select(Person)
|
||||||
|
.join(sub, sub.c.pid == Person.id)
|
||||||
|
.where(Person.tree_id == tree.id, Person.deleted_at.is_(None))
|
||||||
|
.order_by(sub.c.score.desc())
|
||||||
|
)
|
||||||
|
persons = list((await session.execute(stmt)).scalars().all())
|
||||||
|
out: list[Person] = []
|
||||||
|
for person in persons:
|
||||||
|
vis = await privacy.person_visibility(
|
||||||
|
session, user_id=viewer_id, tree=tree, person=person
|
||||||
|
)
|
||||||
|
if vis == Visibility.hidden:
|
||||||
|
continue
|
||||||
|
if vis == Visibility.redacted:
|
||||||
|
_redact(person)
|
||||||
|
else:
|
||||||
|
await _attach_primary_name(session, person)
|
||||||
|
out.append(person)
|
||||||
|
return out
|
||||||
|
|||||||
@@ -8,14 +8,20 @@ tree's visibility, the per-person override, and (Phase 2) living-person status.
|
|||||||
|
|
||||||
import enum
|
import enum
|
||||||
import uuid
|
import uuid
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.models.enums import MembershipRole, PersonPrivacy, TreeVisibility
|
from app.models.enums import MembershipRole, PersonPrivacy, TreeVisibility
|
||||||
|
from app.models.event import Event
|
||||||
from app.models.person import Person
|
from app.models.person import Person
|
||||||
from app.models.tree import Tree, TreeMembership
|
from app.models.tree import Tree, TreeMembership
|
||||||
|
|
||||||
|
# A person with no death fact whose birth is within this window (or unknown) is
|
||||||
|
# treated as possibly living and redacted from non-members (ARCHITECTURE §6).
|
||||||
|
LIVING_RECENCY_YEARS = 100
|
||||||
|
|
||||||
|
|
||||||
class Visibility(enum.StrEnum):
|
class Visibility(enum.StrEnum):
|
||||||
full = "full"
|
full = "full"
|
||||||
@@ -48,15 +54,56 @@ async def can_edit_tree(session: AsyncSession, *, user_id: uuid.UUID | None, tre
|
|||||||
return role in (MembershipRole.owner, MembershipRole.editor)
|
return role in (MembershipRole.owner, MembershipRole.editor)
|
||||||
|
|
||||||
|
|
||||||
|
async def is_possibly_living(session: AsyncSession, person: Person) -> bool:
|
||||||
|
"""True if the person should be treated as living: explicit flag, or (absent
|
||||||
|
a death fact) a birth within the recency window or an unknown birth."""
|
||||||
|
if person.is_living is True:
|
||||||
|
return True
|
||||||
|
if person.is_living is False:
|
||||||
|
return False
|
||||||
|
death = (
|
||||||
|
await session.execute(
|
||||||
|
select(Event.id)
|
||||||
|
.where(
|
||||||
|
Event.person_id == person.id,
|
||||||
|
Event.event_type == "death",
|
||||||
|
Event.deleted_at.is_(None),
|
||||||
|
)
|
||||||
|
.limit(1)
|
||||||
|
)
|
||||||
|
).scalar_one_or_none()
|
||||||
|
if death is not None:
|
||||||
|
return False
|
||||||
|
birth = (
|
||||||
|
await session.execute(
|
||||||
|
select(Event.date_start)
|
||||||
|
.where(
|
||||||
|
Event.person_id == person.id,
|
||||||
|
Event.event_type == "birth",
|
||||||
|
Event.date_start.is_not(None),
|
||||||
|
Event.deleted_at.is_(None),
|
||||||
|
)
|
||||||
|
.order_by(Event.date_start)
|
||||||
|
.limit(1)
|
||||||
|
)
|
||||||
|
).scalar_one_or_none()
|
||||||
|
if birth is None:
|
||||||
|
return True # unknown birth → treat as possibly living
|
||||||
|
return (datetime.now(UTC).year - birth.year) < LIVING_RECENCY_YEARS
|
||||||
|
|
||||||
|
|
||||||
async def person_visibility(
|
async def person_visibility(
|
||||||
session: AsyncSession, *, user_id: uuid.UUID | None, tree: Tree, person: Person
|
session: AsyncSession, *, user_id: uuid.UUID | None, tree: Tree, person: Person
|
||||||
) -> Visibility:
|
) -> Visibility:
|
||||||
if not await can_view_tree(session, user_id=user_id, tree=tree):
|
if not await can_view_tree(session, user_id=user_id, tree=tree):
|
||||||
return Visibility.hidden
|
return Visibility.hidden
|
||||||
if await get_membership_role(session, user_id, tree.id) is not None:
|
if await get_membership_role(session, user_id, tree.id) is not None:
|
||||||
return Visibility.full
|
return Visibility.full # members see everyone in their tree
|
||||||
# Non-member viewing a public/unlisted tree:
|
# Non-member viewing a public/unlisted tree:
|
||||||
if person.privacy == PersonPrivacy.private:
|
if person.privacy == PersonPrivacy.private:
|
||||||
return Visibility.hidden
|
return Visibility.hidden
|
||||||
# TODO(Phase 2): redact living people for non-members (ARCHITECTURE §6).
|
if person.privacy == PersonPrivacy.public:
|
||||||
|
return Visibility.full # explicit per-person opt-in
|
||||||
|
if await is_possibly_living(session, person):
|
||||||
|
return Visibility.redacted # living people are protected by default
|
||||||
return Visibility.full
|
return Visibility.full
|
||||||
|
|||||||
@@ -0,0 +1,33 @@
|
|||||||
|
"""pg_trgm extension + trigram name indexes for fuzzy search
|
||||||
|
|
||||||
|
Revision ID: 9a2b1c7d4e10
|
||||||
|
Revises: 7fc7024ef432
|
||||||
|
Create Date: 2026-06-07
|
||||||
|
|
||||||
|
"""
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision: str = "9a2b1c7d4e10"
|
||||||
|
down_revision: str | None = "7fc7024ef432"
|
||||||
|
branch_labels: str | Sequence[str] | None = None
|
||||||
|
depends_on: str | Sequence[str] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")
|
||||||
|
op.execute(
|
||||||
|
"CREATE INDEX IF NOT EXISTS ix_names_given_trgm "
|
||||||
|
"ON names USING gin (given gin_trgm_ops)"
|
||||||
|
)
|
||||||
|
op.execute(
|
||||||
|
"CREATE INDEX IF NOT EXISTS ix_names_surname_trgm "
|
||||||
|
"ON names USING gin (surname gin_trgm_ops)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
op.execute("DROP INDEX IF EXISTS ix_names_surname_trgm")
|
||||||
|
op.execute("DROP INDEX IF EXISTS ix_names_given_trgm")
|
||||||
|
# Leave the pg_trgm extension in place; other features may rely on it.
|
||||||
@@ -11,6 +11,7 @@ import os
|
|||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from httpx import ASGITransport, AsyncClient
|
from httpx import ASGITransport, AsyncClient
|
||||||
|
from sqlalchemy import text
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||||
|
|
||||||
import app.models # noqa: F401 — register all models on Base.metadata
|
import app.models # noqa: F401 — register all models on Base.metadata
|
||||||
@@ -72,6 +73,7 @@ async def client():
|
|||||||
|
|
||||||
engine = create_async_engine(TEST_DATABASE_URL)
|
engine = create_async_engine(TEST_DATABASE_URL)
|
||||||
async with engine.begin() as conn:
|
async with engine.begin() as conn:
|
||||||
|
await conn.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm"))
|
||||||
await conn.run_sync(Base.metadata.drop_all)
|
await conn.run_sync(Base.metadata.drop_all)
|
||||||
await conn.run_sync(Base.metadata.create_all)
|
await conn.run_sync(Base.metadata.create_all)
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,36 @@
|
|||||||
|
"""Living-person protection: living people are redacted from non-members."""
|
||||||
|
|
||||||
|
from tests.conftest import auth, register
|
||||||
|
|
||||||
|
|
||||||
|
async def test_living_person_redacted_for_non_members(client):
|
||||||
|
owner = auth(await register(client, "pub-owner@example.com"))
|
||||||
|
tid = (
|
||||||
|
await client.post(
|
||||||
|
"/api/v1/trees", json={"name": "Public", "visibility": "public"}, headers=owner
|
||||||
|
)
|
||||||
|
).json()["id"]
|
||||||
|
await client.post(
|
||||||
|
f"/api/v1/trees/{tid}/persons",
|
||||||
|
json={"given": "Old", "surname": "Ancestor", "is_living": False},
|
||||||
|
headers=owner,
|
||||||
|
)
|
||||||
|
await client.post(
|
||||||
|
f"/api/v1/trees/{tid}/persons",
|
||||||
|
json={"given": "Young", "surname": "Living", "is_living": True},
|
||||||
|
headers=owner,
|
||||||
|
)
|
||||||
|
|
||||||
|
other = auth(await register(client, "pub-viewer@example.com"))
|
||||||
|
people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=other)).json()
|
||||||
|
names = {p["primary_name"] for p in people}
|
||||||
|
assert "Old Ancestor" in names # deceased is visible
|
||||||
|
assert "Living person" in names # living is redacted
|
||||||
|
assert "Young Living" not in names # the real living name is hidden
|
||||||
|
# The redacted person leaks no gender.
|
||||||
|
living = next(p for p in people if p["primary_name"] == "Living person")
|
||||||
|
assert living["gender"] is None
|
||||||
|
|
||||||
|
# The owner (a member) sees real names.
|
||||||
|
owner_people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=owner)).json()
|
||||||
|
assert "Young Living" in {p["primary_name"] for p in owner_people}
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
"""Fuzzy name search (pg_trgm)."""
|
||||||
|
|
||||||
|
from tests.conftest import auth, register
|
||||||
|
|
||||||
|
|
||||||
|
async def test_fuzzy_name_search(client):
|
||||||
|
h = auth(await register(client, "search@example.com"))
|
||||||
|
tid = (await client.post("/api/v1/trees", json={"name": "S"}, headers=h)).json()["id"]
|
||||||
|
for given, surname in [("Hans", "Mueller"), ("John", "Smith"), ("Anna", "Vogel")]:
|
||||||
|
await client.post(
|
||||||
|
f"/api/v1/trees/{tid}/persons",
|
||||||
|
json={"given": given, "surname": surname},
|
||||||
|
headers=h,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Trigram fuzziness: "muller" should find "Mueller" (not a substring match).
|
||||||
|
r = await client.get(f"/api/v1/trees/{tid}/persons", params={"q": "muller"}, headers=h)
|
||||||
|
assert r.status_code == 200
|
||||||
|
names = [p["primary_name"] or "" for p in r.json()]
|
||||||
|
assert any("Mueller" in n for n in names)
|
||||||
|
|
||||||
|
# Substring search still works.
|
||||||
|
r2 = await client.get(f"/api/v1/trees/{tid}/persons", params={"q": "smi"}, headers=h)
|
||||||
|
assert any("Smith" in (p["primary_name"] or "") for p in r2.json())
|
||||||
Reference in New Issue
Block a user