Add fuzzy name search (pg_trgm) and living-person protection

Fuzzy search: pg_trgm extension + trigram GIN indexes on name parts and a GET /trees/{id}/persons?q= search ranked by trigram similarity (finds Mueller for 'muller'), privacy-filtered. Living-person protection: the privacy engine now derives possibly-living status (explicit flag, else no death fact + birth within ~100y or unknown) and returns 'redacted' for non-members of public/unlisted trees; the service minimises those records ('Living person', no vitals). Members are unaffected. 31 tests pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Justin Paul <justin@jpaul.me>
This commit is contained in:
2026-06-07 07:55:13 -04:00
parent 51f0066e61
commit 4788ae7723
8 changed files with 248 additions and 18 deletions
+2
View File
@@ -11,6 +11,7 @@ import os
import pytest
import pytest_asyncio
from httpx import ASGITransport, AsyncClient
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
import app.models # noqa: F401 — register all models on Base.metadata
@@ -72,6 +73,7 @@ async def client():
engine = create_async_engine(TEST_DATABASE_URL)
async with engine.begin() as conn:
await conn.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm"))
await conn.run_sync(Base.metadata.drop_all)
await conn.run_sync(Base.metadata.create_all)
+36
View File
@@ -0,0 +1,36 @@
"""Living-person protection: living people are redacted from non-members."""
from tests.conftest import auth, register
async def test_living_person_redacted_for_non_members(client):
owner = auth(await register(client, "pub-owner@example.com"))
tid = (
await client.post(
"/api/v1/trees", json={"name": "Public", "visibility": "public"}, headers=owner
)
).json()["id"]
await client.post(
f"/api/v1/trees/{tid}/persons",
json={"given": "Old", "surname": "Ancestor", "is_living": False},
headers=owner,
)
await client.post(
f"/api/v1/trees/{tid}/persons",
json={"given": "Young", "surname": "Living", "is_living": True},
headers=owner,
)
other = auth(await register(client, "pub-viewer@example.com"))
people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=other)).json()
names = {p["primary_name"] for p in people}
assert "Old Ancestor" in names # deceased is visible
assert "Living person" in names # living is redacted
assert "Young Living" not in names # the real living name is hidden
# The redacted person leaks no gender.
living = next(p for p in people if p["primary_name"] == "Living person")
assert living["gender"] is None
# The owner (a member) sees real names.
owner_people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=owner)).json()
assert "Young Living" in {p["primary_name"] for p in owner_people}
+24
View File
@@ -0,0 +1,24 @@
"""Fuzzy name search (pg_trgm)."""
from tests.conftest import auth, register
async def test_fuzzy_name_search(client):
h = auth(await register(client, "search@example.com"))
tid = (await client.post("/api/v1/trees", json={"name": "S"}, headers=h)).json()["id"]
for given, surname in [("Hans", "Mueller"), ("John", "Smith"), ("Anna", "Vogel")]:
await client.post(
f"/api/v1/trees/{tid}/persons",
json={"given": given, "surname": surname},
headers=h,
)
# Trigram fuzziness: "muller" should find "Mueller" (not a substring match).
r = await client.get(f"/api/v1/trees/{tid}/persons", params={"q": "muller"}, headers=h)
assert r.status_code == 200
names = [p["primary_name"] or "" for p in r.json()]
assert any("Mueller" in n for n in names)
# Substring search still works.
r2 = await client.get(f"/api/v1/trees/{tid}/persons", params={"q": "smi"}, headers=h)
assert any("Smith" in (p["primary_name"] or "") for p in r2.json())