Files
provenance/backend/app/api/v1/cleanup.py
T
justin 6ec852a23a Cleanup: best-guess sex from first name (offline dictionary)
A "Guess from first name" option in the Cleanup gender section: a bundled,
curated given-name -> sex dictionary (weighted English + German for the first
real tree) proposes sex for people who don't have it set. Deterministic, offline,
no model. Genuinely ambiguous names (Marion, Frances, Jordan, …) are excluded
from both sets so they're left for a human. Reuses the existing preview/apply
gender flow, so every guess is reviewed before saving.

No migration. 56 backend tests pass; frontend builds.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 10:30:35 -04:00

102 lines
3.7 KiB
Python

import uuid
from fastapi import APIRouter, File, UploadFile
from app.api.deps import CurrentUser, SessionDep
from app.schemas.cleanup import (
CleanupResult,
DeceasedApply,
DeceasedCandidate,
GenderApply,
GenderProposal,
NameApply,
NameIssue,
)
from app.services import cleanup_service, tree_service
router = APIRouter(prefix="/trees", tags=["cleanup"])
@router.get("/{tree_id}/cleanup/deceased", response_model=list[DeceasedCandidate])
async def preview_deceased(
tree_id: uuid.UUID,
session: SessionDep,
current: CurrentUser,
born_on_or_before: int = 1930,
) -> list[DeceasedCandidate]:
tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
rows = await cleanup_service.preview_deceased(
session, actor=current, tree=tree, year=born_on_or_before
)
return [DeceasedCandidate(**r) for r in rows]
@router.post("/{tree_id}/cleanup/deceased", response_model=CleanupResult)
async def apply_deceased(
tree_id: uuid.UUID, data: DeceasedApply, session: SessionDep, current: CurrentUser
) -> CleanupResult:
tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
n = await cleanup_service.apply_deceased(
session, actor=current, tree=tree, person_ids=data.person_ids
)
return CleanupResult(updated=n)
@router.post("/{tree_id}/cleanup/gender/preview", response_model=list[GenderProposal])
async def preview_gender(
tree_id: uuid.UUID,
session: SessionDep,
current: CurrentUser,
file: UploadFile = File(...),
) -> list[GenderProposal]:
tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
text = (await file.read()).decode("utf-8", errors="replace")
rows = await cleanup_service.preview_gender(
session, actor=current, tree=tree, gedcom_text=text
)
return [GenderProposal(**r) for r in rows]
@router.get("/{tree_id}/cleanup/gender/guess", response_model=list[GenderProposal])
async def guess_gender(
tree_id: uuid.UUID, session: SessionDep, current: CurrentUser
) -> list[GenderProposal]:
"""Best-guess sex from first names (bundled dictionary) for people missing it."""
tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
rows = await cleanup_service.guess_gender_by_name(session, actor=current, tree=tree)
return [GenderProposal(**r) for r in rows]
@router.post("/{tree_id}/cleanup/gender", response_model=CleanupResult)
async def apply_gender(
tree_id: uuid.UUID, data: GenderApply, session: SessionDep, current: CurrentUser
) -> CleanupResult:
tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
n = await cleanup_service.apply_gender(
session,
actor=current,
tree=tree,
updates=[u.model_dump() for u in data.updates],
)
return CleanupResult(updated=n)
@router.get("/{tree_id}/cleanup/names", response_model=list[NameIssue])
async def preview_names(
tree_id: uuid.UUID, session: SessionDep, current: CurrentUser
) -> list[NameIssue]:
tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
rows = await cleanup_service.preview_names(session, actor=current, tree=tree)
return [NameIssue(**r) for r in rows]
@router.post("/{tree_id}/cleanup/names", response_model=CleanupResult)
async def apply_names(
tree_id: uuid.UUID, data: NameApply, session: SessionDep, current: CurrentUser
) -> CleanupResult:
tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
n = await cleanup_service.apply_names(
session, actor=current, tree=tree, edits=[e.model_dump() for e in data.edits]
)
return CleanupResult(updated=n)