Cleanup: best-guess sex from first name (offline dictionary)
A "Guess from first name" option in the Cleanup gender section: a bundled, curated given-name -> sex dictionary (weighted English + German for the first real tree) proposes sex for people who don't have it set. Deterministic, offline, no model. Genuinely ambiguous names (Marion, Frances, Jordan, …) are excluded from both sets so they're left for a human. Reuses the existing preview/apply gender flow, so every guess is reviewed before saving. No migration. 56 backend tests pass; frontend builds. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -87,6 +87,28 @@ async def test_gender_from_source(client):
|
||||
assert genders["Josias Moody"] == "male" and genders["Flora Paul"] == "female"
|
||||
|
||||
|
||||
async def test_guess_gender_from_first_name(client):
    """Exercise the first-name gender guess preview, then apply its proposals.

    Three people without a gender are created (one with a name the test
    treats as ambiguous) plus one person whose gender is set explicitly.
    The guess preview is expected to list only the two unambiguous,
    still-ungendered people, and applying those proposals is expected to
    report two updates.
    """
    h, tid = await _tree(client, "cl-guess@example.com")

    # Expected outcome per person: William -> male, Flora -> female,
    # Marion -> ambiguous, so it should be skipped by the guesser.
    for given, surname in (("William", "Paul"), ("Flora", "Reier"), ("Marion", "Doe")):
        await _person(client, h, tid, given, surname)

    # Already-gendered person is left alone even if guessable.
    gendered = await _person(client, h, tid, "James", "Known")
    await client.patch(
        f"/api/v1/trees/{tid}/persons/{gendered}",
        json={"gender": "male"},
        headers=h,
    )

    preview_response = await client.get(
        f"/api/v1/trees/{tid}/cleanup/gender/guess", headers=h
    )
    preview = preview_response.json()

    # Index the proposals by display name so the comparison below also
    # catches any unexpected extra rows (e.g. Marion or James).
    proposed_by_name = {}
    for row in preview:
        proposed_by_name[row["name"]] = row["proposed_gender"]
    assert proposed_by_name == {"William Paul": "male", "Flora Reier": "female"}

    # Feed every previewed proposal back through the regular apply endpoint.
    payload = {
        "updates": [
            {"person_id": row["person_id"], "gender": row["proposed_gender"]}
            for row in preview
        ]
    }
    applied = await client.post(
        f"/api/v1/trees/{tid}/cleanup/gender", json=payload, headers=h
    )
    assert applied.status_code == 200
    assert applied.json()["updated"] == 2
|
||||
|
||||
|
||||
async def test_name_issues_preview_and_fix(client):
|
||||
h, tid = await _tree(client, "cl-name@example.com")
|
||||
# surname got a date; real surname landed in the given name.
|
||||
|
||||
Reference in New Issue
Block a user