Files
provenance/backend/tests/test_cleanup.py
T
justin aa62ca490e Tree Cleanup tool: bulk fixes with preview → approve
A new per-tree Cleanup page (and cleanup_service + endpoints), each fix
preview-first per the propose-then-approve rule:

- Mark deceased by birth year: lists people born ≤ a cutoff (default 1930) not
  already deceased; apply sets is_living=false for the ones you keep checked.
- Set sex from a source GEDCOM: upload the source .ged (it carries SEX); matches
  by name and proposes sex only where it's missing — far more accurate than
  guessing from first names. Review, then apply.
- Names that look broken: flags date-in-surname / date-in-given / no-surname /
  packed given names, with inline editable given+surname; fix the checked ones.

No migration (uses existing columns). 55 backend tests pass (preview+apply for
all three); frontend builds.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 10:17:01 -04:00

108 lines
3.8 KiB
Python

"""Tree cleanup: preview/apply for deceased-by-year, gender-from-source, names."""
from tests.conftest import auth, register
async def _tree(client, email):
h = auth(await register(client, email))
tid = (await client.post("/api/v1/trees", json={"name": "T"}, headers=h)).json()["id"]
return h, tid
async def _person(client, h, tid, given, surname=None):
return (
await client.post(
f"/api/v1/trees/{tid}/persons", json={"given": given, "surname": surname}, headers=h
)
).json()["id"]
async def _birth(client, h, tid, pid, year):
await client.post(
f"/api/v1/trees/{tid}/events",
json={"event_type": "birth", "person_id": pid, "date_value": str(year)},
headers=h,
)
async def test_deceased_preview_and_apply(client):
h, tid = await _tree(client, "cl-dec@example.com")
old = await _person(client, h, tid, "Josias", "Moody")
young = await _person(client, h, tid, "Kid", "Moody")
await _birth(client, h, tid, old, 1900)
await _birth(client, h, tid, young, 1990)
prev = (
await client.get(f"/api/v1/trees/{tid}/cleanup/deceased?born_on_or_before=1930", headers=h)
).json()
assert [r["person_id"] for r in prev] == [old]
r = await client.post(
f"/api/v1/trees/{tid}/cleanup/deceased", json={"person_ids": [old]}, headers=h
)
assert r.status_code == 200 and r.json()["updated"] == 1
assert (
await client.get(f"/api/v1/trees/{tid}/persons/{old}", headers=h)
).json()["is_living"] is False
# Re-preview no longer lists the now-deceased person.
prev2 = (
await client.get(f"/api/v1/trees/{tid}/cleanup/deceased?born_on_or_before=1930", headers=h)
).json()
assert old not in [r["person_id"] for r in prev2]
GED = b"""0 HEAD
0 @I1@ INDI
1 NAME Josias /Moody/
1 SEX M
0 @I2@ INDI
1 NAME Flora /Paul/
1 SEX F
0 TRLR
"""
async def test_gender_from_source(client):
h, tid = await _tree(client, "cl-gen@example.com")
await _person(client, h, tid, "Josias", "Moody")
await _person(client, h, tid, "Flora", "Paul")
await _person(client, h, tid, "Nobody", "Else") # not in source
prev = await client.post(
f"/api/v1/trees/{tid}/cleanup/gender/preview",
files={"file": ("src.ged", GED, "text/plain")},
headers=h,
)
props = prev.json()
by_name = {p["name"]: p["proposed_gender"] for p in props}
assert by_name == {"Josias Moody": "male", "Flora Paul": "female"}
updates = [{"person_id": p["person_id"], "gender": p["proposed_gender"]} for p in props]
r = await client.post(
f"/api/v1/trees/{tid}/cleanup/gender", json={"updates": updates}, headers=h
)
assert r.status_code == 200 and r.json()["updated"] == 2
people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()
genders = {p["primary_name"]: p["gender"] for p in people}
assert genders["Josias Moody"] == "male" and genders["Flora Paul"] == "female"
async def test_name_issues_preview_and_fix(client):
h, tid = await _tree(client, "cl-name@example.com")
# surname got a date; real surname landed in the given name.
bad = await _person(client, h, tid, "Henry Paul", "1859")
await _person(client, h, tid, "Normal", "Person") # should not be flagged
issues = (await client.get(f"/api/v1/trees/{tid}/cleanup/names", headers=h)).json()
assert len(issues) == 1 and issues[0]["issue"] == "date_in_surname"
name_id = issues[0]["name_id"]
r = await client.post(
f"/api/v1/trees/{tid}/cleanup/names",
json={"edits": [{"name_id": name_id, "given": "Henry", "surname": "Paul"}]},
headers=h,
)
assert r.status_code == 200 and r.json()["updated"] == 1
person = (await client.get(f"/api/v1/trees/{tid}/persons/{bad}", headers=h)).json()
assert person["primary_name"] == "Henry Paul"