Files
provenance/backend/tests/test_gedcom.py
T
justin b4434cb5dd Fix #169: keep citation links on GEDCOM export
Export emitted SOUR records but never the per-fact SOUR links, so a
Provenance→Provenance round-trip destroyed the sources graph (citations were
dropped). Emit citation links on the facts they sit on:
- person-level → 1 SOUR @Sx@ (2 PAGE)
- name-level   → 2 SOUR under 1 NAME
- event-level  → 2 SOUR under the event (incl. partnership events in FAM)
- relationship → 1 SOUR under FAM
Citations whose source didn't export are skipped.

Test: a person + event citation round-trips through export→import into a fresh
tree with their pages intact. GEDCOM suite 6 passed.

Closes #169

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Justin Paul <justin@jpaul.me>
2026-06-09 12:37:03 -04:00

236 lines
8.1 KiB
Python

"""GEDCOM import + export round-trip."""
from tests.conftest import auth, register
# Minimal GEDCOM fixture shared by the import/export/dedupe tests below.
# It holds three individuals (@I1@ John Smith, @I2@ Mary Jones, @I3@ Junior
# Smith) and one family (@F1@) with John as HUSB, Mary as WIFE and Junior as
# CHIL. Events present: two BIRT records (John, Junior) and one MARR record.
SAMPLE = b"""0 HEAD
1 CHAR UTF-8
0 @I1@ INDI
1 NAME John /Smith/
1 SEX M
1 BIRT
2 DATE 1850
2 PLAC Boston, Massachusetts
0 @I2@ INDI
1 NAME Mary /Jones/
1 SEX F
0 @I3@ INDI
1 NAME Junior /Smith/
1 BIRT
2 DATE 1872
0 @F1@ FAM
1 HUSB @I1@
1 WIFE @I2@
1 CHIL @I3@
1 MARR
2 DATE 1870
0 TRLR
"""
async def _tree(client, email):
    """Register ``email`` and create a tree named "Imported" for that user.

    Returns a ``(headers, tree_id)`` tuple: the headers that ``auth`` builds
    from the registration result, and the ``id`` field of the created tree.
    """
    headers = auth(await register(client, email))
    created = await client.post(
        "/api/v1/trees", json={"name": "Imported"}, headers=headers
    )
    tree_id = created.json()["id"]
    return headers, tree_id
async def test_gedcom_import(client):
    """Importing SAMPLE reports the expected counts and persists the records."""
    headers, tree_id = await _tree(client, "ged1@example.com")
    base = f"/api/v1/trees/{tree_id}"

    upload = {"file": ("sample.ged", SAMPLE, "text/plain")}
    imported = await client.post(
        f"{base}/gedcom/import", files=upload, headers=headers
    )
    assert imported.status_code == 200, imported.text

    counts = imported.json()["counts"]
    assert counts["persons"] == 3
    assert counts["families"] == 1
    # partnership (1) + parent_child from both parents to the child (2)
    assert counts["relationships"] == 3
    assert counts["events"] == 3  # 2 births + 1 marriage

    # The reported counts must match what the listing endpoints return.
    persons_resp = await client.get(f"{base}/persons", headers=headers)
    assert len(persons_resp.json()) == 3

    relationships_resp = await client.get(f"{base}/relationships", headers=headers)
    assert len(relationships_resp.json()) == 3
async def test_gedcom_export_and_reimport(client):
    """Export an imported tree, then re-import the export into a fresh tree.

    The export must contain INDI and FAM records and John's name, and the
    re-import must report the same person and relationship counts as SAMPLE.
    """
    h, tid = await _tree(client, "ged2@example.com")
    seeded = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import",
        files={"file": ("sample.ged", SAMPLE, "text/plain")},
        headers=h,
    )
    # Check the setup import so a failure here is reported with the server's
    # message instead of as a confusing export assertion further down.
    assert seeded.status_code == 200, seeded.text

    exported = await client.get(f"/api/v1/trees/{tid}/gedcom/export", headers=h)
    assert exported.status_code == 200, exported.text
    text = exported.text
    # Separate asserts so a failure names the fragment that is missing.
    assert "INDI" in text
    assert "FAM" in text
    assert "John /Smith/" in text

    # Re-import the export into a fresh tree: people are preserved.
    tid2 = (
        await client.post("/api/v1/trees", json={"name": "Round"}, headers=h)
    ).json()["id"]
    resp = await client.post(
        f"/api/v1/trees/{tid2}/gedcom/import",
        files={"file": ("rt.ged", text.encode(), "text/plain")},
        headers=h,
    )
    # Without this check a rejected re-import would raise KeyError on
    # "counts" instead of showing the response body.
    assert resp.status_code == 200, resp.text
    counts = resp.json()["counts"]
    assert counts["persons"] == 3
    assert counts["relationships"] == 3
async def test_gedcom_export_preserves_citations(client):
    """Citation links and pages survive an export -> import round-trip."""
    headers, tree_id = await _tree(client, "ged-cite@example.com")
    src = f"/api/v1/trees/{tree_id}"

    person_resp = await client.post(
        f"{src}/persons", json={"given": "Ada", "surname": "Vance"}, headers=headers
    )
    person_id = person_resp.json()["id"]

    event_resp = await client.post(
        f"{src}/events",
        json={"event_type": "birth", "person_id": person_id, "date_value": "1898"},
        headers=headers,
    )
    event_id = event_resp.json()["id"]

    source_resp = await client.post(
        f"{src}/sources", json={"title": "1900 Census"}, headers=headers
    )
    source_id = source_resp.json()["id"]

    # A person-level and an event-level citation on the same source.
    citation_payloads = (
        {"source_id": source_id, "person_id": person_id, "page": "p.12"},
        {"source_id": source_id, "event_id": event_id, "page": "line 5"},
    )
    for payload in citation_payloads:
        await client.post(f"{src}/citations", json=payload, headers=headers)

    export_resp = await client.get(f"{src}/gedcom/export", headers=headers)
    gedcom_text = export_resp.text

    # Citation links + pages are emitted (previously dropped).
    expected_fragments = (
        "1 SOUR @S1@",  # person-level
        "2 PAGE p.12",
        "2 SOUR @S1@",  # event-level (under 1 BIRT)
        "3 PAGE line 5",
    )
    for fragment in expected_fragments:
        assert fragment in gedcom_text

    # Round-trip into a fresh tree: the citations survive.
    fresh_resp = await client.post(
        "/api/v1/trees", json={"name": "RT"}, headers=headers
    )
    fresh_id = fresh_resp.json()["id"]
    dst = f"/api/v1/trees/{fresh_id}"
    await client.post(
        f"{dst}/gedcom/import",
        files={"file": ("rt.ged", gedcom_text.encode(), "text/plain")},
        headers=headers,
    )

    citations = (await client.get(f"{dst}/citations", headers=headers)).json()
    assert len(citations) >= 2
    assert any(cite["person_id"] for cite in citations)
    assert any(cite["event_id"] for cite in citations)
    pages = {cite.get("page") for cite in citations}
    assert {"p.12", "line 5"}.issubset(pages)
# GEDCOM fixture with a single individual (@I1@ Jane Doe) that carries a
# nickname (NICK), a married name (_MARNM), a religion (RELI), a birth date and
# a free-text NOTE -- the shapes in the user's repo.
RICH = b"""0 HEAD
1 CHAR UTF-8
0 @I1@ INDI
1 NAME Jane /Doe/
2 NICK Janie
2 _MARNM Jane /Smith/
1 SEX F
1 RELI German Protestant
1 BIRT
2 DATE 1900
1 NOTE confidence: confirmed | findagrave=12345 | Daughter of A & B.
0 TRLR
"""
async def test_import_marnm_reli_note(client):
    """Importing RICH maps _MARNM, NICK, RELI and NOTE instead of dropping them.

    Checks that no tags are reported as unmapped, that the birth name stays
    primary with the married name as a typed alternate, that the religion
    arrives as an event, and that the note text reaches the person when the
    list payload exposes it.
    """
    h, tid = await _tree(client, "ged-rich@example.com")
    resp = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import",
        files={"file": ("rich.ged", RICH, "text/plain")},
        headers=h,
    )
    assert resp.status_code == 200, resp.text
    report = resp.json()
    assert report["unmapped_tags"] == []  # NOTE and RELI are handled now

    person = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()[0]
    pid = person["id"]

    # Maiden name is primary; married name is a typed alternate.
    names = (
        await client.get(f"/api/v1/trees/{tid}/persons/{pid}/names", headers=h)
    ).json()
    by_type = {n["name_type"]: n for n in names}
    birth, married = by_type["birth"], by_type["married"]
    assert birth["surname"] == "Doe"
    assert birth["is_primary"] is True
    assert birth["nickname"] == "Janie"
    assert married["surname"] == "Smith"
    assert married["is_primary"] is False

    # Religion imported as an event with the value in detail; notes on the person.
    events = (
        await client.get(f"/api/v1/trees/{tid}/persons/{pid}/events", headers=h)
    ).json()
    reli = next(e for e in events if e["event_type"] == "religion")
    assert reli["detail"] == "German Protestant"

    # Notes are optional in the list payload, so only check the content when
    # the field is actually present. (The previous assertion ended in
    # ``or True`` and therefore could never fail.)
    notes = person.get("notes")
    if notes:
        assert "findagrave=12345" in notes
async def test_preview_and_dedupe_merge(client):
    """Preview flags a duplicate, and a "merge" resolution folds it in on import."""
    import json

    headers, tree_id = await _tree(client, "ged-dupe@example.com")
    base = f"/api/v1/trees/{tree_id}"
    upload = {"file": ("s.ged", SAMPLE, "text/plain")}

    # Seed an existing person who will match the incoming one.
    await client.post(
        f"{base}/persons",
        json={"given": "John", "surname": "Smith"},
        headers=headers,
    )
    listed = await client.get(f"{base}/persons", headers=headers)
    existing = listed.json()[0]

    # Preview flags @I1@ (John Smith) as a duplicate.
    preview = await client.post(
        f"{base}/gedcom/preview", files=upload, headers=headers
    )
    assert preview.status_code == 200, preview.text
    candidates = preview.json()["potential_duplicates"]
    john = next(
        cand for cand in candidates if cand["incoming_name"].startswith("John")
    )
    assert john["existing_person_id"] == existing["id"]

    # Import, merging John into the existing person; the others come in new.
    merge_john = {"action": "merge", "target_id": existing["id"]}
    resolutions = json.dumps({john["xref"]: merge_john})
    imported = await client.post(
        f"{base}/gedcom/import",
        files=upload,
        data={"resolutions": resolutions},
        headers=headers,
    )
    assert imported.status_code == 200, imported.text
    assert imported.json()["counts"]["merged"] == 1

    # 1 existing + Mary + Junior = 3 (John was merged, not duplicated).
    after = await client.get(f"{base}/persons", headers=headers)
    assert len(after.json()) == 3
async def test_dedupe_skip_default(client):
    """With ``default_action=skip`` a matching incoming person is not re-created.

    Seeds one John Smith, imports SAMPLE with the skip default, and expects
    one skipped record and three people in the tree afterwards.
    """
    h, tid = await _tree(client, "ged-skip@example.com")
    # Seed the person that the incoming @I1@ (John Smith) will match.
    await client.post(
        f"/api/v1/trees/{tid}/persons",
        json={"given": "John", "surname": "Smith"},
        headers=h,
    )
    resp = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import",
        files={"file": ("s.ged", SAMPLE, "text/plain")},
        data={"default_action": "skip"},
        headers=h,
    )
    assert resp.status_code == 200, resp.text
    counts = resp.json()["counts"]
    assert counts.get("skipped", 0) == 1
    # John skipped (links to existing), Mary + Junior added = 3 total.
    people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()
    assert len(people) == 3