From b4434cb5dd6994eaa6c687b98488a42ac301e608 Mon Sep 17 00:00:00 2001 From: Justin Paul Date: Tue, 9 Jun 2026 12:37:03 -0400 Subject: [PATCH] Fix #169: keep citation links on GEDCOM export MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export emitted SOUR records but never the per-fact SOUR links, so a Provenance→Provenance round-trip destroyed the sources graph (citations were dropped). Emit citation links on the facts they sit on: - person-level → 1 SOUR @Sx@ (2 PAGE) - name-level → 2 SOUR under 1 NAME - event-level → 2 SOUR under the event (incl. partnership events in FAM) - relationship → 1 SOUR under FAM Citations whose source didn't export are skipped. Test: a person + event citation round-trips through export→import into a fresh tree with their pages intact. GEDCOM suite 6 passed. Closes #169 Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Justin Paul --- backend/app/services/gedcom.py | 40 ++++++++++++++++++++++++++ backend/tests/test_gedcom.py | 52 ++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/backend/app/services/gedcom.py b/backend/app/services/gedcom.py index 17b15ac..2488f7c 100644 --- a/backend/app/services/gedcom.py +++ b/backend/app/services/gedcom.py @@ -692,10 +692,45 @@ async def export_gedcom(session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tr await session.execute(select(Place).where(Place.tree_id == tree.id)) ).scalars().all() } + citations = list( + ( + await session.execute( + select(Citation).where( + Citation.tree_id == tree.id, Citation.deleted_at.is_(None) + ) + ) + ).scalars().all() + ) pxref = {p.id: f"@I{i + 1}@" for i, p in enumerate(persons)} gender_by_id = {p.id: p.gender for p in persons} sxref = {s.id: f"@S{i + 1}@" for i, s in enumerate(sources)} + # Citations grouped by the fact they sit on, so each fact can emit its SOUR + # links (dropping these is the round-trip data loss this fixes). Skip any + # whose source didn't export. + cite_by_person: dict[uuid.UUID, list[Citation]] = defaultdict(list) + cite_by_name: dict[uuid.UUID, list[Citation]] = defaultdict(list) + cite_by_event: dict[uuid.UUID, list[Citation]] = defaultdict(list) + cite_by_rel: dict[uuid.UUID, list[Citation]] = defaultdict(list) + for c in citations: + if c.source_id not in sxref: + continue + if c.person_id: + cite_by_person[c.person_id].append(c) + elif c.event_id: + cite_by_event[c.event_id].append(c) + elif c.name_id: + cite_by_name[c.name_id].append(c) + elif c.relationship_id: + cite_by_rel[c.relationship_id].append(c) + + def cite_lines(cites: list[Citation], depth: int) -> list[str]: + lines: list[str] = [] + for c in cites: + lines.append(f"{depth} SOUR {sxref[c.source_id]}") + if c.page: + lines.append(f"{depth + 1} PAGE {c.page}") + return lines names_by_person: dict[uuid.UUID, list[Name]] = defaultdict(list) for n in sorted(names, key=lambda n: (n.sort_order, not n.is_primary)): names_by_person[n.person_id].append(n) @@ -747,6 +782,7 @@ async def export_gedcom(session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tr ged_type = EXPORT_TYPE_MAP.get(n.name_type) if ged_type: out.append(f"2 TYPE {ged_type}") + out += cite_lines(cite_by_name.get(n.id, []), 2) sex = {"male": "M", "female": "F"}.get(p.gender or "") if sex: out.append(f"1 SEX {sex}") @@ -759,6 +795,8 @@ async def export_gedcom(session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tr out.append(f"2 DATE {e.date_value}") if e.place_id and e.place_id in places: out.append(f"2 PLAC {places[e.place_id].name}") + out += cite_lines(cite_by_event.get(e.id, []), 2) + out += cite_lines(cite_by_person.get(p.id, []), 1) if p.id in child_fams: out.append(f"1 FAMC {child_fams[p.id]}") for x in spouse_fams.get(p.id, []): @@ -787,6 +825,8 @@ async def export_gedcom(session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tr out.append(f"1 {tag}") if _ged_date(e.date_value): out.append(f"2 DATE {e.date_value}") + out += cite_lines(cite_by_event.get(e.id, []), 2) + out += cite_lines(cite_by_rel.get(f["rel_id"], []), 1) for s in sources: out.append(f"0 {sxref[s.id]} SOUR") diff --git a/backend/tests/test_gedcom.py b/backend/tests/test_gedcom.py index 2f69768..8efab9f 100644 --- a/backend/tests/test_gedcom.py +++ b/backend/tests/test_gedcom.py @@ -77,6 +77,58 @@ async def test_gedcom_export_and_reimport(client): assert resp.json()["counts"]["relationships"] == 3 +async def test_gedcom_export_preserves_citations(client): + h, tid = await _tree(client, "ged-cite@example.com") + pid = ( + await client.post( + f"/api/v1/trees/{tid}/persons", json={"given": "Ada", "surname": "Vance"}, headers=h + ) + ).json()["id"] + eid = ( + await client.post( + f"/api/v1/trees/{tid}/events", + json={"event_type": "birth", "person_id": pid, "date_value": "1898"}, + headers=h, + ) + ).json()["id"] + sid = ( + await client.post( + f"/api/v1/trees/{tid}/sources", json={"title": "1900 Census"}, headers=h + ) + ).json()["id"] + # A person-level and an event-level citation on the same source. + await client.post( + f"/api/v1/trees/{tid}/citations", + json={"source_id": sid, "person_id": pid, "page": "p.12"}, + headers=h, + ) + await client.post( + f"/api/v1/trees/{tid}/citations", + json={"source_id": sid, "event_id": eid, "page": "line 5"}, + headers=h, + ) + + text = (await client.get(f"/api/v1/trees/{tid}/gedcom/export", headers=h)).text + # Citation links + pages are emitted (previously dropped). + assert "1 SOUR @S1@" in text # person-level + assert "2 PAGE p.12" in text + assert "2 SOUR @S1@" in text # event-level (under 1 BIRT) + assert "3 PAGE line 5" in text + + # Round-trip into a fresh tree: the citations survive. + tid2 = (await client.post("/api/v1/trees", json={"name": "RT"}, headers=h)).json()["id"] + await client.post( + f"/api/v1/trees/{tid2}/gedcom/import", + files={"file": ("rt.ged", text.encode(), "text/plain")}, + headers=h, + ) + cites = (await client.get(f"/api/v1/trees/{tid2}/citations", headers=h)).json() + assert len(cites) >= 2 + assert any(c["person_id"] for c in cites) + assert any(c["event_id"] for c in cites) + assert {"p.12", "line 5"} <= {c.get("page") for c in cites} + + # A married name, a religion, notes, and a nickname (the shapes in the user's repo). RICH = b"""0 HEAD 1 CHAR UTF-8