Fix #169: keep citation links on GEDCOM export

Export emitted SOUR records but never the per-fact SOUR links, so a
Provenance→Provenance round-trip destroyed the sources graph (citations were
dropped). Emit citation links on the facts they sit on:
- person-level → 1 SOUR @Sx@ (2 PAGE)
- name-level   → 2 SOUR under 1 NAME
- event-level  → 2 SOUR under the event (incl. partnership events in FAM)
- relationship → 1 SOUR under FAM
Citations whose source didn't export are skipped.

Test: a person-level and an event-level citation round-trip through
export→import into a fresh tree with their pages intact. GEDCOM suite: 6 passed.

Closes #169

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Justin Paul <justin@jpaul.me>
This commit is contained in:
2026-06-09 12:37:03 -04:00
parent 39e3eac3df
commit b4434cb5dd
2 changed files with 92 additions and 0 deletions
+40
View File
@@ -692,10 +692,45 @@ async def export_gedcom(session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tr
await session.execute(select(Place).where(Place.tree_id == tree.id))
).scalars().all()
}
citations = list(
(
await session.execute(
select(Citation).where(
Citation.tree_id == tree.id, Citation.deleted_at.is_(None)
)
)
).scalars().all()
)
pxref = {p.id: f"@I{i + 1}@" for i, p in enumerate(persons)}
gender_by_id = {p.id: p.gender for p in persons}
sxref = {s.id: f"@S{i + 1}@" for i, s in enumerate(sources)}
# Citations grouped by the fact they sit on, so each fact can emit its SOUR
# links (dropping these is the round-trip data loss this fixes). Skip any
# whose source didn't export.
cite_by_person: dict[uuid.UUID, list[Citation]] = defaultdict(list)
cite_by_name: dict[uuid.UUID, list[Citation]] = defaultdict(list)
cite_by_event: dict[uuid.UUID, list[Citation]] = defaultdict(list)
cite_by_rel: dict[uuid.UUID, list[Citation]] = defaultdict(list)
for c in citations:
if c.source_id not in sxref:
continue
if c.person_id:
cite_by_person[c.person_id].append(c)
elif c.event_id:
cite_by_event[c.event_id].append(c)
elif c.name_id:
cite_by_name[c.name_id].append(c)
elif c.relationship_id:
cite_by_rel[c.relationship_id].append(c)
def cite_lines(cites: list[Citation], depth: int) -> list[str]:
    """Render the GEDCOM citation links for the citations on one fact.

    Each citation becomes a ``<depth> SOUR <xref>`` line, followed by a
    ``<depth + 1> PAGE <page>`` line when the citation has a non-blank page.

    Args:
        cites: Citations to emit. Each ``source_id`` must be a key of
            ``sxref`` (the enclosing export's source-id -> xref map);
            a missing key raises ``KeyError``.
        depth: GEDCOM level number used for the ``SOUR`` line.

    Returns:
        The GEDCOM lines, in the order the citations were given.
    """
    lines: list[str] = []
    for c in cites:
        lines.append(f"{depth} SOUR {sxref[c.source_id]}")
        # A GEDCOM line value cannot contain a line break: an embedded
        # newline in the page would start a bogus extra line in the output.
        # Fold a multi-line page onto a single line and skip a blank page.
        page = " ".join(
            part.strip() for part in (c.page or "").splitlines() if part.strip()
        )
        if page:
            lines.append(f"{depth + 1} PAGE {page}")
    return lines
names_by_person: dict[uuid.UUID, list[Name]] = defaultdict(list)
for n in sorted(names, key=lambda n: (n.sort_order, not n.is_primary)):
names_by_person[n.person_id].append(n)
@@ -747,6 +782,7 @@ async def export_gedcom(session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tr
ged_type = EXPORT_TYPE_MAP.get(n.name_type)
if ged_type:
out.append(f"2 TYPE {ged_type}")
out += cite_lines(cite_by_name.get(n.id, []), 2)
sex = {"male": "M", "female": "F"}.get(p.gender or "")
if sex:
out.append(f"1 SEX {sex}")
@@ -759,6 +795,8 @@ async def export_gedcom(session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tr
out.append(f"2 DATE {e.date_value}")
if e.place_id and e.place_id in places:
out.append(f"2 PLAC {places[e.place_id].name}")
out += cite_lines(cite_by_event.get(e.id, []), 2)
out += cite_lines(cite_by_person.get(p.id, []), 1)
if p.id in child_fams:
out.append(f"1 FAMC {child_fams[p.id]}")
for x in spouse_fams.get(p.id, []):
@@ -787,6 +825,8 @@ async def export_gedcom(session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tr
out.append(f"1 {tag}")
if _ged_date(e.date_value):
out.append(f"2 DATE {e.date_value}")
out += cite_lines(cite_by_event.get(e.id, []), 2)
out += cite_lines(cite_by_rel.get(f["rel_id"], []), 1)
for s in sources:
out.append(f"0 {sxref[s.id]} SOUR")
+52
View File
@@ -77,6 +77,58 @@ async def test_gedcom_export_and_reimport(client):
assert resp.json()["counts"]["relationships"] == 3
async def test_gedcom_export_preserves_citations(client):
    """Person- and event-level citations survive a GEDCOM export -> import."""
    headers, tree_id = await _tree(client, "ged-cite@example.com")

    # Seed one person, one birth event for that person, and one source.
    person_resp = await client.post(
        f"/api/v1/trees/{tree_id}/persons",
        json={"given": "Ada", "surname": "Vance"},
        headers=headers,
    )
    person_id = person_resp.json()["id"]

    event_resp = await client.post(
        f"/api/v1/trees/{tree_id}/events",
        json={"event_type": "birth", "person_id": person_id, "date_value": "1898"},
        headers=headers,
    )
    event_id = event_resp.json()["id"]

    source_resp = await client.post(
        f"/api/v1/trees/{tree_id}/sources",
        json={"title": "1900 Census"},
        headers=headers,
    )
    source_id = source_resp.json()["id"]

    # One person-level and one event-level citation, both on the same source.
    citation_targets = (
        {"person_id": person_id, "page": "p.12"},
        {"event_id": event_id, "page": "line 5"},
    )
    for target in citation_targets:
        await client.post(
            f"/api/v1/trees/{tree_id}/citations",
            json={"source_id": source_id, **target},
            headers=headers,
        )

    export_resp = await client.get(
        f"/api/v1/trees/{tree_id}/gedcom/export", headers=headers
    )
    gedcom_text = export_resp.text

    # The export must carry the citation links and their pages
    # (these used to be dropped).
    expected_fragments = (
        "1 SOUR @S1@",  # person-level
        "2 PAGE p.12",
        "2 SOUR @S1@",  # event-level (under 1 BIRT)
        "3 PAGE line 5",
    )
    for fragment in expected_fragments:
        assert fragment in gedcom_text

    # Round-trip: import the exported text into a brand-new tree.
    new_tree_resp = await client.post(
        "/api/v1/trees", json={"name": "RT"}, headers=headers
    )
    new_tree_id = new_tree_resp.json()["id"]
    await client.post(
        f"/api/v1/trees/{new_tree_id}/gedcom/import",
        files={"file": ("rt.ged", gedcom_text.encode(), "text/plain")},
        headers=headers,
    )

    listing_resp = await client.get(
        f"/api/v1/trees/{new_tree_id}/citations", headers=headers
    )
    imported = listing_resp.json()

    # Both kinds of citation, and both pages, must exist in the new tree.
    assert len(imported) >= 2
    assert any(cite["person_id"] for cite in imported)
    assert any(cite["event_id"] for cite in imported)
    imported_pages = {cite.get("page") for cite in imported}
    assert imported_pages >= {"p.12", "line 5"}
# A married name, a religion, notes, and a nickname (the shapes in the user's repo).
RICH = b"""0 HEAD
1 CHAR UTF-8