b4434cb5dd
Export emitted SOUR records but never the per-fact SOUR links, so a Provenance→Provenance round-trip destroyed the sources graph (citations were dropped).

Emit citation links on the facts they sit on:
- person-level → 1 SOUR @Sx@ (2 PAGE)
- name-level → 2 SOUR under 1 NAME
- event-level → 2 SOUR under the event (incl. partnership events in FAM)
- relationship → 1 SOUR under FAM

Citations whose source didn't export are skipped.

Test: a person + event citation round-trips through export→import into a fresh tree with their pages intact. GEDCOM suite 6 passed.

Closes #169

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Justin Paul <justin@jpaul.me>
236 lines
8.1 KiB
Python
236 lines
8.1 KiB
Python
"""GEDCOM import + export round-trip."""
|
|
|
|
from tests.conftest import auth, register
|
|
|
|
# Minimal GEDCOM fixture: three individuals (@I1@, @I2@, @I3@) and one family
# (@F1@) linking them as husband, wife and child, with two BIRT events and one
# MARR event. The payload must contain only GEDCOM lines, one per line.
SAMPLE = b"""0 HEAD
1 CHAR UTF-8
0 @I1@ INDI
1 NAME John /Smith/
1 SEX M
1 BIRT
2 DATE 1850
2 PLAC Boston, Massachusetts
0 @I2@ INDI
1 NAME Mary /Jones/
1 SEX F
0 @I3@ INDI
1 NAME Junior /Smith/
1 BIRT
2 DATE 1872
0 @F1@ FAM
1 HUSB @I1@
1 WIFE @I2@
1 CHIL @I3@
1 MARR
2 DATE 1870
0 TRLR
"""
|
|
|
|
|
|
async def _tree(client, email):
    """Register ``email`` and create a tree named "Imported" for that account.

    Returns a ``(headers, tree_id)`` pair: the value produced by ``auth`` for
    the registration result (passed as ``headers=`` on requests) and the
    ``id`` field of the tree-creation response.
    """
    registered = await register(client, email)
    headers = auth(registered)
    created = await client.post("/api/v1/trees", json={"name": "Imported"}, headers=headers)
    tree_id = created.json()["id"]
    return headers, tree_id
|
|
|
|
|
|
async def test_gedcom_import(client):
    """Import SAMPLE and check the reported counts and the stored records."""
    headers, tid = await _tree(client, "ged1@example.com")

    upload = {"file": ("sample.ged", SAMPLE, "text/plain")}
    response = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import", files=upload, headers=headers
    )
    assert response.status_code == 200, response.text

    reported = response.json()["counts"]
    assert reported["persons"] == 3
    assert reported["families"] == 1
    # One partnership plus a parent_child link from each parent to the child.
    assert reported["relationships"] == 3
    # Two births and one marriage.
    assert reported["events"] == 3

    # The list endpoints must agree with the counts the import reported.
    persons_response = await client.get(f"/api/v1/trees/{tid}/persons", headers=headers)
    assert len(persons_response.json()) == 3
    relationships_response = await client.get(
        f"/api/v1/trees/{tid}/relationships", headers=headers
    )
    assert len(relationships_response.json()) == 3
|
|
|
|
|
|
async def test_gedcom_export_and_reimport(client):
    """Export an imported tree and re-import the export into a fresh tree.

    Each HTTP step asserts its status so that a failing import or export is
    reported together with the response body, instead of surfacing later as a
    ``KeyError`` on ``counts`` or an unexplained missing-substring failure.
    """
    h, tid = await _tree(client, "ged2@example.com")
    seeded = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import",
        files={"file": ("sample.ged", SAMPLE, "text/plain")},
        headers=h,
    )
    assert seeded.status_code == 200, seeded.text

    exported = await client.get(f"/api/v1/trees/{tid}/gedcom/export", headers=h)
    assert exported.status_code == 200, exported.text
    text = exported.text
    # Separate asserts so a failure names the marker that is missing.
    assert "INDI" in text
    assert "FAM" in text
    assert "John /Smith/" in text

    # Re-import the export into a fresh tree: people are preserved.
    created = await client.post("/api/v1/trees", json={"name": "Round"}, headers=h)
    tid2 = created.json()["id"]
    resp = await client.post(
        f"/api/v1/trees/{tid2}/gedcom/import",
        files={"file": ("rt.ged", text.encode(), "text/plain")},
        headers=h,
    )
    assert resp.status_code == 200, resp.text
    counts = resp.json()["counts"]  # decode the body once
    assert counts["persons"] == 3
    assert counts["relationships"] == 3
|
|
|
|
|
|
async def test_gedcom_export_preserves_citations(client):
    """Citation links and pages survive export and re-import into a new tree.

    Creates one person, one birth event and one source, attaches a
    person-level and an event-level citation to that source, then checks the
    exported text for the SOUR/PAGE lines and the re-imported tree for both
    citations. Responses are status-checked so that a rejected request fails
    at the request that caused it rather than at a later substring assert.
    """
    h, tid = await _tree(client, "ged-cite@example.com")

    person = await client.post(
        f"/api/v1/trees/{tid}/persons", json={"given": "Ada", "surname": "Vance"}, headers=h
    )
    pid = person.json()["id"]
    event = await client.post(
        f"/api/v1/trees/{tid}/events",
        json={"event_type": "birth", "person_id": pid, "date_value": "1898"},
        headers=h,
    )
    eid = event.json()["id"]
    source = await client.post(
        f"/api/v1/trees/{tid}/sources", json={"title": "1900 Census"}, headers=h
    )
    sid = source.json()["id"]

    # A person-level and an event-level citation on the same source.
    citation_payloads = (
        {"source_id": sid, "person_id": pid, "page": "p.12"},
        {"source_id": sid, "event_id": eid, "page": "line 5"},
    )
    for payload in citation_payloads:
        created = await client.post(
            f"/api/v1/trees/{tid}/citations", json=payload, headers=h
        )
        # The exact success code of this endpoint is not pinned here; only
        # error responses are rejected.
        assert created.status_code < 400, created.text

    exported = await client.get(f"/api/v1/trees/{tid}/gedcom/export", headers=h)
    assert exported.status_code == 200, exported.text
    text = exported.text
    # Citation links + pages are emitted (previously dropped).
    assert "1 SOUR @S1@" in text  # person-level
    assert "2 PAGE p.12" in text
    assert "2 SOUR @S1@" in text  # event-level (under 1 BIRT)
    assert "3 PAGE line 5" in text

    # Round-trip into a fresh tree: the citations survive.
    fresh = await client.post("/api/v1/trees", json={"name": "RT"}, headers=h)
    tid2 = fresh.json()["id"]
    reimported = await client.post(
        f"/api/v1/trees/{tid2}/gedcom/import",
        files={"file": ("rt.ged", text.encode(), "text/plain")},
        headers=h,
    )
    assert reimported.status_code == 200, reimported.text

    cites = (await client.get(f"/api/v1/trees/{tid2}/citations", headers=h)).json()
    assert len(cites) >= 2
    assert any(c["person_id"] for c in cites)
    assert any(c["event_id"] for c in cites)
    assert {"p.12", "line 5"} <= {c.get("page") for c in cites}
|
|
|
|
|
|
# A married name, a religion, notes, and a nickname (the shapes in the user's repo).
# The "|" characters inside the NOTE line are part of the note text itself.
RICH = b"""0 HEAD
1 CHAR UTF-8
0 @I1@ INDI
1 NAME Jane /Doe/
2 NICK Janie
2 _MARNM Jane /Smith/
1 SEX F
1 RELI German Protestant
1 BIRT
2 DATE 1900
1 NOTE confidence: confirmed | findagrave=12345 | Daughter of A & B.
0 TRLR
"""
|
|
|
|
|
|
async def test_import_marnm_reli_note(client):
    """Import RICH and check names, the religion event and the person notes.

    Covers the _MARNM married name, the NICK nickname, the RELI value and the
    NOTE line of the single individual in RICH.
    """
    h, tid = await _tree(client, "ged-rich@example.com")
    resp = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import",
        files={"file": ("rich.ged", RICH, "text/plain")},
        headers=h,
    )
    assert resp.status_code == 200, resp.text
    report = resp.json()
    assert report["unmapped_tags"] == []  # NOTE and RELI are handled now

    person = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()[0]
    pid = person["id"]

    # Maiden name is primary; married name is a typed alternate.
    names = (
        await client.get(f"/api/v1/trees/{tid}/persons/{pid}/names", headers=h)
    ).json()
    by_type = {n["name_type"]: n for n in names}
    birth_name = by_type["birth"]
    married_name = by_type["married"]
    assert birth_name["surname"] == "Doe"
    assert birth_name["is_primary"] is True
    assert birth_name["nickname"] == "Janie"
    assert married_name["surname"] == "Smith"
    assert married_name["is_primary"] is False

    # Religion imported as an event with the value in detail; notes on the person.
    events = (
        await client.get(f"/api/v1/trees/{tid}/persons/{pid}/events", headers=h)
    ).json()
    reli = next(e for e in events if e["event_type"] == "religion")
    assert reli["detail"] == "German Protestant"

    # Notes are optional in the list payload, so the check is conditional, but
    # when notes are present they must carry the imported NOTE text. The
    # previous form ended in ``or True`` and therefore could never fail.
    notes = person.get("notes")
    if notes:
        assert "findagrave=12345" in notes
|
|
|
|
|
|
async def test_preview_and_dedupe_merge(client):
    """Preview flags a duplicate; importing with a merge resolution reuses it."""
    import json as _json

    headers, tid = await _tree(client, "ged-dupe@example.com")
    sample_upload = {"file": ("s.ged", SAMPLE, "text/plain")}

    # Seed an existing person who will match the incoming one.
    await client.post(
        f"/api/v1/trees/{tid}/persons",
        json={"given": "John", "surname": "Smith"},
        headers=headers,
    )
    listed = await client.get(f"/api/v1/trees/{tid}/persons", headers=headers)
    existing = listed.json()[0]

    # Preview flags @I1@ (John Smith) as a duplicate.
    preview = await client.post(
        f"/api/v1/trees/{tid}/gedcom/preview", files=sample_upload, headers=headers
    )
    assert preview.status_code == 200, preview.text
    candidates = preview.json()["potential_duplicates"]
    john = next(c for c in candidates if c["incoming_name"].startswith("John"))
    assert john["existing_person_id"] == existing["id"]

    # Import, merging John into the existing person; the others come in new.
    merge_plan = {john["xref"]: {"action": "merge", "target_id": existing["id"]}}
    imported = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import",
        files=sample_upload,
        data={"resolutions": _json.dumps(merge_plan)},
        headers=headers,
    )
    assert imported.status_code == 200, imported.text
    assert imported.json()["counts"]["merged"] == 1

    # 1 existing + Mary + Junior = 3 (John was merged, not duplicated).
    after = await client.get(f"/api/v1/trees/{tid}/persons", headers=headers)
    assert len(after.json()) == 3
|
|
|
|
|
|
async def test_dedupe_skip_default(client):
    """With ``default_action=skip`` one incoming person is reported as skipped.

    A John Smith is seeded before importing SAMPLE; the import must report one
    skipped record and the tree must end up with three persons.
    """
    h, tid = await _tree(client, "ged-skip@example.com")
    # Seed the person first. The URL was previously written as
    # ``<gedcom/persons> if False else <persons>``; the first branch was
    # unreachable, so the persons URL is used directly.
    await client.post(
        f"/api/v1/trees/{tid}/persons",
        json={"given": "John", "surname": "Smith"},
        headers=h,
    )
    resp = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import",
        files={"file": ("s.ged", SAMPLE, "text/plain")},
        data={"default_action": "skip"},
        headers=h,
    )
    assert resp.status_code == 200, resp.text
    counts = resp.json()["counts"]
    assert counts.get("skipped", 0) == 1
    # John skipped (links to existing), Mary + Junior added = 3 total.
    people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()
    assert len(people) == 3
|