GEDCOM: duplicate-aware import + typed name/attribute mapping
Duplicate detection (the "merge / skip / overwrite" the user asked for):
- New POST /gedcom/preview dry-runs the file and flags incoming people that
resemble existing ones (name similarity via difflib + birth-year guard;
high/medium score). No writes.
- /gedcom/import takes default_action (new|skip|merge|overwrite) + per-xref
  resolutions {xref: {action, target_id}}:
    new       - create as a new person (current behavior)
    skip      - link families to the existing person, copy nothing
    merge     - attach the incoming names (as alternates), events, citations,
                and notes onto the existing person
    overwrite - soft-delete the existing person, import the incoming one fresh
Relationship creation is deduped so a merge can't double an edge.
Richer record mapping (covers the user's repo's GEDCOM):
- Multiple NAME records honor their TYPE; _MARNM (and NICK) import as typed
alternate names — maiden stays primary, married becomes a "married" Name.
- RELI -> a "religion" event with the value in detail; OCCU/EDUC values too.
- NOTE -> person notes (and event notes); NOTE/RELI are no longer "unmapped".
- Export round-trips name TYPE.
Verified against the user's 2185-person export: 0 unmapped tags. 48 tests pass.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -75,3 +75,109 @@ async def test_gedcom_export_and_reimport(client):
|
||||
)
|
||||
assert resp.json()["counts"]["persons"] == 3
|
||||
assert resp.json()["counts"]["relationships"] == 3
|
||||
|
||||
|
||||
# GEDCOM fixture: a single individual carrying a married name, a religion,
# a note, and a nickname (the shapes in the user's repo).
# Each GEDCOM record must sit on its own line starting with its level number,
# so the literal contains nothing but those records.
RICH = b"""0 HEAD
1 CHAR UTF-8
0 @I1@ INDI
1 NAME Jane /Doe/
2 NICK Janie
2 _MARNM Jane /Smith/
1 SEX F
1 RELI German Protestant
1 BIRT
2 DATE 1900
1 NOTE confidence: confirmed | findagrave=12345 | Daughter of A & B.
0 TRLR
"""
|
||||
|
||||
|
||||
async def test_import_marnm_reli_note(client):
    """Import the RICH fixture and check the name, religion, and note mapping.

    Verifies that the import reports no unmapped tags, that the birth name
    stays primary (with its nickname) while the _MARNM value becomes a
    non-primary "married" name, and that RELI becomes a "religion" event
    whose detail holds the value.
    """
    h, tid = await _tree(client, "ged-rich@example.com")
    resp = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import",
        files={"file": ("rich.ged", RICH, "text/plain")},
        headers=h,
    )
    assert resp.status_code == 200, resp.text
    report = resp.json()
    assert report["unmapped_tags"] == []  # NOTE and RELI are handled now

    person = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()[0]
    pid = person["id"]

    # Maiden name is primary; married name is a typed alternate.
    names = (
        await client.get(f"/api/v1/trees/{tid}/persons/{pid}/names", headers=h)
    ).json()
    by_type = {n["name_type"]: n for n in names}
    birth = by_type["birth"]
    married = by_type["married"]
    # One condition per assert so a failure names the exact field that broke.
    assert birth["surname"] == "Doe"
    assert birth["is_primary"] is True
    assert birth["nickname"] == "Janie"
    assert married["surname"] == "Smith"
    assert married["is_primary"] is False

    # Religion imported as an event with the value in detail; notes on the person.
    events = (
        await client.get(f"/api/v1/trees/{tid}/persons/{pid}/events", headers=h)
    ).json()
    # A bare next() would surface a miss as "coroutine raised StopIteration";
    # use a default so the failure shows the events that were returned.
    reli = next((e for e in events if e["event_type"] == "religion"), None)
    assert reli is not None, events
    assert reli["detail"] == "German Protestant"

    # Notes are optional in the list payload, so only check them when present.
    # (The previous "... or True" form could never fail.)
    # NOTE(review): if a person-detail endpoint exposes notes, assert on it
    # unconditionally instead.
    notes = person.get("notes")
    if notes:
        assert "findagrave=12345" in notes
|
||||
|
||||
|
||||
async def test_preview_and_dedupe_merge(client):
    """Preview flags a duplicate, then import merges it into the existing person.

    Seeds a "John Smith", checks that /gedcom/preview reports the incoming
    John as a potential duplicate of that person, then imports SAMPLE with a
    per-xref "merge" resolution and checks that one person was merged and the
    tree ends up with three people.
    """
    import json as _json

    h, tid = await _tree(client, "ged-dupe@example.com")

    # Seed an existing person who will match the incoming one.
    seed = await client.post(
        f"/api/v1/trees/{tid}/persons",
        json={"given": "John", "surname": "Smith"},
        headers=h,
    )
    # Fail here, with the response body, rather than on the [0] lookup below.
    assert seed.status_code < 400, seed.text
    existing = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()[0]

    # Preview flags @I1@ (John Smith) as a duplicate.
    prev = await client.post(
        f"/api/v1/trees/{tid}/gedcom/preview",
        files={"file": ("s.ged", SAMPLE, "text/plain")},
        headers=h,
    )
    assert prev.status_code == 200, prev.text
    dups = prev.json()["potential_duplicates"]
    # A bare next() would surface a miss as "coroutine raised StopIteration";
    # use a default so the failure shows what the preview returned.
    john = next((d for d in dups if d["incoming_name"].startswith("John")), None)
    assert john is not None, dups
    assert john["existing_person_id"] == existing["id"]

    # Import, merging John into the existing person; the others come in new.
    resolutions = _json.dumps(
        {john["xref"]: {"action": "merge", "target_id": existing["id"]}}
    )
    resp = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import",
        files={"file": ("s.ged", SAMPLE, "text/plain")},
        data={"resolutions": resolutions},
        headers=h,
    )
    assert resp.status_code == 200, resp.text
    counts = resp.json()["counts"]
    assert counts["merged"] == 1

    # 1 existing + Mary + Junior = 3 (John was merged, not duplicated).
    people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()
    assert len(people) == 3
|
||||
|
||||
|
||||
async def test_dedupe_skip_default(client):
    """Import with default_action="skip" and check the duplicate is not re-created.

    Seeds a "John Smith", imports SAMPLE with "skip" as the default action,
    and checks that exactly one incoming person was skipped and the tree ends
    up with three people.
    """
    h, tid = await _tree(client, "ged-skip@example.com")

    # Seed the person that the incoming John should match.
    seed = await client.post(
        f"/api/v1/trees/{tid}/persons",
        json={"given": "John", "surname": "Smith"},
        headers=h,
    )
    # Fail here, with the response body, rather than on the counts below.
    assert seed.status_code < 400, seed.text

    resp = await client.post(
        f"/api/v1/trees/{tid}/gedcom/import",
        files={"file": ("s.ged", SAMPLE, "text/plain")},
        data={"default_action": "skip"},
        headers=h,
    )
    assert resp.status_code == 200, resp.text
    counts = resp.json()["counts"]
    assert counts.get("skipped", 0) == 1

    # John skipped (links to existing), Mary + Junior added = 3 total.
    people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()
    assert len(people) == 3
|
||||
|
||||
Reference in New Issue
Block a user