2026-06-07 10:41:09 -04:00
7 changed files with 1047 additions and 90 deletions
@@ -1,25 +1,56 @@
+import json
 import uuid

-from fastapi import APIRouter, File, Response, UploadFile
+from fastapi import APIRouter, File, Form, Response, UploadFile

 from app.api.deps import CurrentUser, SessionDep
-from app.schemas.gedcom import ImportReport
+from app.schemas.gedcom import ImportPreview, ImportReport
 from app.services import gedcom, tree_service

 router = APIRouter(prefix="/trees", tags=["gedcom"])


+@router.post("/{tree_id}/gedcom/preview", response_model=ImportPreview)
+async def preview_gedcom(
+    tree_id: uuid.UUID,
+    session: SessionDep,
+    current: CurrentUser,
+    file: UploadFile = File(...),
+) -> ImportPreview:
+    """Dry-run a GEDCOM upload against an existing tree.
+
+    Looks the tree up for the current user, decodes the upload as UTF-8
+    (undecodable bytes are replaced, not rejected) and wraps the result of
+    ``gedcom.preview_gedcom`` in an ``ImportPreview`` so the user can decide
+    how to resolve likely duplicates before importing.
+    """
+    tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
+    raw = await file.read()
+    preview = await gedcom.preview_gedcom(
+        session,
+        actor=current,
+        tree=tree,
+        text=raw.decode("utf-8", errors="replace"),
+    )
+    return ImportPreview(**preview)
+
+
@router.post("/{tree_id}/gedcom/import", response_model=ImportReport)
 async def import_gedcom(
    tree_id: uuid.UUID,
    session: SessionDep,
    current: CurrentUser,
    file: UploadFile = File(...),
+    default_action: str = Form("new"),
+    resolutions: str = Form("{}"),
 ) -> ImportReport:
-    # NOTE: additive — records are created as new; existing people are not merged.
+    """Import a GEDCOM. ``default_action`` (new|skip|merge|overwrite) applies to
+    incoming people that match an existing one; ``resolutions`` is a JSON object
+    {xref: {action, target_id}} overriding it per record.
+
+    Responds with HTTP 422 when ``default_action`` is not one of the four
+    actions, or when ``resolutions`` is not valid JSON or not a JSON object."""
+    # Local import: the module-level fastapi import does not bring this name in.
+    from fastapi import HTTPException
+
    tree = await tree_service.get_tree(session, viewer_id=current.id, tree_id=tree_id)
    text = (await file.read()).decode("utf-8", errors="replace")
-    report = await gedcom.import_gedcom(session, actor=current, tree=tree, text=text)
+    if default_action not in ("new", "skip", "merge", "overwrite"):
+        raise HTTPException(
+            status_code=422,
+            detail="default_action must be one of: new, skip, merge, overwrite",
+        )
+    try:
+        parsed = json.loads(resolutions or "{}")
+    except json.JSONDecodeError as exc:
+        # Dropping a malformed payload would discard the user's per-record
+        # merge/skip choices and import every duplicate as a new person.
+        raise HTTPException(status_code=422, detail="resolutions must be valid JSON") from exc
+    if not isinstance(parsed, dict):
+        # The service looks overrides up by xref, so only an object is usable.
+        raise HTTPException(status_code=422, detail="resolutions must be a JSON object")
+    report = await gedcom.import_gedcom(
+        session,
+        actor=current,
+        tree=tree,
+        text=text,
+        default_action=default_action,
+        resolutions=parsed,
+    )
    return ImportReport(**report)


@@ -1,6 +1,25 @@
+import uuid
+
 from pydantic import BaseModel


 class ImportReport(BaseModel):
    counts: dict[str, int]
    unmapped_tags: list[str]
+
+
+class DuplicateMatch(BaseModel):
+    # An incoming GEDCOM person that resembles an existing one in the tree.
+    # GEDCOM cross-reference id of the incoming INDI record.
+    xref: str
+    # Display name / birth year (as text) of the incoming person.
+    incoming_name: str
+    incoming_birth_year: str | None = None
+    # The person already in the tree that the incoming record resembles.
+    existing_person_id: uuid.UUID
+    existing_name: str
+    existing_birth_year: str | None = None
+    # Match strength label.
+    score: str  # "high" | "medium"
+
+
+class ImportPreview(BaseModel):
+    # Response body of the GEDCOM preview (dry-run) endpoint.
+    # Number of incoming records per kind (e.g. persons, names, events).
+    counts: dict[str, int]
+    # Incoming people that look like someone already in the tree.
+    potential_duplicates: list[DuplicateMatch]
+    # GEDCOM tags found in the upload that the importer does not map.
+    unmapped_tags: list[str]
@@ -4,14 +4,20 @@ A pragmatic parser + mapper for the common subset of GEDCOM (5.5.1 / 7 share
 the line grammar): INDI, FAM, SOUR. Import maps records into a tree and returns
 a mapping report (counts + unmapped tags); export serializes the tree back to
 GEDCOM. Runs inline for now — large files should move to the worker later.
+
+Import is duplicate-aware: ``preview_gedcom`` reports incoming people that look
+like existing ones, and ``import_gedcom`` applies a per-record resolution
+(new / skip / merge / overwrite). Names carry their GEDCOM type (a married name
+imports as a typed alternate, not a second primary).
 """

 import re
 import uuid
 from collections import defaultdict
-from datetime import date
+from datetime import UTC, date, datetime
+from difflib import SequenceMatcher

-from sqlalchemy import select
+from sqlalchemy import or_, select, update
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.models.enums import ParentChildQualifier, RelationshipType
@@ -32,12 +38,31 @@ INDI_EVENTS = {
    "BURI": "burial", "CREM": "cremation", "RESI": "residence", "CENS": "census",
    "IMMI": "immigration", "EMIG": "emigration", "OCCU": "occupation",
    "EDUC": "education", "GRAD": "graduation", "RETI": "retirement",
-    "NATU": "naturalization", "BAPL": "baptism",
+    "NATU": "naturalization", "BAPL": "baptism", "RELI": "religion",
+}
+# INDI attribute tags whose line VALUE is the fact (no date), stored in detail.
+VALUE_EVENTS = {"RELI", "OCCU", "EDUC"}
+# INDI sub-tags consumed elsewhere or intentionally ignored (not "unmapped").
+INDI_SKIP_TAGS = {
+    "NAME", "SEX", "SOUR", "FAMC", "FAMS", "CHAN", "OBJE", "_UID", "_MARNM", "NOTE",
 }
 # FAM-level events.
 FAM_EVENTS = {"MARR": "marriage", "DIV": "divorce", "ENGA": "engagement"}
 EVENT_TO_GED = {v: k for k, v in {**INDI_EVENTS, **FAM_EVENTS}.items()}

+# GEDCOM NAME TYPE (or _MARNM-derived) -> our Name.name_type vocabulary.
+NAME_TYPE_MAP = {
+    "birth": "birth", "maiden": "birth", "married": "married",
+    "aka": "alias", "also known as": "alias", "nickname": "nickname",
+    "religious": "religious", "immigrant": "immigration",
+    "immigration": "immigration", "professional": "alias", "other": "alias",
+}
+# Our type -> GEDCOM TYPE on export (birth is the default; emit nothing).
+EXPORT_TYPE_MAP = {
+    "married": "married", "alias": "aka", "nickname": "nickname",
+    "religious": "religious", "immigration": "immigrant",
+}
+

 class GedcomNode:
    __slots__ = ("level", "tag", "value", "xref", "children")
@@ -108,6 +133,50 @@ def _parse_name(value: str) -> tuple[str | None, str | None]:
    return value.strip() or None, None


+def _parse_marnm(value: str, base_given: str | None) -> tuple[str | None, str | None]:
+    """Split a _MARNM value into (given, surname).
+
+    The value is either a full name ("Jane /Smith/") or only the married
+    surname ("Smith"). Whenever the value supplies no given name, the given
+    name of the base name is used instead.
+    """
+    text = (value or "").strip()
+    if "/" not in text:
+        # Bare surname: borrow the given name from the base name.
+        return base_given, text or None
+    given, surname = _parse_name(text)
+    return given or base_given, surname
+
+
+def _extract_names(rec: GedcomNode) -> list[dict]:
+    """Collect every name of an INDI as typed dicts, in document order.
+
+    Each NAME record (with its optional TYPE) and each _MARNM subtag, whether
+    nested under a NAME or directly under the INDI, yields one entry. The
+    first entry typed "birth" is flagged primary; without one, the first entry
+    is. ``sort`` is the entry's position in the returned list.
+    """
+
+    def married(raw: str, given_fallback: str | None) -> dict:
+        # One married-name entry built from a _MARNM value.
+        given, surname = _parse_marnm(raw, given_fallback)
+        return {"type": "married", "given": given, "surname": surname,
+                "display": raw or None, "nickname": None}
+
+    collected: list[dict] = []
+    for name_node in rec.all("NAME"):
+        given, surname = _parse_name(name_node.value)
+        raw_type = (name_node.text("TYPE") or "").strip().lower()
+        collected.append({
+            # Unknown TYPE values pass through as-is; a missing TYPE means birth.
+            "type": NAME_TYPE_MAP.get(raw_type, raw_type or "birth"),
+            "given": given,
+            "surname": surname,
+            "display": name_node.value or None,
+            "nickname": name_node.text("NICK"),
+        })
+        for nested in name_node.all("_MARNM"):
+            collected.append(married(nested.value, given))
+    for loose in rec.all("_MARNM"):
+        fallback = collected[0]["given"] if collected else None
+        collected.append(married(loose.value, fallback))
+    if not collected:
+        return collected
+    primary_at = next(
+        (pos for pos, entry in enumerate(collected) if entry["type"] == "birth"), 0
+    )
+    for pos, entry in enumerate(collected):
+        entry["is_primary"] = pos == primary_at
+        entry["sort"] = pos
+    return collected
+
+
+def _norm(given: str | None, surname: str | None) -> str:
+    """Lower-cased "given surname" with whitespace runs collapsed to one space."""
+    joined = f"{given or ''} {surname or ''}"
+    return re.sub(r"\s+", " ", joined.strip().lower())
+
+
 def _year(date_value: str | None) -> str | None:
    if not date_value:
        return None
@@ -132,18 +201,215 @@ def _sex(value: str | None) -> str | None:
    return {"M": "male", "F": "female"}.get(v, value.strip().lower() or None)


+def _notes_text(rec: GedcomNode) -> str | None:
+    """Return the INDI's non-blank NOTE values, stripped and newline-joined.
+
+    Gives ``None`` when the record has no NOTE carrying text.
+    """
+    kept: list[str] = []
+    for node in rec.all("NOTE"):
+        stripped = node.value.strip() if node.value else ""
+        if stripped:
+            kept.append(stripped)
+    return "\n".join(kept) or None
+
+
+def _person_summary(rec: GedcomNode) -> dict:
+    """Summarise an incoming INDI for duplicate matching.
+
+    Returns its extracted ``names``, the normalised primary name (``norm``), a
+    display ``name`` ("(no name)" when nothing usable exists) and the birth
+    ``year`` taken from the first BIRT record's DATE.
+    """
+    names = _extract_names(rec)
+    primary = next((n for n in names if n.get("is_primary")), None)
+    if primary is None and names:
+        primary = names[0]
+
+    given = surname = None
+    label = ""
+    if primary:
+        given, surname = primary["given"], primary["surname"]
+        label = " ".join(part for part in (given, surname) if part)
+        if not label:
+            # No parsed parts: fall back to the raw NAME value.
+            label = primary.get("display") or ""
+
+    birth = rec.first("BIRT")
+    birth_year = _year(birth.text("DATE")) if birth else None
+    return {
+        "names": names,
+        "norm": _norm(given, surname),
+        "name": label or "(no name)",
+        "year": birth_year,
+    }
+
+
+async def _build_existing_index(session: AsyncSession, tree: Tree) -> list[dict]:
+    """Summarise the tree's non-deleted people for duplicate matching.
+
+    Each entry holds the person's ``id``, the normalised name ``norm``, a
+    display ``name`` ("(no name)" when none is available) and the birth
+    ``year`` as text (``None`` when unknown).
+    """
+
+    async def fetch(stmt) -> list:
+        # Run a select and materialise its ORM rows.
+        return list((await session.execute(stmt)).scalars().all())
+
+    people = await fetch(
+        select(Person).where(Person.tree_id == tree.id, Person.deleted_at.is_(None))
+    )
+    all_names = await fetch(
+        select(Name).where(Name.tree_id == tree.id, Name.deleted_at.is_(None))
+    )
+    birth_events = await fetch(
+        select(Event).where(
+            Event.tree_id == tree.id,
+            Event.deleted_at.is_(None),
+            Event.event_type == "birth",
+        )
+    )
+
+    # Primary names sort first, then by sort_order; keep the first per person.
+    preferred: dict[uuid.UUID, Name] = {}
+    for nm in sorted(all_names, key=lambda n: (not n.is_primary, n.sort_order)):
+        if nm.person_id not in preferred:
+            preferred[nm.person_id] = nm
+
+    # The first birth event that yields a year wins for each person.
+    birth_year: dict[uuid.UUID, str] = {}
+    for ev in birth_events:
+        owner = ev.person_id
+        if not owner or owner in birth_year:
+            continue
+        found = str(ev.date_start.year) if ev.date_start else _year(ev.date_value)
+        if found:
+            birth_year[owner] = found
+
+    def summarise(person: Person) -> dict:
+        nm = preferred.get(person.id)
+        given = nm.given if nm else None
+        surname = nm.surname if nm else None
+        label = " ".join(part for part in (given, surname) if part)
+        if not label and nm:
+            label = nm.display_name
+        return {
+            "id": person.id,
+            "norm": _norm(given, surname),
+            "name": label or "(no name)",
+            "year": birth_year.get(person.id),
+        }
+
+    return [summarise(person) for person in people]
+
+
+def _best_match(norm: str, year: str | None, index: list[dict]) -> tuple[dict | None, str | None]:
+    """Closest existing person by name similarity, rejecting clear birth-year
+    conflicts. Returns (entry, "high"|"medium") or (None, None).
+
+    ``norm`` is the normalised incoming name, ``year`` its birth year (text or
+    None) and ``index`` the existing-people entries (each with "norm" and
+    "year"). A candidate needs a similarity ratio of at least 0.88 and, when
+    both years are known, a difference of at most one year. "high" requires a
+    ratio of at least 0.93 plus either agreeing years or both years unknown.
+    """
+    if not norm:
+        return None, None
+    cutoff = 0.88
+
+    def year_gap(entry: dict) -> int | None:
+        # Absolute birth-year difference; None when either side is unknown.
+        if not year or not entry["year"]:
+            return None
+        return abs(int(year) - int(entry["year"]))
+
+    best: dict | None = None
+    best_r = 0.0
+    for e in index:
+        if not e["norm"]:
+            continue
+        matcher = SequenceMatcher(None, norm, e["norm"])
+        # Both quick ratios are upper bounds on ratio(), so a pair they reject
+        # can never reach the cutoff; this skips the expensive comparison for
+        # the vast majority of unrelated names without changing any result.
+        if matcher.real_quick_ratio() < cutoff or matcher.quick_ratio() < cutoff:
+            continue
+        r = matcher.ratio()
+        if r < cutoff:
+            continue
+        gap = year_gap(e)
+        if gap is not None and gap > 1:
+            continue  # same-ish name but different birth year — not a duplicate
+        if r > best_r:
+            best_r = r
+            best = e
+    if best is None:
+        return None, None
+    gap = year_gap(best)
+    year_match = gap is not None and gap <= 1
+    both_unknown = not year and not best["year"]
+    score = "high" if best_r >= 0.93 and (year_match or both_unknown) else "medium"
+    return best, score
+
+
+def _relkey(rtype: RelationshipType, a: uuid.UUID, b: uuid.UUID) -> tuple:
+    """Hashable identity of a relationship edge, used to spot duplicates.
+
+    Parent-child edges keep their direction; every other type is keyed on the
+    sorted pair, so (a, b) and (b, a) produce the same key.
+    """
+    first, second = str(a), str(b)
+    if rtype != RelationshipType.parent_child:
+        return (rtype.value, *sorted((first, second)))
+    return ("pc", first, second)
+
+
+def _count_incoming(roots: list[GedcomNode]) -> tuple[dict, list[str]]:
+    """Tally what a parsed GEDCOM contains without touching the database.
+
+    Returns (counts, unmapped): counts per kind (persons, names, events,
+    families, sources; a kind appears only once it has been tallied) and the
+    sorted INDI sub-tags that are neither events nor deliberately skipped.
+    """
+    tally: dict[str, int] = defaultdict(int)
+    unknown_tags: set[str] = set()
+    for record in roots:
+        tag = record.tag
+        if tag == "INDI" and record.xref:
+            tally["persons"] += 1
+            tally["names"] += len(_extract_names(record))
+            for sub in record.children:
+                if sub.tag in INDI_EVENTS:
+                    tally["events"] += 1
+                elif sub.tag not in INDI_SKIP_TAGS:
+                    unknown_tags.add(sub.tag)
+        elif tag == "FAM":
+            tally["families"] += 1
+            for sub in record.children:
+                if sub.tag in FAM_EVENTS:
+                    tally["events"] += 1
+        elif tag == "SOUR" and record.xref:
+            tally["sources"] += 1
+    return dict(tally), sorted(unknown_tags)
+
+
+async def preview_gedcom(session: AsyncSession, *, actor: User, tree: Tree, text: str) -> dict:
+    """Dry run of an import: incoming counts, unmapped tags and likely duplicates.
+
+    Nothing is written. Raises ``Forbidden`` when ``actor`` cannot edit the
+    tree. Every incoming INDI whose name/birth year resembles an existing
+    person is reported together with that person and a match score.
+    """
+    allowed = await privacy.can_edit_tree(session, user_id=actor.id, tree=tree)
+    if not allowed:
+        raise Forbidden("not an editor of this tree")
+    roots = parse_records(text)
+    counts, unmapped = _count_incoming(roots)
+    existing = await _build_existing_index(session, tree)
+
+    flagged: list[dict] = []
+    for rec in roots:
+        if rec.tag == "INDI" and rec.xref:
+            summary = _person_summary(rec)
+            hit, score = _best_match(summary["norm"], summary["year"], existing)
+            if hit is not None:
+                flagged.append({
+                    "xref": rec.xref,
+                    "incoming_name": summary["name"],
+                    "incoming_birth_year": summary["year"],
+                    "existing_person_id": hit["id"],
+                    "existing_name": hit["name"],
+                    "existing_birth_year": hit["year"],
+                    "score": score,
+                })
+    return {"counts": counts, "potential_duplicates": flagged, "unmapped_tags": unmapped}
+
+
 async def import_gedcom(
-    session: AsyncSession, *, actor: User, tree: Tree, text: str
+    session: AsyncSession,
+    *,
+    actor: User,
+    tree: Tree,
+    text: str,
+    default_action: str = "new",
+    resolutions: dict | None = None,
 ) -> dict:
+    """Import records. ``default_action`` (new|skip|merge|overwrite) applies to
+    incoming people that match an existing one; ``resolutions`` overrides it per
+    GEDCOM xref ({xref: {action, target_id}}). 'skip' links families to the
+    existing person but copies nothing; 'merge' also copies the incoming names
+    (as alternates), events and citations onto them; 'overwrite' deletes the
+    existing person and imports the incoming one fresh."""
    if not await privacy.can_edit_tree(session, user_id=actor.id, tree=tree):
        raise Forbidden("not an editor of this tree")

+    resolutions = resolutions or {}
    roots = parse_records(text)
-    counts = defaultdict(int)
+    counts: dict[str, int] = defaultdict(int)
    unmapped: set[str] = set()
    place_cache: dict[str, uuid.UUID] = {}
    source_map: dict[str, uuid.UUID] = {}
    person_map: dict[str, uuid.UUID] = {}
+    now = datetime.now(UTC)
+
+    index = await _build_existing_index(session, tree)
+
+    # Pre-load existing relationship keys so a merge doesn't create dup edges.
+    existing_rels = list(
+        (
+            await session.execute(
+                select(Relationship).where(
+                    Relationship.tree_id == tree.id, Relationship.deleted_at.is_(None)
+                )
+            )
+        ).scalars().all()
+    )
+    rel_keys = {_relkey(r.type, r.person_from_id, r.person_to_id) for r in existing_rels}
+
+    def add_relationship(
+        rtype: RelationshipType, a: uuid.UUID, b: uuid.UUID, **kw
+    ) -> Relationship | None:
+        """Queue a new relationship edge unless an equivalent one is known.
+
+        Returns the new ``Relationship`` (added to the session, not flushed), or
+        ``None`` when its key is already in ``rel_keys``. New edges are
+        recorded in ``rel_keys`` and counted under "relationships".
+        """
+        dedupe_key = _relkey(rtype, a, b)
+        if dedupe_key not in rel_keys:
+            edge = Relationship(
+                tree_id=tree.id, type=rtype, person_from_id=a, person_to_id=b, **kw
+            )
+            session.add(edge)
+            rel_keys.add(dedupe_key)
+            counts["relationships"] += 1
+            return edge
+        return None

    async def place_id(name: str | None) -> uuid.UUID | None:
        if not name:
@@ -177,59 +443,139 @@ async def import_gedcom(
            sid = source_map.get(s.value.strip())
            if sid is None:
                continue
-            session.add(
-                Citation(tree_id=tree.id, source_id=sid, page=s.text("PAGE"), **target)
-            )
+            session.add(Citation(tree_id=tree.id, source_id=sid, page=s.text("PAGE"), **target))
            counts["citations"] += 1

-    # Individuals.
-    for rec in roots:
-        if rec.tag != "INDI" or not rec.xref:
-            continue
-        person = Person(tree_id=tree.id, gender=_sex(rec.text("SEX")))
-        session.add(person)
-        await session.flush()
-        person_map[rec.xref] = person.id
-        counts["persons"] += 1
-
-        for i, nm in enumerate(rec.all("NAME")):
-            given, surname = _parse_name(nm.value)
+    def add_names(person_id: uuid.UUID, names: list[dict], *, set_primary: bool) -> None:
+        for nd in names:
            session.add(
                Name(
                    tree_id=tree.id,
-                    person_id=person.id,
-                    name_type="birth",
-                    given=given,
-                    surname=surname,
-                    display_name=nm.value or None,
-                    is_primary=(i == 0),
-                    sort_order=i,
+                    person_id=person_id,
+                    name_type=nd["type"],
+                    given=nd["given"],
+                    surname=nd["surname"],
+                    nickname=nd.get("nickname"),
+                    display_name=nd.get("display"),
+                    is_primary=set_primary and nd.get("is_primary", False),
+                    sort_order=nd.get("sort", 0),
                )
            )
            counts["names"] += 1

-        await add_citations(rec, person_id=person.id)
-
+    async def add_events(rec: GedcomNode, person_id: uuid.UUID) -> None:
+        """Create one Event per mapped INDI sub-record of ``rec`` for
+        ``person_id``, attach its citations, and record unmapped tags."""
        for child in rec.children:
            if child.tag in INDI_EVENTS:
                dv = child.text("DATE")
+                # Attribute-style facts (RELI, OCCU, EDUC) carry their value on
+                # the line itself; store it in detail. The value is guarded the
+                # same way _notes_text / _parse_marnm guard it, so a bare tag
+                # with no value cannot raise here.
+                detail = (child.value or "").strip() if child.tag in VALUE_EVENTS else None
                ev = Event(
                    tree_id=tree.id,
-                    person_id=person.id,
+                    person_id=person_id,
                    event_type=INDI_EVENTS[child.tag],
                    date_value=dv,
                    date_start=_date_start(dv),
                    place_id=await place_id(child.text("PLAC")),
+                    detail=detail or None,
+                    notes=child.text("NOTE"),
                )
                session.add(ev)
                await session.flush()
                counts["events"] += 1
                await add_citations(child, event_id=ev.id)
-            elif child.tag in ("NAME", "SEX", "SOUR", "FAMC", "FAMS", "CHAN", "OBJE", "_UID"):
+            elif child.tag in INDI_SKIP_TAGS:
                continue
            else:
                unmapped.add(child.tag)

+    async def soft_delete_existing(person_id: uuid.UUID) -> None:
+        """Soft-delete an existing person of this tree ahead of an overwrite.
+
+        Stamps ``deleted_at`` on the person and on every live relationship of
+        the tree that touches them, and clears any ``User.self_person_id``
+        pointing at them. Does nothing when this tree has no live person with
+        that id.
+        """
+        p = (
+            await session.execute(
+                select(Person).where(
+                    Person.id == person_id,
+                    # person_id may originate from a client-supplied target_id:
+                    # never touch a person that belongs to another tree.
+                    Person.tree_id == tree.id,
+                    Person.deleted_at.is_(None),
+                )
+            )
+        ).scalar_one_or_none()
+        if p is None:
+            return
+        p.deleted_at = now
+        rels = (
+            await session.execute(
+                select(Relationship).where(
+                    Relationship.tree_id == tree.id,
+                    Relationship.deleted_at.is_(None),
+                    or_(
+                        Relationship.person_from_id == person_id,
+                        Relationship.person_to_id == person_id,
+                    ),
+                )
+            )
+        ).scalars().all()
+        for r in rels:
+            r.deleted_at = now
+        # A user whose "self" person is being replaced must not keep pointing
+        # at the soft-deleted row.
+        await session.execute(
+            update(User).where(User.self_person_id == person_id).values(self_person_id=None)
+        )
+
+    # Precompute the best match per incoming xref (for default-policy resolution).
+    matches: dict[str, dict] = {}
+    for rec in roots:
+        if rec.tag == "INDI" and rec.xref:
+            summ = _person_summary(rec)
+            entry, _score = _best_match(summ["norm"], summ["year"], index)
+            if entry is not None:
+                matches[rec.xref] = entry
+
+    def resolve(xref: str) -> tuple[str, uuid.UUID | None]:
+        """Decide what to do with the incoming person ``xref``.
+
+        Returns (action, target person id). A per-record override in
+        ``resolutions`` wins over ``default_action``; without an explicit
+        ``target_id`` the best match for the xref is used. Anything that cannot
+        be honoured safely degrades to ("new", None): an unknown action, a
+        malformed ``target_id``, a ``target_id`` that is not a live person of
+        this tree, or no target at all.
+        """
+        matched = matches.get(xref)
+        override = resolutions.get(xref)
+        if override and isinstance(override, dict):
+            action = override.get("action", "new")
+            if action not in ("skip", "merge", "overwrite"):
+                return "new", None
+            raw_target = override.get("target_id")
+            if not raw_target:
+                return (action, matched["id"]) if matched is not None else ("new", None)
+            try:
+                target = uuid.UUID(str(raw_target))
+            except ValueError:
+                return "new", None
+            # target_id is client input: only accept people that are in this
+            # tree's index, otherwise merge/overwrite could reach into a tree
+            # the actor has no rights on.
+            if not any(entry["id"] == target for entry in index):
+                return "new", None
+            return action, target
+        if default_action != "new" and matched is not None:
+            return default_action, matched["id"]
+        return "new", None
+
+    # Individuals.
+    for rec in roots:
+        if rec.tag != "INDI" or not rec.xref:
+            continue
+        names = _extract_names(rec)
+        action, target = resolve(rec.xref)
+
+        if action == "skip" and target is not None:
+            person_map[rec.xref] = target
+            counts["skipped"] += 1
+            continue
+        if action == "merge" and target is not None:
+            person_map[rec.xref] = target
+            add_names(target, names, set_primary=False)
+            await add_events(rec, target)
+            await add_citations(rec, person_id=target)
+            note = _notes_text(rec)
+            if note:
+                existing = (
+                    await session.execute(select(Person).where(Person.id == target))
+                ).scalar_one_or_none()
+                if existing is not None:
+                    existing.notes = "\n".join(filter(None, [existing.notes, note]))
+            counts["merged"] += 1
+            continue
+        if action == "overwrite" and target is not None:
+            await soft_delete_existing(target)
+            counts["overwritten"] += 1
+
+        person = Person(tree_id=tree.id, gender=_sex(rec.text("SEX")), notes=_notes_text(rec))
+        session.add(person)
+        await session.flush()
+        person_map[rec.xref] = person.id
+        counts["persons"] += 1
+        add_names(person.id, names, set_primary=True)
+        await add_citations(rec, person_id=person.id)
+        await add_events(rec, person.id)
+
    # Families -> partnerships, parent-child edges, marriage events.
    for rec in roots:
        if rec.tag != "FAM":
@@ -238,17 +584,22 @@ async def import_gedcom(
        husb = person_map.get((rec.text("HUSB") or "").strip())
        wife = person_map.get((rec.text("WIFE") or "").strip())
        partnership_id: uuid.UUID | None = None
-        if husb and wife:
-            rel = Relationship(
-                tree_id=tree.id,
-                type=RelationshipType.partnership,
-                person_from_id=husb,
-                person_to_id=wife,
+        if husb and wife and husb != wife:
+            rel = add_relationship(RelationshipType.partnership, husb, wife)
+            if rel is not None:
+                await session.flush()
+                partnership_id = rel.id
+        if partnership_id is None and husb and wife:
+            # Edge already existed — find it so marriage events can attach.
+            existing = next(
+                (
+                    r for r in existing_rels
+                    if r.type == RelationshipType.partnership
+                    and {r.person_from_id, r.person_to_id} == {husb, wife}
+                ),
+                None,
            )
-            session.add(rel)
-            await session.flush()
-            partnership_id = rel.id
-            counts["relationships"] += 1
+            partnership_id = existing.id if existing else None

        for fe in rec.children:
            if fe.tag in FAM_EVENTS and partnership_id is not None:
@@ -271,16 +622,12 @@ async def import_gedcom(
                continue
            for parent in (husb, wife):
                if parent and parent != cp:
-                    session.add(
-                        Relationship(
-                            tree_id=tree.id,
-                            type=RelationshipType.parent_child,
-                            person_from_id=parent,
-                            person_to_id=cp,
-                            qualifier=ParentChildQualifier.biological,
-                        )
+                    add_relationship(
+                        RelationshipType.parent_child,
+                        parent,
+                        cp,
+                        qualifier=ParentChildQualifier.biological,
                    )
-                    counts["relationships"] += 1

    record_audit(
        session,
@@ -397,6 +744,9 @@ async def export_gedcom(session: AsyncSession, *, viewer_id: uuid.UUID, tree: Tr
        for n in names_by_person.get(p.id, []):
            display = n.display_name or f"{n.given or ''} /{n.surname or ''}/".strip()
            out.append(f"1 NAME {display}")
+            ged_type = EXPORT_TYPE_MAP.get(n.name_type)
+            if ged_type:
+                out.append(f"2 TYPE {ged_type}")
        sex = {"male": "M", "female": "F"}.get(p.gender or "")
        if sex:
            out.append(f"1 SEX {sex}")
@@ -75,3 +75,109 @@ async def test_gedcom_export_and_reimport(client):
    )
    assert resp.json()["counts"]["persons"] == 3
    assert resp.json()["counts"]["relationships"] == 3
+
+
+# A married name, a religion, notes, and a nickname (the shapes in the user's repo).
+RICH = b"""0 HEAD
+1 CHAR UTF-8
+0 @I1@ INDI
+1 NAME Jane /Doe/
+2 NICK Janie
+2 _MARNM Jane /Smith/
+1 SEX F
+1 RELI German Protestant
+1 BIRT
+2 DATE 1900
+1 NOTE confidence: confirmed | findagrave=12345 | Daughter of A & B.
+0 TRLR
+"""
+
+
+async def test_import_marnm_reli_note(client):
+    """A married name, a RELI fact, a nickname and a NOTE all import cleanly."""
+    h, tid = await _tree(client, "ged-rich@example.com")
+    resp = await client.post(
+        f"/api/v1/trees/{tid}/gedcom/import",
+        files={"file": ("rich.ged", RICH, "text/plain")},
+        headers=h,
+    )
+    assert resp.status_code == 200, resp.text
+    report = resp.json()
+    assert report["unmapped_tags"] == []  # NOTE and RELI are handled now
+
+    person = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()[0]
+    pid = person["id"]
+    # Maiden name is primary; married name is a typed alternate.
+    names = (
+        await client.get(f"/api/v1/trees/{tid}/persons/{pid}/names", headers=h)
+    ).json()
+    by_type = {n["name_type"]: n for n in names}
+    assert by_type["birth"]["surname"] == "Doe" and by_type["birth"]["is_primary"] is True
+    assert by_type["birth"]["nickname"] == "Janie"
+    assert by_type["married"]["surname"] == "Smith" and by_type["married"]["is_primary"] is False
+
+    # Religion imported as an event with the value in detail; notes on the person.
+    events = (
+        await client.get(f"/api/v1/trees/{tid}/persons/{pid}/events", headers=h)
+    ).json()
+    reli = next(e for e in events if e["event_type"] == "religion")
+    assert reli["detail"] == "German Protestant"
+    # The list endpoint may omit notes; when it does return them they must
+    # contain the imported NOTE text.
+    notes = person.get("notes")
+    if notes is not None:
+        assert "findagrave=12345" in notes
+
+
+async def test_preview_and_dedupe_merge(client):
+    """Preview flags a matching person; importing with a merge resolution
+    folds the incoming record into the existing one."""
+    import json as _json
+
+    h, tid = await _tree(client, "ged-dupe@example.com")
+    persons_url = f"/api/v1/trees/{tid}/persons"
+
+    # Seed an existing person who will match the incoming one.
+    await client.post(persons_url, json={"given": "John", "surname": "Smith"}, headers=h)
+    existing = (await client.get(persons_url, headers=h)).json()[0]
+
+    # Preview flags @I1@ (John Smith) as a duplicate.
+    prev = await client.post(
+        f"/api/v1/trees/{tid}/gedcom/preview",
+        files={"file": ("s.ged", SAMPLE, "text/plain")},
+        headers=h,
+    )
+    assert prev.status_code == 200, prev.text
+    john = next(
+        d
+        for d in prev.json()["potential_duplicates"]
+        if d["incoming_name"].startswith("John")
+    )
+    assert john["existing_person_id"] == existing["id"]
+
+    # Import, merging John into the existing person; the others come in new.
+    payload = {john["xref"]: {"action": "merge", "target_id": existing["id"]}}
+    resp = await client.post(
+        f"/api/v1/trees/{tid}/gedcom/import",
+        files={"file": ("s.ged", SAMPLE, "text/plain")},
+        data={"resolutions": _json.dumps(payload)},
+        headers=h,
+    )
+    assert resp.status_code == 200, resp.text
+    assert resp.json()["counts"]["merged"] == 1
+    # 1 existing + Mary + Junior = 3 (John was merged, not duplicated).
+    people = (await client.get(persons_url, headers=h)).json()
+    assert len(people) == 3
+
+
+async def test_dedupe_skip_default(client):
+    """With default_action=skip a matching incoming person reuses the existing
+    one instead of being imported again."""
+    h, tid = await _tree(client, "ged-skip@example.com")
+    # Seed an existing person who will match the incoming John Smith.
+    await client.post(
+        f"/api/v1/trees/{tid}/persons",
+        json={"given": "John", "surname": "Smith"},
+        headers=h,
+    )
+    resp = await client.post(
+        f"/api/v1/trees/{tid}/gedcom/import",
+        files={"file": ("s.ged", SAMPLE, "text/plain")},
+        data={"default_action": "skip"},
+        headers=h,
+    )
+    assert resp.status_code == 200, resp.text
+    counts = resp.json()["counts"]
+    assert counts.get("skipped", 0) == 1
+    # John skipped (links to existing), Mary + Junior added = 3 total.
+    people = (await client.get(f"/api/v1/trees/{tid}/persons", headers=h)).json()
+    assert len(people) == 3
@@ -5,11 +5,24 @@ import { useParams } from "next/navigation";
 import { useRef, useState } from "react";

 import { api } from "@/lib/api/client";
+import type { components } from "@/lib/api/schema";
 import { Button } from "@/components/ui/button";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
 import { Input } from "@/components/ui/input";

 type Report = { counts: Record<string, number>; unmapped_tags: string[] };
+type Preview = components["schemas"]["ImportPreview"];
+type Dup = components["schemas"]["DuplicateMatch"];
+type Action = "new" | "skip" | "merge" | "overwrite";
+
+const ACTIONS: { value: Action; label: string }[] = [
+  { value: "new", label: "Import as new" },
+  { value: "merge", label: "Merge into existing" },
+  { value: "skip", label: "Skip (use existing)" },
+  { value: "overwrite", label: "Overwrite existing" },
+];
+
+const fieldCls = "h-9 rounded-md border border-[var(--border)] bg-[var(--surface)] px-2 text-sm";

 export default function GedcomPage() {
  const params = useParams<{ id: string }>();
@@ -22,44 +35,92 @@ export default function GedcomPage() {
  const [importedTreeId, setImportedTreeId] = useState<string | null>(null);
  const fileRef = useRef<HTMLInputElement>(null);

-  async function onFile(e: React.ChangeEvent<HTMLInputElement>) {
-    const file = e.target.files?.[0];
-    if (!file) return;
-    setBusy(true);
+  // Two-step dedupe flow (only when importing into an existing tree).
+  const [file, setFile] = useState<File | null>(null);
+  const [preview, setPreview] = useState<Preview | null>(null);
+  const [resolutions, setResolutions] = useState<Record<string, Action>>({});
+
+  function resetAll() {
    setReport(null);
    setImportedTreeId(null);
+    setPreview(null);
+    setFile(null);
+    setResolutions({});
+  }

-    let tid = treeId;
-    if (target === "new") {
-      const { data } = await api.POST("/api/v1/trees", {
-        body: { name: newName.trim() || "Imported tree" },
-      });
-      if (!data) {
-        setBusy(false);
-        return;
-      }
-      tid = data.id;
-      setImportedTreeId(tid);
-    } else {
-      setImportedTreeId(treeId);
-    }
-
+  async function postImport(
+    tid: string,
+    f: File,
+    opts?: { resolutions?: string; defaultAction?: Action },
+  ) {
    const fd = new FormData();
-    fd.append("file", file);
+    fd.append("file", f);
+    if (opts?.defaultAction) fd.append("default_action", opts.defaultAction);
+    if (opts?.resolutions) fd.append("resolutions", opts.resolutions);
    const resp = await fetch(`/api/v1/trees/${tid}/gedcom/import`, {
      method: "POST",
      body: fd,
      credentials: "include",
    });
-    if (resp.ok) setReport(await resp.json());
-    setBusy(false);
+    if (resp.ok) {
+      setReport(await resp.json());
+      setImportedTreeId(tid);
+    }
+  }
+
+  async function onFile(e: React.ChangeEvent<HTMLInputElement>) {
+    const f = e.target.files?.[0];
    if (fileRef.current) fileRef.current.value = "";
+    if (!f) return;
+    setBusy(true);
+    resetAll();
+
+    if (target === "new") {
+      // Fresh tree — nothing to dedupe against, import directly.
+      const { data } = await api.POST("/api/v1/trees", {
+        body: { name: newName.trim() || "Imported tree" },
+      });
+      if (data) await postImport(data.id, f);
+      setBusy(false);
+      return;
+    }
+
+    // Existing tree — preview for duplicates first.
+    setFile(f);
+    const fd = new FormData();
+    fd.append("file", f);
+    const resp = await fetch(`/api/v1/trees/${treeId}/gedcom/preview`, {
+      method: "POST",
+      body: fd,
+      credentials: "include",
+    });
+    if (resp.ok) {
+      const pv: Preview = await resp.json();
+      setPreview(pv);
+      // Default: high-confidence matches merge, lower ones come in as new.
+      const init: Record<string, Action> = {};
+      for (const d of pv.potential_duplicates) init[d.xref] = d.score === "high" ? "merge" : "new";
+      setResolutions(init);
+    }
+    setBusy(false);
+  }
+
+  async function runImport() {
+    if (!file) return;
+    setBusy(true);
+    const map: Record<string, { action: Action; target_id: string }> = {};
+    for (const d of preview?.potential_duplicates ?? []) {
+      const action = resolutions[d.xref] ?? "new";
+      if (action !== "new") map[d.xref] = { action, target_id: d.existing_person_id };
+    }
+    await postImport(treeId, file, { resolutions: JSON.stringify(map) });
+    setPreview(null);
+    setFile(null);
+    setBusy(false);
  }

  async function exportGed() {
-    const resp = await fetch(`/api/v1/trees/${treeId}/gedcom/export`, {
-      credentials: "include",
-    });
+    const resp = await fetch(`/api/v1/trees/${treeId}/gedcom/export`, { credentials: "include" });
    if (!resp.ok) return;
    const blob = await resp.blob();
    const url = URL.createObjectURL(blob);
@@ -70,6 +131,8 @@ export default function GedcomPage() {
    URL.revokeObjectURL(url);
  }

+  const dups = preview?.potential_duplicates ?? [];
+
  return (
    <div className="space-y-6">
      <h1 className="text-2xl font-semibold">Import &amp; export GEDCOM</h1>
@@ -84,7 +147,10 @@ export default function GedcomPage() {
              type="radio"
              name="target"
              checked={target === "new"}
-              onChange={() => setTarget("new")}
+              onChange={() => {
+                setTarget("new");
+                resetAll();
+              }}
            />
            Import into a <strong>new tree</strong> (recommended)
          </label>
@@ -101,21 +167,132 @@ export default function GedcomPage() {
              type="radio"
              name="target"
              checked={target === "this"}
-              onChange={() => setTarget("this")}
+              onChange={() => {
+                setTarget("this");
+                resetAll();
+              }}
            />
-            Import into <strong>this tree</strong> (appends)
+            Import into <strong>this tree</strong> (checks for duplicates)
          </label>
-          {target === "this" && (
+          {target === "this" && !preview && (
            <p className="rounded-md bg-bronze/[0.08] px-3 py-2 text-sm text-[var(--muted)]">
-              Importing appends everyone in the file as new records — it does not merge with
-              people already in this tree, so duplicates are possible.
+              We&apos;ll scan the file and flag anyone who looks like a person already in this
+              tree, so you can merge, skip, or overwrite before anything is saved.
            </p>
          )}

-          <input ref={fileRef} type="file" accept=".ged,.gedcom,text/plain" onChange={onFile} className="hidden" />
-          <Button onClick={() => fileRef.current?.click()} disabled={busy}>
-            {busy ? "Importing…" : "Choose GEDCOM file"}
-          </Button>
+          <input
+            ref={fileRef}
+            type="file"
+            accept=".ged,.gedcom,text/plain"
+            onChange={onFile}
+            className="hidden"
+          />
+          {!preview && (
+            <Button onClick={() => fileRef.current?.click()} disabled={busy}>
+              {busy ? "Working…" : "Choose GEDCOM file"}
+            </Button>
+          )}
+
+          {/* Duplicate-resolution step */}
+          {preview && (
+            <div className="space-y-4">
+              <div className="flex flex-wrap gap-x-6 gap-y-1 text-sm text-[var(--muted)]">
+                {Object.entries(preview.counts).map(([k, v]) => (
+                  <span key={k}>
+                    <span className="font-medium text-[var(--foreground)]">{v}</span> {k}
+                  </span>
+                ))}
+              </div>
+
+              {dups.length === 0 ? (
+                <p className="rounded-md bg-bronze/[0.08] px-3 py-2 text-sm">
+                  No likely duplicates found — everyone will be imported as new.
+                </p>
+              ) : (
+                <div className="space-y-2">
+                  <div className="flex items-center justify-between">
+                    <h3 className="text-sm font-semibold">
+                      {dups.length} possible duplicate{dups.length === 1 ? "" : "s"}
+                    </h3>
+                    <label className="flex items-center gap-2 text-xs text-[var(--muted)]">
+                      Set all to
+                      <select
+                        className={fieldCls}
+                        onChange={(e) => {
+                          const a = e.target.value as Action;
+                          const all: Record<string, Action> = {};
+                          for (const d of dups) all[d.xref] = a;
+                          setResolutions(all);
+                        }}
+                        defaultValue=""
+                      >
+                        <option value="" disabled>
+                          choose…
+                        </option>
+                        {ACTIONS.map((a) => (
+                          <option key={a.value} value={a.value}>
+                            {a.label}
+                          </option>
+                        ))}
+                      </select>
+                    </label>
+                  </div>
+                  <ul className="divide-y divide-[var(--border)] rounded-lg border border-[var(--border)]">
+                    {dups.map((d: Dup) => (
+                      <li
+                        key={d.xref}
+                        className="flex flex-wrap items-center justify-between gap-3 px-3 py-2 text-sm"
+                      >
+                        <div className="min-w-0">
+                          <span className="font-medium">{d.incoming_name}</span>
+                          {d.incoming_birth_year && (
+                            <span className="text-[var(--muted)]"> b. {d.incoming_birth_year}</span>
+                          )}
+                          <span className="text-[var(--muted)]"> ↔ </span>
+                          <span>{d.existing_name}</span>
+                          {d.existing_birth_year && (
+                            <span className="text-[var(--muted)]"> b. {d.existing_birth_year}</span>
+                          )}
+                          <span
+                            className={`ml-2 rounded px-1.5 py-0.5 text-xs ${
+                              d.score === "high"
+                                ? "bg-bronze/15 text-bronze"
+                                : "bg-[var(--border)]/50 text-[var(--muted)]"
+                            }`}
+                          >
+                            {d.score}
+                          </span>
+                        </div>
+                        <select
+                          className={fieldCls}
+                          value={resolutions[d.xref] ?? "new"}
+                          onChange={(e) =>
+                            setResolutions((r) => ({ ...r, [d.xref]: e.target.value as Action }))
+                          }
+                        >
+                          {ACTIONS.map((a) => (
+                            <option key={a.value} value={a.value}>
+                              {a.label}
+                            </option>
+                          ))}
+                        </select>
+                      </li>
+                    ))}
+                  </ul>
+                </div>
+              )}
+
+              <div className="flex gap-2">
+                <Button onClick={runImport} disabled={busy}>
+                  {busy ? "Importing…" : "Run import"}
+                </Button>
+                <Button variant="ghost" onClick={resetAll} disabled={busy}>
+                  Cancel
+                </Button>
+              </div>
+            </div>
+          )}

          {report && (
            <div className="space-y-3 rounded-lg border border-[var(--border)] p-4">
@@ -557,6 +557,27 @@ export interface paths {
        patch: operations["update_media_api_v1_trees__tree_id__media__media_id__patch"];
        trace?: never;
    };
+    "/api/v1/trees/{tree_id}/gedcom/preview": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        get?: never;
+        put?: never;
+        /**
+         * Preview Gedcom
+         * @description Dry run: report counts and incoming people that look like duplicates of
+         *     existing ones, so the user can choose how to resolve each before importing.
+         */
+        post: operations["preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post"];
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
    "/api/v1/trees/{tree_id}/gedcom/import": {
        parameters: {
            query?: never;
@@ -566,7 +587,12 @@ export interface paths {
        };
        get?: never;
        put?: never;
-        /** Import Gedcom */
+        /**
+         * Import Gedcom
+         * @description Import a GEDCOM. ``default_action`` (new|skip|merge|overwrite) applies to
+         *     incoming people that match an existing one; ``resolutions`` is a JSON object
+         *     {xref: {action, target_id}} overriding it per record.
+         */
        post: operations["import_gedcom_api_v1_trees__tree_id__gedcom_import_post"];
        delete?: never;
        options?: never;
@@ -599,6 +625,21 @@ export interface components {
        Body_import_gedcom_api_v1_trees__tree_id__gedcom_import_post: {
            /** File */
            file: string;
+            /**
+             * Default Action
+             * @default new
+             */
+            default_action?: string;
+            /**
+             * Resolutions
+             * @default {}
+             */
+            resolutions?: string;
+        };
+        /** Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post */
+        Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post: {
+            /** File */
+            file: string;
        };
        /** Body_upload_media_api_v1_trees__tree_id__media_post */
        Body_upload_media_api_v1_trees__tree_id__media_post: {
@@ -683,6 +724,26 @@ export interface components {
            detail?: string | null;
            confidence?: components["schemas"]["CitationConfidence"] | null;
        };
+        /** DuplicateMatch */
+        DuplicateMatch: {
+            /** Xref */
+            xref: string;
+            /** Incoming Name */
+            incoming_name: string;
+            /** Incoming Birth Year */
+            incoming_birth_year?: string | null;
+            /**
+             * Existing Person Id
+             * Format: uuid
+             */
+            existing_person_id: string;
+            /** Existing Name */
+            existing_name: string;
+            /** Existing Birth Year */
+            existing_birth_year?: string | null;
+            /** Score */
+            score: string;
+        };
        /** EventCreate */
        EventCreate: {
            /** Event Type */
@@ -777,6 +838,17 @@ export interface components {
            /** Detail */
            detail?: components["schemas"]["ValidationError"][];
        };
+        /** ImportPreview */
+        ImportPreview: {
+            /** Counts */
+            counts: {
+                [key: string]: number;
+            };
+            /** Potential Duplicates */
+            potential_duplicates: components["schemas"]["DuplicateMatch"][];
+            /** Unmapped Tags */
+            unmapped_tags: string[];
+        };
        /** ImportReport */
        ImportReport: {
            /** Counts */
@@ -2845,6 +2917,41 @@ export interface operations {
            };
        };
    };
+    preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                tree_id: string;
+            };
+            cookie?: never;
+        };
+        requestBody: {
+            content: {
+                "multipart/form-data": components["schemas"]["Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post"];
+            };
+        };
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["ImportPreview"];
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
    import_gedcom_api_v1_trees__tree_id__gedcom_import_post: {
        parameters: {
            query?: never;
@@ -2422,12 +2422,67 @@
        }
      }
    },
+    "/api/v1/trees/{tree_id}/gedcom/preview": {
+      "post": {
+        "tags": [
+          "gedcom"
+        ],
+        "summary": "Preview Gedcom",
+        "description": "Dry run: report counts and incoming people that look like duplicates of\nexisting ones, so the user can choose how to resolve each before importing.",
+        "operationId": "preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post",
+        "parameters": [
+          {
+            "name": "tree_id",
+            "in": "path",
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Tree Id"
+            }
+          }
+        ],
+        "requestBody": {
+          "required": true,
+          "content": {
+            "multipart/form-data": {
+              "schema": {
+                "$ref": "#/components/schemas/Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post"
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ImportPreview"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
    "/api/v1/trees/{tree_id}/gedcom/import": {
      "post": {
        "tags": [
          "gedcom"
        ],
        "summary": "Import Gedcom",
+        "description": "Import a GEDCOM. ``default_action`` (new|skip|merge|overwrite) applies to\nincoming people that match an existing one; ``resolutions`` is a JSON object\n{xref: {action, target_id}} overriding it per record.",
        "operationId": "import_gedcom_api_v1_trees__tree_id__gedcom_import_post",
        "parameters": [
          {
@@ -2525,6 +2580,16 @@
            "type": "string",
            "contentMediaType": "application/octet-stream",
            "title": "File"
+          },
+          "default_action": {
+            "type": "string",
+            "title": "Default Action",
+            "default": "new"
+          },
+          "resolutions": {
+            "type": "string",
+            "title": "Resolutions",
+            "default": "{}"
          }
        },
        "type": "object",
@@ -2533,6 +2598,20 @@
        ],
        "title": "Body_import_gedcom_api_v1_trees__tree_id__gedcom_import_post"
      },
+      "Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post": {
+        "properties": {
+          "file": {
+            "type": "string",
+            "contentMediaType": "application/octet-stream",
+            "title": "File"
+          }
+        },
+        "type": "object",
+        "required": [
+          "file"
+        ],
+        "title": "Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post"
+      },
      "Body_upload_media_api_v1_trees__tree_id__media_post": {
        "properties": {
          "file": {
@@ -2854,6 +2933,62 @@
        "type": "object",
        "title": "CitationUpdate"
      },
+      "DuplicateMatch": {
+        "properties": {
+          "xref": {
+            "type": "string",
+            "title": "Xref"
+          },
+          "incoming_name": {
+            "type": "string",
+            "title": "Incoming Name"
+          },
+          "incoming_birth_year": {
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Incoming Birth Year"
+          },
+          "existing_person_id": {
+            "type": "string",
+            "format": "uuid",
+            "title": "Existing Person Id"
+          },
+          "existing_name": {
+            "type": "string",
+            "title": "Existing Name"
+          },
+          "existing_birth_year": {
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Existing Birth Year"
+          },
+          "score": {
+            "type": "string",
+            "title": "Score"
+          }
+        },
+        "type": "object",
+        "required": [
+          "xref",
+          "incoming_name",
+          "existing_person_id",
+          "existing_name",
+          "score"
+        ],
+        "title": "DuplicateMatch"
+      },
      "EventCreate": {
        "properties": {
          "event_type": {
@@ -3246,6 +3381,38 @@
        "type": "object",
        "title": "HTTPValidationError"
      },
+      "ImportPreview": {
+        "properties": {
+          "counts": {
+            "additionalProperties": {
+              "type": "integer"
+            },
+            "type": "object",
+            "title": "Counts"
+          },
+          "potential_duplicates": {
+            "items": {
+              "$ref": "#/components/schemas/DuplicateMatch"
+            },
+            "type": "array",
+            "title": "Potential Duplicates"
+          },
+          "unmapped_tags": {
+            "items": {
+              "type": "string"
+            },
+            "type": "array",
+            "title": "Unmapped Tags"
+          }
+        },
+        "type": "object",
+        "required": [
+          "counts",
+          "potential_duplicates",
+          "unmapped_tags"
+        ],
+        "title": "ImportPreview"
+      },
      "ImportReport": {
        "properties": {
          "counts": {