GEDCOM: duplicate-aware import + typed name/attribute mapping

Duplicate detection (the "merge / skip / overwrite" flow the user asked for):
- New POST /gedcom/preview dry-runs the file and flags incoming people that
  resemble existing ones (name similarity via difflib plus a birth-year guard;
  each match is scored high or medium). No writes.
- /gedcom/import takes default_action (new|skip|merge|overwrite) + per-xref
  resolutions {xref: {action, target_id}}:
    new       create as a new person (current behavior)
    skip      link families to the existing person, copy nothing
    merge     attach the incoming names (as alternates), events, citations,
              and notes onto the existing person
    overwrite soft-delete the existing person, import the incoming one fresh
  Relationship creation is deduped so a merge can't double an edge.

Richer record mapping (covers the GEDCOM in the user's repo):
- Multiple NAME records honor their TYPE; _MARNM (and NICK) import as typed
  alternate names — maiden stays primary, married becomes a "married" Name.
- RELI -> a "religion" event with the value in detail; OCCU/EDUC values too.
- NOTE -> person notes (and event notes); NOTE/RELI are no longer "unmapped".
- Export round-trips name TYPE.

Verified against the user's 2185-person export: 0 unmapped tags. 48 tests pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 10:35:55 -04:00
parent 04ccdbf96a
commit 5824e70895
7 changed files with 1047 additions and 90 deletions
+212 -35
View File
@@ -5,11 +5,24 @@ import { useParams } from "next/navigation";
import { useRef, useState } from "react";
import { api } from "@/lib/api/client";
import type { components } from "@/lib/api/schema";
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Input } from "@/components/ui/input";
type Report = { counts: Record<string, number>; unmapped_tags: string[] };
type Preview = components["schemas"]["ImportPreview"];
type Dup = components["schemas"]["DuplicateMatch"];
/** How one incoming person flagged as a possible duplicate should be handled. */
type Action = "new" | "skip" | "merge" | "overwrite";

/**
 * Options rendered in the resolution <select> controls, in display order.
 * `value` is the action sent to the import endpoint; `label` is what the user sees.
 */
const ACTIONS: Array<{ value: Action; label: string }> = [
  { label: "Import as new", value: "new" },
  { label: "Merge into existing", value: "merge" },
  { label: "Skip (use existing)", value: "skip" },
  { label: "Overwrite existing", value: "overwrite" },
];

/** Shared className string applied to the <select> controls on this page. */
const fieldCls =
  "h-9 rounded-md border border-[var(--border)] bg-[var(--surface)] px-2 text-sm";
export default function GedcomPage() {
const params = useParams<{ id: string }>();
@@ -22,44 +35,92 @@ export default function GedcomPage() {
const [importedTreeId, setImportedTreeId] = useState<string | null>(null);
const fileRef = useRef<HTMLInputElement>(null);
async function onFile(e: React.ChangeEvent<HTMLInputElement>) {
const file = e.target.files?.[0];
if (!file) return;
setBusy(true);
// Two-step dedupe flow (only when importing into an existing tree).
const [file, setFile] = useState<File | null>(null);
const [preview, setPreview] = useState<Preview | null>(null);
const [resolutions, setResolutions] = useState<Record<string, Action>>({});
/**
 * Returns the page to its initial state: drops the pending file, its duplicate
 * preview and the per-xref choices, and clears the result of any earlier import.
 */
function resetAll() {
  // Pending two-step import (file -> preview -> resolutions).
  setFile(null);
  setPreview(null);
  setResolutions({});

  // Outcome of a previously completed import.
  setImportedTreeId(null);
  setReport(null);
}
let tid = treeId;
if (target === "new") {
const { data } = await api.POST("/api/v1/trees", {
body: { name: newName.trim() || "Imported tree" },
});
if (!data) {
setBusy(false);
return;
}
tid = data.id;
setImportedTreeId(tid);
} else {
setImportedTreeId(treeId);
}
async function postImport(
tid: string,
f: File,
opts?: { resolutions?: string; defaultAction?: Action },
) {
const fd = new FormData();
fd.append("file", file);
fd.append("file", f);
if (opts?.defaultAction) fd.append("default_action", opts.defaultAction);
if (opts?.resolutions) fd.append("resolutions", opts.resolutions);
const resp = await fetch(`/api/v1/trees/${tid}/gedcom/import`, {
method: "POST",
body: fd,
credentials: "include",
});
if (resp.ok) setReport(await resp.json());
setBusy(false);
if (resp.ok) {
setReport(await resp.json());
setImportedTreeId(tid);
}
}
/**
 * Change handler for the hidden GEDCOM file input.
 *
 * - Target "new": creates a tree (named from `newName`, falling back to
 *   "Imported tree") and imports the file into it directly.
 * - Otherwise: keeps the file in state and POSTs it to the preview endpoint so
 *   the user can resolve possible duplicates before anything is imported.
 *
 * `busy` is always cleared on the way out, including when a request rejects,
 * so the UI cannot get stuck in the "Working…" state after a network failure.
 */
async function onFile(e: React.ChangeEvent<HTMLInputElement>) {
  const f = e.target.files?.[0];
  // Reset the input so that picking the same file again still fires onChange.
  if (fileRef.current) fileRef.current.value = "";
  if (!f) return;

  setBusy(true);
  resetAll();

  try {
    if (target === "new") {
      // Fresh tree — nothing to dedupe against, import directly.
      const { data } = await api.POST("/api/v1/trees", {
        body: { name: newName.trim() || "Imported tree" },
      });
      if (data) await postImport(data.id, f);
      return;
    }

    // Existing tree — preview for duplicates first.
    setFile(f);
    const fd = new FormData();
    fd.append("file", f);
    const resp = await fetch(`/api/v1/trees/${treeId}/gedcom/preview`, {
      method: "POST",
      body: fd,
      credentials: "include",
    });
    if (resp.ok) {
      const pv: Preview = await resp.json();
      setPreview(pv);
      // Default: high-confidence matches merge, lower ones come in as new.
      const init: Record<string, Action> = {};
      for (const d of pv.potential_duplicates) {
        init[d.xref] = d.score === "high" ? "merge" : "new";
      }
      setResolutions(init);
    }
  } finally {
    // Runs on success, on the early return above, and when a request throws.
    setBusy(false);
  }
}
/**
 * Second step of the dedupe flow: imports the previewed file using the
 * per-duplicate choices the user made.
 *
 * Only non-"new" choices are sent; each is paired with the id of the existing
 * person it was matched against. Once the import call returns, the preview and
 * the pending file are cleared. If the call throws, they are kept so the user
 * can retry, and `busy` is still reset so the buttons are not left disabled.
 */
async function runImport() {
  if (!file) return;
  setBusy(true);
  try {
    const map: Record<string, { action: Action; target_id: string }> = {};
    for (const d of preview?.potential_duplicates ?? []) {
      const action = resolutions[d.xref] ?? "new";
      // "new" needs no entry, so it is omitted from the payload.
      if (action !== "new") map[d.xref] = { action, target_id: d.existing_person_id };
    }
    await postImport(treeId, file, { resolutions: JSON.stringify(map) });
    setPreview(null);
    setFile(null);
  } finally {
    setBusy(false);
  }
}
async function exportGed() {
const resp = await fetch(`/api/v1/trees/${treeId}/gedcom/export`, {
credentials: "include",
});
const resp = await fetch(`/api/v1/trees/${treeId}/gedcom/export`, { credentials: "include" });
if (!resp.ok) return;
const blob = await resp.blob();
const url = URL.createObjectURL(blob);
@@ -70,6 +131,8 @@ export default function GedcomPage() {
URL.revokeObjectURL(url);
}
const dups = preview?.potential_duplicates ?? [];
return (
<div className="space-y-6">
<h1 className="text-2xl font-semibold">Import &amp; export GEDCOM</h1>
@@ -84,7 +147,10 @@ export default function GedcomPage() {
type="radio"
name="target"
checked={target === "new"}
onChange={() => setTarget("new")}
onChange={() => {
setTarget("new");
resetAll();
}}
/>
Import into a <strong>new tree</strong> (recommended)
</label>
@@ -101,21 +167,132 @@ export default function GedcomPage() {
type="radio"
name="target"
checked={target === "this"}
onChange={() => setTarget("this")}
onChange={() => {
setTarget("this");
resetAll();
}}
/>
Import into <strong>this tree</strong> (appends)
Import into <strong>this tree</strong> (checks for duplicates)
</label>
{target === "this" && (
{target === "this" && !preview && (
<p className="rounded-md bg-bronze/[0.08] px-3 py-2 text-sm text-[var(--muted)]">
Importing appends everyone in the file as new records it does not merge with
people already in this tree, so duplicates are possible.
We&apos;ll scan the file and flag anyone who looks like a person already in this
tree, so you can merge, skip, or overwrite before anything is saved.
</p>
)}
<input ref={fileRef} type="file" accept=".ged,.gedcom,text/plain" onChange={onFile} className="hidden" />
<Button onClick={() => fileRef.current?.click()} disabled={busy}>
{busy ? "Importing…" : "Choose GEDCOM file"}
</Button>
<input
ref={fileRef}
type="file"
accept=".ged,.gedcom,text/plain"
onChange={onFile}
className="hidden"
/>
{!preview && (
<Button onClick={() => fileRef.current?.click()} disabled={busy}>
{busy ? "Working…" : "Choose GEDCOM file"}
</Button>
)}
{/* Duplicate-resolution step */}
{preview && (
<div className="space-y-4">
<div className="flex flex-wrap gap-x-6 gap-y-1 text-sm text-[var(--muted)]">
{Object.entries(preview.counts).map(([k, v]) => (
<span key={k}>
<span className="font-medium text-[var(--foreground)]">{v}</span> {k}
</span>
))}
</div>
{dups.length === 0 ? (
<p className="rounded-md bg-bronze/[0.08] px-3 py-2 text-sm">
No likely duplicates found everyone will be imported as new.
</p>
) : (
<div className="space-y-2">
<div className="flex items-center justify-between">
<h3 className="text-sm font-semibold">
{dups.length} possible duplicate{dups.length === 1 ? "" : "s"}
</h3>
<label className="flex items-center gap-2 text-xs text-[var(--muted)]">
Set all to
<select
className={fieldCls}
onChange={(e) => {
const a = e.target.value as Action;
const all: Record<string, Action> = {};
for (const d of dups) all[d.xref] = a;
setResolutions(all);
}}
defaultValue=""
>
<option value="" disabled>
choose
</option>
{ACTIONS.map((a) => (
<option key={a.value} value={a.value}>
{a.label}
</option>
))}
</select>
</label>
</div>
<ul className="divide-y divide-[var(--border)] rounded-lg border border-[var(--border)]">
{dups.map((d: Dup) => (
<li
key={d.xref}
className="flex flex-wrap items-center justify-between gap-3 px-3 py-2 text-sm"
>
<div className="min-w-0">
<span className="font-medium">{d.incoming_name}</span>
{d.incoming_birth_year && (
<span className="text-[var(--muted)]"> b. {d.incoming_birth_year}</span>
)}
<span className="text-[var(--muted)]"> </span>
<span>{d.existing_name}</span>
{d.existing_birth_year && (
<span className="text-[var(--muted)]"> b. {d.existing_birth_year}</span>
)}
<span
className={`ml-2 rounded px-1.5 py-0.5 text-xs ${
d.score === "high"
? "bg-bronze/15 text-bronze"
: "bg-[var(--border)]/50 text-[var(--muted)]"
}`}
>
{d.score}
</span>
</div>
<select
className={fieldCls}
value={resolutions[d.xref] ?? "new"}
onChange={(e) =>
setResolutions((r) => ({ ...r, [d.xref]: e.target.value as Action }))
}
>
{ACTIONS.map((a) => (
<option key={a.value} value={a.value}>
{a.label}
</option>
))}
</select>
</li>
))}
</ul>
</div>
)}
<div className="flex gap-2">
<Button onClick={runImport} disabled={busy}>
{busy ? "Importing…" : "Run import"}
</Button>
<Button variant="ghost" onClick={resetAll} disabled={busy}>
Cancel
</Button>
</div>
</div>
)}
{report && (
<div className="space-y-3 rounded-lg border border-[var(--border)] p-4">
+108 -1
View File
@@ -557,6 +557,27 @@ export interface paths {
patch: operations["update_media_api_v1_trees__tree_id__media__media_id__patch"];
trace?: never;
};
"/api/v1/trees/{tree_id}/gedcom/preview": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put?: never;
/**
* Preview Gedcom
* @description Dry run: report counts and incoming people that look like duplicates of
* existing ones, so the user can choose how to resolve each before importing.
*/
post: operations["preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/v1/trees/{tree_id}/gedcom/import": {
parameters: {
query?: never;
@@ -566,7 +587,12 @@ export interface paths {
};
get?: never;
put?: never;
/** Import Gedcom */
/**
* Import Gedcom
* @description Import a GEDCOM. ``default_action`` (new|skip|merge|overwrite) applies to
* incoming people that match an existing one; ``resolutions`` is a JSON object
* {xref: {action, target_id}} overriding it per record.
*/
post: operations["import_gedcom_api_v1_trees__tree_id__gedcom_import_post"];
delete?: never;
options?: never;
@@ -599,6 +625,21 @@ export interface components {
Body_import_gedcom_api_v1_trees__tree_id__gedcom_import_post: {
/** File */
file: string;
/**
* Default Action
* @default new
*/
default_action?: string;
/**
* Resolutions
* @default {}
*/
resolutions?: string;
};
/** Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post */
Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post: {
/** File */
file: string;
};
/** Body_upload_media_api_v1_trees__tree_id__media_post */
Body_upload_media_api_v1_trees__tree_id__media_post: {
@@ -683,6 +724,26 @@ export interface components {
detail?: string | null;
confidence?: components["schemas"]["CitationConfidence"] | null;
};
/** DuplicateMatch */
DuplicateMatch: {
/** Xref */
xref: string;
/** Incoming Name */
incoming_name: string;
/** Incoming Birth Year */
incoming_birth_year?: string | null;
/**
* Existing Person Id
* Format: uuid
*/
existing_person_id: string;
/** Existing Name */
existing_name: string;
/** Existing Birth Year */
existing_birth_year?: string | null;
/** Score */
score: string;
};
/** EventCreate */
EventCreate: {
/** Event Type */
@@ -777,6 +838,17 @@ export interface components {
/** Detail */
detail?: components["schemas"]["ValidationError"][];
};
/** ImportPreview */
ImportPreview: {
/** Counts */
counts: {
[key: string]: number;
};
/** Potential Duplicates */
potential_duplicates: components["schemas"]["DuplicateMatch"][];
/** Unmapped Tags */
unmapped_tags: string[];
};
/** ImportReport */
ImportReport: {
/** Counts */
@@ -2845,6 +2917,41 @@ export interface operations {
};
};
};
preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post: {
parameters: {
query?: never;
header?: never;
path: {
tree_id: string;
};
cookie?: never;
};
requestBody: {
content: {
"multipart/form-data": components["schemas"]["Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post"];
};
};
responses: {
/** @description Successful Response */
200: {
headers: {
[name: string]: unknown;
};
content: {
"application/json": components["schemas"]["ImportPreview"];
};
};
/** @description Validation Error */
422: {
headers: {
[name: string]: unknown;
};
content: {
"application/json": components["schemas"]["HTTPValidationError"];
};
};
};
};
import_gedcom_api_v1_trees__tree_id__gedcom_import_post: {
parameters: {
query?: never;
+167
View File
@@ -2422,12 +2422,67 @@
}
}
},
"/api/v1/trees/{tree_id}/gedcom/preview": {
"post": {
"tags": [
"gedcom"
],
"summary": "Preview Gedcom",
"description": "Dry run: report counts and incoming people that look like duplicates of\nexisting ones, so the user can choose how to resolve each before importing.",
"operationId": "preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post",
"parameters": [
{
"name": "tree_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
"title": "Tree Id"
}
}
],
"requestBody": {
"required": true,
"content": {
"multipart/form-data": {
"schema": {
"$ref": "#/components/schemas/Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post"
}
}
}
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ImportPreview"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
}
}
},
"/api/v1/trees/{tree_id}/gedcom/import": {
"post": {
"tags": [
"gedcom"
],
"summary": "Import Gedcom",
"description": "Import a GEDCOM. ``default_action`` (new|skip|merge|overwrite) applies to\nincoming people that match an existing one; ``resolutions`` is a JSON object\n{xref: {action, target_id}} overriding it per record.",
"operationId": "import_gedcom_api_v1_trees__tree_id__gedcom_import_post",
"parameters": [
{
@@ -2525,6 +2580,16 @@
"type": "string",
"contentMediaType": "application/octet-stream",
"title": "File"
},
"default_action": {
"type": "string",
"title": "Default Action",
"default": "new"
},
"resolutions": {
"type": "string",
"title": "Resolutions",
"default": "{}"
}
},
"type": "object",
@@ -2533,6 +2598,20 @@
],
"title": "Body_import_gedcom_api_v1_trees__tree_id__gedcom_import_post"
},
"Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post": {
"properties": {
"file": {
"type": "string",
"contentMediaType": "application/octet-stream",
"title": "File"
}
},
"type": "object",
"required": [
"file"
],
"title": "Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post"
},
"Body_upload_media_api_v1_trees__tree_id__media_post": {
"properties": {
"file": {
@@ -2854,6 +2933,62 @@
"type": "object",
"title": "CitationUpdate"
},
"DuplicateMatch": {
"properties": {
"xref": {
"type": "string",
"title": "Xref"
},
"incoming_name": {
"type": "string",
"title": "Incoming Name"
},
"incoming_birth_year": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Incoming Birth Year"
},
"existing_person_id": {
"type": "string",
"format": "uuid",
"title": "Existing Person Id"
},
"existing_name": {
"type": "string",
"title": "Existing Name"
},
"existing_birth_year": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Existing Birth Year"
},
"score": {
"type": "string",
"title": "Score"
}
},
"type": "object",
"required": [
"xref",
"incoming_name",
"existing_person_id",
"existing_name",
"score"
],
"title": "DuplicateMatch"
},
"EventCreate": {
"properties": {
"event_type": {
@@ -3246,6 +3381,38 @@
"type": "object",
"title": "HTTPValidationError"
},
"ImportPreview": {
"properties": {
"counts": {
"additionalProperties": {
"type": "integer"
},
"type": "object",
"title": "Counts"
},
"potential_duplicates": {
"items": {
"$ref": "#/components/schemas/DuplicateMatch"
},
"type": "array",
"title": "Potential Duplicates"
},
"unmapped_tags": {
"items": {
"type": "string"
},
"type": "array",
"title": "Unmapped Tags"
}
},
"type": "object",
"required": [
"counts",
"potential_duplicates",
"unmapped_tags"
],
"title": "ImportPreview"
},
"ImportReport": {
"properties": {
"counts": {