GEDCOM: duplicate-aware import + typed name/attribute mapping

Duplicate detection (the "merge / skip / overwrite" the user asked for):
- New POST /gedcom/preview dry-runs the file and flags incoming people that
  resemble existing ones (name similarity via difflib plus a birth-year guard;
  each match carries a high/medium score). Nothing is written.
- /gedcom/import takes default_action (new|skip|merge|overwrite) + per-xref
  resolutions {xref: {action, target_id}}:
    new       create as a new person (current behavior)
    skip      link families to the existing person, copy nothing
    merge     attach the incoming names (as alternates), events, citations,
              and notes onto the existing person
    overwrite soft-delete the existing person, import the incoming one fresh
  Relationship creation is de-duplicated, so a merge cannot create the same
  edge twice.

Richer record mapping (covers the GEDCOM file in the user's repo):
- Multiple NAME records honor their TYPE; _MARNM (and NICK) import as typed
  alternate names — maiden stays primary, married becomes a "married" Name.
- RELI -> a "religion" event with its value in detail; OCCU/EDUC values are
  kept in detail the same way.
- NOTE -> person notes (and event notes); NOTE/RELI are no longer "unmapped".
- Export round-trips name TYPE.

Verified against the user's 2185-person export: 0 unmapped tags. 48 tests pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 10:35:55 -04:00
parent 04ccdbf96a
commit 5824e70895
7 changed files with 1047 additions and 90 deletions
+167
View File
@@ -2422,12 +2422,67 @@
}
}
},
"/api/v1/trees/{tree_id}/gedcom/preview": {
"post": {
"tags": [
"gedcom"
],
"summary": "Preview Gedcom",
"description": "Dry run: report counts and incoming people that look like duplicates of\nexisting ones, so the user can choose how to resolve each before importing.",
"operationId": "preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post",
"parameters": [
{
"name": "tree_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
"title": "Tree Id"
}
}
],
"requestBody": {
"required": true,
"content": {
"multipart/form-data": {
"schema": {
"$ref": "#/components/schemas/Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post"
}
}
}
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ImportPreview"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
}
}
},
"/api/v1/trees/{tree_id}/gedcom/import": {
"post": {
"tags": [
"gedcom"
],
"summary": "Import Gedcom",
"description": "Import a GEDCOM. ``default_action`` (new|skip|merge|overwrite) applies to\nincoming people that match an existing one; ``resolutions`` is a JSON object\n{xref: {action, target_id}} overriding it per record.",
"operationId": "import_gedcom_api_v1_trees__tree_id__gedcom_import_post",
"parameters": [
{
@@ -2525,6 +2580,16 @@
"type": "string",
"contentMediaType": "application/octet-stream",
"title": "File"
},
"default_action": {
"type": "string",
"title": "Default Action",
"default": "new"
},
"resolutions": {
"type": "string",
"title": "Resolutions",
"default": "{}"
}
},
"type": "object",
@@ -2533,6 +2598,20 @@
],
"title": "Body_import_gedcom_api_v1_trees__tree_id__gedcom_import_post"
},
"Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post": {
"properties": {
"file": {
"type": "string",
"contentMediaType": "application/octet-stream",
"title": "File"
}
},
"type": "object",
"required": [
"file"
],
"title": "Body_preview_gedcom_api_v1_trees__tree_id__gedcom_preview_post"
},
"Body_upload_media_api_v1_trees__tree_id__media_post": {
"properties": {
"file": {
@@ -2854,6 +2933,62 @@
"type": "object",
"title": "CitationUpdate"
},
"DuplicateMatch": {
"properties": {
"xref": {
"type": "string",
"title": "Xref"
},
"incoming_name": {
"type": "string",
"title": "Incoming Name"
},
"incoming_birth_year": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Incoming Birth Year"
},
"existing_person_id": {
"type": "string",
"format": "uuid",
"title": "Existing Person Id"
},
"existing_name": {
"type": "string",
"title": "Existing Name"
},
"existing_birth_year": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Existing Birth Year"
},
"score": {
"type": "string",
"title": "Score"
}
},
"type": "object",
"required": [
"xref",
"incoming_name",
"existing_person_id",
"existing_name",
"score"
],
"title": "DuplicateMatch"
},
"EventCreate": {
"properties": {
"event_type": {
@@ -3246,6 +3381,38 @@
"type": "object",
"title": "HTTPValidationError"
},
"ImportPreview": {
"properties": {
"counts": {
"additionalProperties": {
"type": "integer"
},
"type": "object",
"title": "Counts"
},
"potential_duplicates": {
"items": {
"$ref": "#/components/schemas/DuplicateMatch"
},
"type": "array",
"title": "Potential Duplicates"
},
"unmapped_tags": {
"items": {
"type": "string"
},
"type": "array",
"title": "Unmapped Tags"
}
},
"type": "object",
"required": [
"counts",
"potential_duplicates",
"unmapped_tags"
],
"title": "ImportPreview"
},
"ImportReport": {
"properties": {
"counts": {