Files
provenance/backend/app/integrations/objectstore/s3.py
T
justin 34d30e3134 Add media (object storage) and the background worker (Phase 1)
Media model + migration; an ObjectStore interface with an S3/MinIO (boto3) implementation behind the service layer. Upload (multipart) stores bytes in object storage + a metadata row (checksum, size, content-type, optional attach to person/event/source); list returns presigned URLs; delete is soft. Editor-gated, privacy-filtered, audited. 24 tests pass (object store faked).

Introduces the worker container (same image, 'python -m app.worker'): its first job is the scheduled 30-day soft-delete purge across tables + media object cleanup. Compose gains worker + S3 env on backend/worker; dev override builds the worker too.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Justin Paul <justin@jpaul.me>
2026-06-06 21:46:09 -04:00

57 lines
1.9 KiB
Python

"""S3-compatible ObjectStore (boto3), suitable for MinIO or any S3 provider.
boto3 is synchronous; each call is dispatched to a thread so request handlers
and the worker stay async."""
import asyncio
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
from app.core.config import Settings
from app.integrations.objectstore.base import ObjectStore
class S3ObjectStore(ObjectStore):
def __init__(self, settings: Settings) -> None:
self.bucket = settings.s3_bucket
self.presign_ttl = settings.s3_presign_ttl
self._client = boto3.client(
"s3",
endpoint_url=settings.s3_endpoint_url,
aws_access_key_id=settings.s3_access_key,
aws_secret_access_key=settings.s3_secret_key,
region_name=settings.s3_region,
config=Config(signature_version="s3v4"),
)
def _ensure_bucket_sync(self) -> None:
try:
self._client.head_bucket(Bucket=self.bucket)
except ClientError:
self._client.create_bucket(Bucket=self.bucket)
async def ensure_bucket(self) -> None:
await asyncio.to_thread(self._ensure_bucket_sync)
async def put_object(self, *, key: str, data: bytes, content_type: str) -> None:
await asyncio.to_thread(
self._client.put_object,
Bucket=self.bucket,
Key=key,
Body=data,
ContentType=content_type,
)
async def presigned_get_url(self, *, key: str) -> str:
return await asyncio.to_thread(
self._client.generate_presigned_url,
"get_object",
Params={"Bucket": self.bucket, "Key": key},
ExpiresIn=self.presign_ttl,
)
async def delete_object(self, *, key: str) -> None:
await asyncio.to_thread(self._client.delete_object, Bucket=self.bucket, Key=key)