Files
provenance/backend/app/integrations/objectstore/s3.py
T
justin bd8ee9b647 Stream media through the backend (browser-reachable, privacy-checked)
Presigned URLs point at the internal minio:9000 host a browser can't reach. Add ObjectStore.get_object and a GET /media/{id}/content endpoint that resolves visibility and streams the bytes; MediaRead.url now points there. Keeps the object store private and downloads behind the privacy engine.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Justin Paul <justin@jpaul.me>
2026-06-06 21:56:04 -04:00

64 lines
2.1 KiB
Python

"""S3-compatible ObjectStore (boto3), suitable for MinIO or any S3 provider.
boto3 is synchronous; each call is dispatched to a thread so request handlers
and the worker stay async."""
import asyncio
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
from app.core.config import Settings
from app.integrations.objectstore.base import ObjectStore
class S3ObjectStore(ObjectStore):
def __init__(self, settings: Settings) -> None:
self.bucket = settings.s3_bucket
self.presign_ttl = settings.s3_presign_ttl
self._client = boto3.client(
"s3",
endpoint_url=settings.s3_endpoint_url,
aws_access_key_id=settings.s3_access_key,
aws_secret_access_key=settings.s3_secret_key,
region_name=settings.s3_region,
config=Config(signature_version="s3v4"),
)
def _ensure_bucket_sync(self) -> None:
try:
self._client.head_bucket(Bucket=self.bucket)
except ClientError:
self._client.create_bucket(Bucket=self.bucket)
async def ensure_bucket(self) -> None:
await asyncio.to_thread(self._ensure_bucket_sync)
async def put_object(self, *, key: str, data: bytes, content_type: str) -> None:
await asyncio.to_thread(
self._client.put_object,
Bucket=self.bucket,
Key=key,
Body=data,
ContentType=content_type,
)
async def get_object(self, *, key: str) -> bytes:
def _get() -> bytes:
obj = self._client.get_object(Bucket=self.bucket, Key=key)
return obj["Body"].read()
return await asyncio.to_thread(_get)
async def presigned_get_url(self, *, key: str) -> str:
return await asyncio.to_thread(
self._client.generate_presigned_url,
"get_object",
Params={"Bucket": self.bucket, "Key": key},
ExpiresIn=self.presign_ttl,
)
async def delete_object(self, *, key: str) -> None:
await asyncio.to_thread(self._client.delete_object, Bucket=self.bucket, Key=key)