From de50f2c803e7e480f9903c2f5e66de37335e5038 Mon Sep 17 00:00:00 2001 From: Justin Paul Date: Tue, 9 Jun 2026 18:39:19 -0400 Subject: [PATCH] Model providers: OpenAI/xAI/Ollama + run several at once (registry) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the #215 abstraction: - OpenAICompatibleLLMProvider / OpenAICompatibleEmbeddingProvider — one impl (via the official openai SDK) covers OpenAI, xAI (api.x.ai/v1), Ollama (…:11434/v1), OpenRouter, etc.; they differ only by base_url, key, and model. - Registry factory: build_llm_providers() / build_embedding_providers() return every provider that is enabled — i.e. whose API key is configured, or, for key-less Ollama, whose OLLAMA_ENABLED flag is set — so you can run several concurrently. get_llm_provider(name)/get_embedding_provider(name) select by name; when no name is given they use default_*_provider. A name (or default) that is unknown or not enabled resolves to the Null provider — an explicit name does not fall back to the default. - Per-provider env config (ANTHROPIC_*, OPENAI_*, XAI_*, OLLAMA_*) + DEFAULT_LLM_PROVIDER / DEFAULT_EMBEDDING_PROVIDER; documented in .env.example. Defaults keep AI off (empty registry). Embeddings now have real backends (OpenAI/Ollama), still separate from the LLM since Anthropic offers no embeddings endpoint. Tests cover multi-provider selection, default resolution, disabled-without-credentials, and null fail-loud. Full suite 87 passed. Relates to #215. 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Justin Paul --- backend/app/api/deps.py | 66 +++++++++++--- backend/app/core/config.py | 32 +++++-- .../app/integrations/models/openai_compat.py | 40 +++++++++ backend/pyproject.toml | 1 + backend/tests/test_model_providers.py | 85 ++++++++++++++----- backend/uv.lock | 33 +++++++ deploy/.env.example | 38 ++++++--- 7 files changed, 245 insertions(+), 50 deletions(-) create mode 100644 backend/app/integrations/models/openai_compat.py diff --git a/backend/app/api/deps.py b/backend/app/api/deps.py index 8298c4c..a0ffed7 100644 --- a/backend/app/api/deps.py +++ b/backend/app/api/deps.py @@ -71,26 +71,66 @@ def get_objectstore() -> ObjectStore: ObjectStoreDep = Annotated[ObjectStore, Depends(get_objectstore)] -def get_llm_provider() -> LLMProvider: - settings = get_settings() - if settings.model_provider == "anthropic" and settings.anthropic_api_key: - from app.integrations.models.anthropic_provider import AnthropicLLMProvider +def build_llm_providers() -> dict[str, LLMProvider]: + """Every LLM provider whose credentials are configured, keyed by name. 
Run + several at once; pick one with get_llm_provider(name).""" + from app.integrations.models.anthropic_provider import AnthropicLLMProvider + from app.integrations.models.openai_compat import OpenAICompatibleLLMProvider - return AnthropicLLMProvider( - api_key=settings.anthropic_api_key, - model=settings.llm_model, - max_tokens=settings.llm_max_tokens, + s = get_settings() + providers: dict[str, LLMProvider] = {} + if s.anthropic_api_key: + providers["anthropic"] = AnthropicLLMProvider( + api_key=s.anthropic_api_key, model=s.anthropic_model, max_tokens=s.llm_max_tokens ) - return NullLLMProvider() + if s.openai_api_key: + providers["openai"] = OpenAICompatibleLLMProvider( + api_key=s.openai_api_key, base_url=s.openai_base_url, model=s.openai_model, + max_tokens=s.llm_max_tokens, + ) + if s.xai_api_key: + providers["xai"] = OpenAICompatibleLLMProvider( + api_key=s.xai_api_key, base_url=s.xai_base_url, model=s.xai_model, + max_tokens=s.llm_max_tokens, + ) + if s.ollama_enabled: + providers["ollama"] = OpenAICompatibleLLMProvider( + api_key=None, base_url=s.ollama_base_url, model=s.ollama_model, + max_tokens=s.llm_max_tokens, + ) + return providers + + +def get_llm_provider(name: str | None = None) -> LLMProvider: + """The named LLM provider, or the configured default, or Null if unconfigured.""" + providers = build_llm_providers() + return providers.get(name or get_settings().default_llm_provider) or NullLLMProvider() LLMProviderDep = Annotated[LLMProvider, Depends(get_llm_provider)] -def get_embedding_provider() -> EmbeddingProvider: - # Only the null provider exists today; concrete embedders (Ollama/Voyage) - # implement the same interface and are selected here by settings.embedding_provider. 
- return NullEmbeddingProvider() +def build_embedding_providers() -> dict[str, EmbeddingProvider]: + from app.integrations.models.openai_compat import OpenAICompatibleEmbeddingProvider + + s = get_settings() + providers: dict[str, EmbeddingProvider] = {} + if s.openai_api_key: + providers["openai"] = OpenAICompatibleEmbeddingProvider( + api_key=s.openai_api_key, base_url=s.openai_base_url, + model=s.openai_embedding_model, dimensions=s.embedding_dimensions, + ) + if s.ollama_enabled: + providers["ollama"] = OpenAICompatibleEmbeddingProvider( + api_key=None, base_url=s.ollama_base_url, + model=s.ollama_embedding_model, dimensions=s.embedding_dimensions, + ) + return providers + + +def get_embedding_provider(name: str | None = None) -> EmbeddingProvider: + providers = build_embedding_providers() + return providers.get(name or get_settings().default_embedding_provider) or NullEmbeddingProvider() EmbeddingProviderDep = Annotated[EmbeddingProvider, Depends(get_embedding_provider)] diff --git a/backend/app/core/config.py b/backend/app/core/config.py index edbba4f..3d1e495 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -61,12 +61,34 @@ class Settings(BaseSettings): smtp_from: str = "Provenance " # --- Model providers (AI assistant + match-ranking embeddings) --- - # Separate because Anthropic has no embeddings endpoint; either can be off. - model_provider: str = "null" # null | anthropic - anthropic_api_key: str | None = None - llm_model: str = "claude-opus-4-8" + # Configure as many as you like; each is enabled when its credentials are + # present. `default_*_provider` picks which one is used by default. LLM and + # embeddings are independent (Anthropic has no embeddings endpoint). 
+ default_llm_provider: str = "null" # null | anthropic | openai | xai | ollama + default_embedding_provider: str = "null" # null | openai | ollama llm_max_tokens: int = 4096 - embedding_provider: str = "null" # null | (future: ollama, voyage, …) + embedding_dimensions: int = 1536 # must match the embedding model + pgvector column + + # Anthropic (LLM only) + anthropic_api_key: str | None = None + anthropic_model: str = "claude-opus-4-8" + + # OpenAI (LLM + embeddings) + openai_api_key: str | None = None + openai_base_url: str = "https://api.openai.com/v1" + openai_model: str = "gpt-4o" + openai_embedding_model: str = "text-embedding-3-small" + + # xAI / Grok — OpenAI-compatible (LLM) + xai_api_key: str | None = None + xai_base_url: str = "https://api.x.ai/v1" + xai_model: str = "grok-2-latest" # set to your account's current Grok model + + # Ollama — local, OpenAI-compatible, no key (LLM + embeddings) + ollama_enabled: bool = False + ollama_base_url: str = "http://localhost:11434/v1" + ollama_model: str = "llama3.1" + ollama_embedding_model: str = "nomic-embed-text" @lru_cache diff --git a/backend/app/integrations/models/openai_compat.py b/backend/app/integrations/models/openai_compat.py new file mode 100644 index 0000000..ae3526e --- /dev/null +++ b/backend/app/integrations/models/openai_compat.py @@ -0,0 +1,40 @@ +"""OpenAI-compatible providers (one implementation, many vendors). + +OpenAI, xAI (api.x.ai/v1), Ollama (…:11434/v1), OpenRouter, Together, vLLM, etc. +all speak the OpenAI Chat Completions / Embeddings API — they differ only by +base URL, key, and model name. So a single class, parameterized by those, plugs +in every one of them via the official `openai` SDK. 
+""" + +from openai import AsyncOpenAI + +from app.integrations.models.base import EmbeddingProvider, LLMProvider + + +class OpenAICompatibleLLMProvider(LLMProvider): + def __init__(self, *, api_key: str | None, base_url: str, model: str, max_tokens: int = 4096) -> None: + # Local backends (Ollama) ignore the key but the SDK requires a non-empty one. + self._client = AsyncOpenAI(api_key=api_key or "not-needed", base_url=base_url) + self._model = model + self._max_tokens = max_tokens + + async def complete(self, *, prompt: str, system: str | None = None) -> str: + messages: list[dict] = [] + if system: + messages.append({"role": "system", "content": system}) + messages.append({"role": "user", "content": prompt}) + resp = await self._client.chat.completions.create( + model=self._model, max_tokens=self._max_tokens, messages=messages + ) + return resp.choices[0].message.content or "" + + +class OpenAICompatibleEmbeddingProvider(EmbeddingProvider): + def __init__(self, *, api_key: str | None, base_url: str, model: str, dimensions: int) -> None: + self._client = AsyncOpenAI(api_key=api_key or "not-needed", base_url=base_url) + self._model = model + self.dimensions = dimensions + + async def embed(self, texts: list[str]) -> list[list[float]]: + resp = await self._client.embeddings.create(model=self._model, input=texts) + return [d.embedding for d in resp.data] diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 1281776..4386e1d 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "boto3>=1.35", "python-multipart>=0.0.12", "anthropic>=0.108.0", + "openai>=2.41.0", ] [dependency-groups] diff --git a/backend/tests/test_model_providers.py b/backend/tests/test_model_providers.py index 3705e11..af394bc 100644 --- a/backend/tests/test_model_providers.py +++ b/backend/tests/test_model_providers.py @@ -1,43 +1,84 @@ -"""Model-provider selection + the null-provider fail-loud behavior. 
- -No network: we only assert which provider the factory returns and that the null -providers raise a clear error. (Live LLM/embedding calls aren't unit-tested.) +"""Model-provider registry: configure several vendors at once, select by name, +default selection, and the null fail-loud behavior. No network — we only assert +which provider the factory returns and that null providers raise. """ import pytest -from app.api.deps import get_embedding_provider, get_llm_provider +from app.api.deps import ( + build_embedding_providers, + build_llm_providers, + get_embedding_provider, + get_llm_provider, +) from app.core.config import get_settings from app.integrations.models.anthropic_provider import AnthropicLLMProvider from app.integrations.models.base import ModelProviderNotConfigured from app.integrations.models.null import NullEmbeddingProvider, NullLLMProvider +from app.integrations.models.openai_compat import ( + OpenAICompatibleEmbeddingProvider, + OpenAICompatibleLLMProvider, +) -async def test_default_llm_is_null_and_fails_loud(monkeypatch): - monkeypatch.setattr(get_settings(), "model_provider", "null") +def _reset(monkeypatch): + s = get_settings() + for attr, val in { + "default_llm_provider": "null", + "default_embedding_provider": "null", + "anthropic_api_key": None, + "openai_api_key": None, + "xai_api_key": None, + "ollama_enabled": False, + }.items(): + monkeypatch.setattr(s, attr, val) + return s + + +async def test_default_is_null_and_fails_loud(monkeypatch): + _reset(monkeypatch) provider = get_llm_provider() assert isinstance(provider, NullLLMProvider) with pytest.raises(ModelProviderNotConfigured): await provider.complete(prompt="hello") + assert isinstance(get_embedding_provider(), NullEmbeddingProvider) -async def test_anthropic_selected_when_configured(monkeypatch): - s = get_settings() - monkeypatch.setattr(s, "model_provider", "anthropic") - monkeypatch.setattr(s, "anthropic_api_key", "sk-ant-test-key") - monkeypatch.setattr(s, "llm_model", 
"claude-opus-4-8") - assert isinstance(get_llm_provider(), AnthropicLLMProvider) # no network call +async def test_multiple_llm_providers_at_once(monkeypatch): + s = _reset(monkeypatch) + monkeypatch.setattr(s, "anthropic_api_key", "sk-ant-x") + monkeypatch.setattr(s, "openai_api_key", "sk-openai-x") + monkeypatch.setattr(s, "xai_api_key", "xai-x") + monkeypatch.setattr(s, "ollama_enabled", True) + monkeypatch.setattr(s, "default_llm_provider", "anthropic") + + registry = build_llm_providers() + assert set(registry) == {"anthropic", "openai", "xai", "ollama"} + # Select any by name. + assert isinstance(get_llm_provider("anthropic"), AnthropicLLMProvider) + assert isinstance(get_llm_provider("openai"), OpenAICompatibleLLMProvider) + assert isinstance(get_llm_provider("xai"), OpenAICompatibleLLMProvider) + assert isinstance(get_llm_provider("ollama"), OpenAICompatibleLLMProvider) + # Default resolves to the configured default. + assert isinstance(get_llm_provider(), AnthropicLLMProvider) + # Unknown name → null. + assert isinstance(get_llm_provider("nope"), NullLLMProvider) -async def test_anthropic_without_key_falls_back_to_null(monkeypatch): - s = get_settings() - monkeypatch.setattr(s, "model_provider", "anthropic") - monkeypatch.setattr(s, "anthropic_api_key", None) +async def test_provider_disabled_without_credentials(monkeypatch): + s = _reset(monkeypatch) + monkeypatch.setattr(s, "default_llm_provider", "openai") # default names openai… + # …but no openai key → registry empty → null fallback. 
+ assert build_llm_providers() == {} assert isinstance(get_llm_provider(), NullLLMProvider) -async def test_embedding_default_is_null_and_fails_loud(): - provider = get_embedding_provider() - assert isinstance(provider, NullEmbeddingProvider) - with pytest.raises(ModelProviderNotConfigured): - await provider.embed(["text"]) +async def test_embedding_providers(monkeypatch): + s = _reset(monkeypatch) + monkeypatch.setattr(s, "openai_api_key", "sk-openai-x") + monkeypatch.setattr(s, "ollama_enabled", True) + monkeypatch.setattr(s, "default_embedding_provider", "openai") + registry = build_embedding_providers() + assert set(registry) == {"openai", "ollama"} + assert isinstance(get_embedding_provider(), OpenAICompatibleEmbeddingProvider) + assert isinstance(get_embedding_provider("ollama"), OpenAICompatibleEmbeddingProvider) diff --git a/backend/uv.lock b/backend/uv.lock index 62e742b..74a8332 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -549,6 +549,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, ] +[[package]] +name = "openai" +version = "2.41.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3c/a6/5815fe2e2aca74b36c650d1bd43b69827cee568073d0d2d9b6fc5aaac80c/openai-2.41.0.tar.gz", hash = "sha256:db5c362acd6604b84f076abbefa66826ea4b46ecba2954ed866e6a149a1352c0", size = 783525, upload-time = "2026-06-03T22:39:40.719Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/be/51/d82bb424e8aa372190c5233253a2ceb399a778747d18b42cff487411e663/openai-2.41.0-py3-none-any.whl", hash = "sha256:20cc7952e8501c7e5773dd2ef7be437bae9cb549044902e1041a83a54516e375", size = 1353378, upload-time = "2026-06-03T22:39:38.964Z" }, +] + [[package]] name = "packaging" version = "26.2" @@ -578,6 +597,7 @@ dependencies = [ { name = "asyncpg" }, { name = "boto3" }, { name = "fastapi" }, + { name = "openai" }, { name = "pydantic" }, { name = "pydantic-settings" }, { name = "python-multipart" }, @@ -601,6 +621,7 @@ requires-dist = [ { name = "asyncpg", specifier = ">=0.30" }, { name = "boto3", specifier = ">=1.35" }, { name = "fastapi", specifier = ">=0.115" }, + { name = "openai", specifier = ">=2.41.0" }, { name = "pydantic", specifier = ">=2.9" }, { name = "pydantic-settings", specifier = ">=2.5" }, { name = "python-multipart", specifier = ">=0.0.12" }, @@ -919,6 +940,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/54/196d0c1db10af76baa4f64894448505d60d3cdf70ef92cbb35f46a4e4c71/starlette-1.2.1-py3-none-any.whl", hash = "sha256:4de0082d08c8f6764a85a54cf1120d6939507a19905c7768acad2a9f875d2b89", size = 73350, upload-time = "2026-05-31T01:07:50.09Z" }, ] +[[package]] +name = "tqdm" +version = "4.68.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/05/0d5260f1f1ca784f4a4a0def9cbe6affe587f5b4025328d446c3d67765f4/tqdm-4.68.2.tar.gz", hash = "sha256:89c230e8dbc67c7615c142487111222f878c77427ea09549960f62389e258add", size = 171923, upload-time = "2026-06-09T13:26:42.539Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/75/1a0392bcc21c44dcdf87b3cf2d137e7829be2c083a1e38d44efca3d57a16/tqdm-4.68.2-py3-none-any.whl", hash = "sha256:d4240441fb5353290b87d6a85968c9decc131a99b8c7faa28269d829de669ede", size = 78578, upload-time = 
"2026-06-09T13:26:40.731Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" diff --git a/deploy/.env.example b/deploy/.env.example index 40420f9..f81ab84 100644 --- a/deploy/.env.example +++ b/deploy/.env.example @@ -57,16 +57,34 @@ SMTP_USERNAME= SMTP_PASSWORD= SMTP_FROM= -# --- Model providers (AI assistant + embeddings; both optional, default off) --- -# LLM: 'null' disables AI features; 'anthropic' uses the Claude API. -MODEL_PROVIDER=null -ANTHROPIC_API_KEY= -LLM_MODEL=claude-opus-4-8 +# --- Model providers (AI assistant + embeddings) ----------------------------- +# Configure as many as you like — each turns on when its key is set. The +# default_* vars pick which one is used by default; the app can also select any +# configured provider by name. LLM and embeddings are independent (Anthropic has +# no embeddings endpoint). Leave the defaults 'null' to keep AI off. +DEFAULT_LLM_PROVIDER=null # null | anthropic | openai | xai | ollama +DEFAULT_EMBEDDING_PROVIDER=null # null | openai | ollama LLM_MAX_TOKENS=4096 -# Embeddings are separate (Anthropic has no embeddings endpoint). 'null' for now. -EMBEDDING_PROVIDER=null +EMBEDDING_DIMENSIONS=1536 # must match the embedding model + pgvector column -# --- Model providers — wired in Phase 4 (AI assistant). BYO key. --- -# ANTHROPIC_API_KEY= -# OPENAI_API_KEY= +# Anthropic (LLM) +ANTHROPIC_API_KEY= +ANTHROPIC_MODEL=claude-opus-4-8 + +# OpenAI (LLM + embeddings) +OPENAI_API_KEY= +OPENAI_BASE_URL=https://api.openai.com/v1 +OPENAI_MODEL=gpt-4o +OPENAI_EMBEDDING_MODEL=text-embedding-3-small + +# xAI / Grok — OpenAI-compatible (LLM) +XAI_API_KEY= +XAI_BASE_URL=https://api.x.ai/v1 +XAI_MODEL=grok-2-latest # set to your account's current Grok model + +# Ollama — local, OpenAI-compatible, no key (LLM + embeddings) +OLLAMA_ENABLED=false +OLLAMA_BASE_URL=http://localhost:11434/v1 +OLLAMA_MODEL=llama3.1 +OLLAMA_EMBEDDING_MODEL=nomic-embed-text # XAI_API_KEY=