diff --git a/.gitea/workflows/image-only.yml b/.gitea/workflows/image-only.yml
index 0422728..fc5d257 100644
--- a/.gitea/workflows/image-only.yml
+++ b/.gitea/workflows/image-only.yml
@@ -19,7 +19,10 @@ env:
   REGISTRY_PUSH: 192.168.0.2:1234
   REGISTRY_PULL: git.jpaul.io
   IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }}
-  OLLAMA_URL: http://192.168.0.126:11434
+  # Two GPU-pinned Ollama containers on the Gitea host — same infra
+  # zerto-docs uses. :11435 = Titan X, :11436 = 1080 Ti. Indexer
+  # round-robins per batch.
+  OLLAMA_URLS: http://192.168.0.2:11435,http://192.168.0.2:11436
   EMBED_MODEL: nomic-embed-text
   PRODUCT_NAME: hvm
 
diff --git a/.gitea/workflows/refresh.yml b/.gitea/workflows/refresh.yml
index 9a48be6..c15d77f 100644
--- a/.gitea/workflows/refresh.yml
+++ b/.gitea/workflows/refresh.yml
@@ -29,10 +29,12 @@ env:
   # edit this. github.* is the Gitea-Actions inherited namespace.
   IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }}
 
-  # Embedder. One URL per GPU; the indexer round-robins if you pass a
-  # comma-separated list. Adjust to wherever Ollama is reachable from
-  # the runner (gitea_default network can reach the host's bridge IP).
-  OLLAMA_URL: http://192.168.0.126:11434
+  # Two GPU-pinned Ollama containers on the Gitea host — same infra
+  # zerto-docs uses (deploy/ollama-rag.docker-compose.yml over there).
+  # :11435 owns the Titan X, :11436 owns the 1080 Ti; the indexer
+  # round-robins per batch so both cards run in parallel. The host's
+  # primary Ollama on :11434 is left alone for OpenWebUI etc.
+  OLLAMA_URLS: http://192.168.0.2:11435,http://192.168.0.2:11436
   EMBED_MODEL: nomic-embed-text
 
   PRODUCT_NAME: hvm
diff --git a/rag/embeddings.py b/rag/embeddings.py
index 84d3bbd..a072f7a 100644
--- a/rag/embeddings.py
+++ b/rag/embeddings.py
@@ -3,8 +3,15 @@
 Swappable: implement the same `embedding_function()` interface returning
 a Chroma `EmbeddingFunction` and the rest of the pipeline doesn't care.
 
-Defaults (override via env):
-  OLLAMA_URL    one or more comma-separated URLs (load-balanced)
+Env-configurable (matches the zerto-docs-rag pattern so the same Gitea
+runner + GPU-pinned Ollama containers can serve every docs MCP build):
+
+  OLLAMA_URLS   comma-separated list, load-balanced round-robin per batch.
+                Preferred — set in the CI workflow to fan out across two
+                GPU-pinned Ollama containers on the Gitea host.
+  OLLAMA_URL    single endpoint, fallback when OLLAMA_URLS is unset.
+                Default http://192.168.0.2:11434 (the host where the GPUs
+                live in Justin's lab).
   EMBED_MODEL   model name; default 'nomic-embed-text'
   EMBED_DIM     expected embedding dim; default 768 (nomic-embed-text)
 """
@@ -19,8 +26,18 @@ from chromadb import EmbeddingFunction, Documents, Embeddings
 
 log = logging.getLogger(__name__)
 
-OLLAMA_URLS = [u.strip() for u in os.environ.get("OLLAMA_URL",
-               "http://localhost:11434").split(",") if u.strip()]
+DEFAULT_OLLAMA_URL = "http://192.168.0.2:11434"  # lab GPU host; fallback when no env override is set
+
+
+def _resolve_urls() -> list[str]:
+    """Return URLs from OLLAMA_URLS, else OLLAMA_URL (comma lists OK), else default."""
+    for raw in (os.environ.get(k, "") for k in ("OLLAMA_URLS", "OLLAMA_URL")):
+        if urls := [u for p in raw.split(",") if (u := p.strip().rstrip("/"))]:
+            return urls
+    return [DEFAULT_OLLAMA_URL]
+
+
+OLLAMA_URLS = _resolve_urls()  # resolved once, at import time
 EMBED_MODEL = os.environ.get("EMBED_MODEL", "nomic-embed-text")
 EMBED_DIM = int(os.environ.get("EMBED_DIM", "768"))