diff --git a/.gitea/workflows/image-only.yml b/.gitea/workflows/image-only.yml index 0422728..fc5d257 100644 --- a/.gitea/workflows/image-only.yml +++ b/.gitea/workflows/image-only.yml @@ -19,7 +19,10 @@ env: REGISTRY_PUSH: 192.168.0.2:1234 REGISTRY_PULL: git.jpaul.io IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }} - OLLAMA_URL: http://192.168.0.126:11434 + # Two GPU-pinned Ollama containers on the Gitea host — same infra + # zerto-docs uses. :11435 = Titan X, :11436 = 1080 Ti. Indexer + # round-robins per batch. + OLLAMA_URLS: http://192.168.0.2:11435,http://192.168.0.2:11436 EMBED_MODEL: nomic-embed-text PRODUCT_NAME: hvm diff --git a/.gitea/workflows/refresh.yml b/.gitea/workflows/refresh.yml index 9a48be6..c15d77f 100644 --- a/.gitea/workflows/refresh.yml +++ b/.gitea/workflows/refresh.yml @@ -29,10 +29,12 @@ env: # edit this. github.* is the Gitea-Actions inherited namespace. IMAGE: ${{ github.repository_owner }}/${{ github.event.repository.name }} - # Embedder. One URL per GPU; the indexer round-robins if you pass a - # comma-separated list. Adjust to wherever Ollama is reachable from - # the runner (gitea_default network can reach the host's bridge IP). - OLLAMA_URL: http://192.168.0.126:11434 + # Two GPU-pinned Ollama containers on the Gitea host — same infra + # zerto-docs uses (deploy/ollama-rag.docker-compose.yml over there). + # :11435 owns the Titan X, :11436 owns the 1080 Ti; the indexer + # round-robins per batch so both cards run in parallel. The host's + # primary Ollama on :11434 is left alone for OpenWebUI etc. + OLLAMA_URLS: http://192.168.0.2:11435,http://192.168.0.2:11436 EMBED_MODEL: nomic-embed-text PRODUCT_NAME: hvm diff --git a/rag/embeddings.py b/rag/embeddings.py index 84d3bbd..a072f7a 100644 --- a/rag/embeddings.py +++ b/rag/embeddings.py @@ -3,8 +3,15 @@ Swappable: implement the same `embedding_function()` interface returning a Chroma `EmbeddingFunction` and the rest of the pipeline doesn't care. 
# Env-configurable (matches the zerto-docs-rag pattern so the same Gitea
# runner + GPU-pinned Ollama containers can serve every docs MCP build):
#
#   OLLAMA_URLS   comma-separated list, load-balanced round-robin per batch.
#                 Preferred — set in the CI workflow to fan out across two
#                 GPU-pinned Ollama containers on the Gitea host.
#   OLLAMA_URL    fallback when OLLAMA_URLS is unset or contains no usable
#                 entry. Usually a single endpoint; a comma-separated list
#                 is still accepted for backward compatibility.
#                 Default http://192.168.0.2:11434 (the host where the GPUs
#                 live in Justin's lab).
#   EMBED_MODEL   model name; default 'nomic-embed-text'
#   EMBED_DIM     expected embedding dim; default 768 (nomic-embed-text)

log = logging.getLogger(__name__)

DEFAULT_OLLAMA_URL = "http://192.168.0.2:11434"


def _split_urls(raw: str) -> list[str]:
    """Split a comma-separated URL string into normalised, non-empty URLs.

    Each entry has surrounding whitespace and trailing slashes removed.
    Entries that are empty *after* that normalisation (e.g. ``""``, ``" "``
    or a lone ``"/"``) are dropped, so the result never contains an empty
    string.
    """
    normalised = (part.strip().rstrip("/") for part in raw.split(","))
    return [url for url in normalised if url]


def _resolve_urls() -> list[str]:
    """Return the Ollama endpoints configured through the environment.

    Lookup order:

    1. ``OLLAMA_URLS`` -- comma-separated list (preferred).
    2. ``OLLAMA_URL`` -- consulted when ``OLLAMA_URLS`` yields no usable
       entry. It is split on commas as well, because earlier versions of
       this module accepted a comma-separated list in this variable.
    3. ``DEFAULT_OLLAMA_URL`` -- used when neither variable yields a URL.

    Returns:
        A non-empty list of URLs without trailing slashes.
    """
    for var_name in ("OLLAMA_URLS", "OLLAMA_URL"):
        urls = _split_urls(os.environ.get(var_name, ""))
        if urls:
            return urls
    # Never hand back an empty list: callers index into it to pick an
    # endpoint, so "nothing configured" must resolve to the default.
    return [DEFAULT_OLLAMA_URL]


OLLAMA_URLS = _resolve_urls()
EMBED_MODEL = os.environ.get("EMBED_MODEL", "nomic-embed-text")
EMBED_DIM = int(os.environ.get("EMBED_DIM", "768"))