Add support for EMBEDDING_API_KEY (#2691)

* feat: support for embedding API key
* feat: encrypt and decrypt embedding API key
* test: add unit tests for EmbeddingClient authorization header behavior
2026-06-16 01:35:36 -04:00 · 2026-06-05 14:47:24 +02:00
parent b5c45326e4
commit ec8fbf5d8f
7 changed files with 68 additions and 3 deletions
@@ -38,12 +38,13 @@ _DEFAULT_FASTEMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 class EmbeddingClient:
    """Drop-in replacement for SentenceTransformer.encode() using an HTTP API."""

-    def __init__(self, url: Optional[str] = None, model: Optional[str] = None):
+    def __init__(self, url: Optional[str] = None, model: Optional[str] = None, api_key: Optional[str] = None):
        self.url = url or os.getenv(
            "EMBEDDING_URL",
            f"http://{os.getenv('LLM_HOST', 'localhost')}:11434/v1/embeddings",
        )
        self.model = model or os.getenv("EMBEDDING_MODEL", _DEFAULT_MODEL)
+        self.api_key = api_key or os.getenv("EMBEDDING_API_KEY")
        self._dim: Optional[int] = None
        # Short connect timeout so a DOWN embedding endpoint (e.g. Ollama not
        # running on :11434) fast-fails to the local FastEmbed fallback instead
@@ -74,6 +75,7 @@ class EmbeddingClient:
            batch = texts[i : i + 64]
            resp = self._client.post(
                self.url,
+                headers={"Authorization": f"Bearer {self.api_key}"} if self.api_key else {},
                json={"input": batch, "model": self.model},
            )
            resp.raise_for_status()
@@ -222,11 +224,14 @@ def get_embedding_client():
    if persisted.get("url"):
        url = persisted["url"]
        model = persisted.get("model", "")
+        api_key = persisted.get("api_key", "")
        # Also set in env so other code sees it
        os.environ["EMBEDDING_URL"] = url
        if model:
            os.environ["EMBEDDING_MODEL"] = model
-
+        if api_key:
+            from src.secret_storage import decrypt
+            os.environ["EMBEDDING_API_KEY"] = decrypt(api_key)
    # Try the HTTP embedding API — unless we already found it down this process
    # (avoids paying the connect timeout again on every RAG/memory/tool probe).
    if not _http_embed_down: