Add support for EMBEDDING_API_KEY (#2691)

* feat: support for embedding API key

* feat: encrypt and decrypt embedding API key

* test: add unit tests for EmbeddingClient authorization header behavior
This commit is contained in:
Yiğit Egemen
2026-06-05 14:47:24 +02:00
committed by GitHub
parent b5c45326e4
commit ec8fbf5d8f
7 changed files with 68 additions and 3 deletions
+7 -2
View File
@@ -38,12 +38,13 @@ _DEFAULT_FASTEMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
class EmbeddingClient:
"""Drop-in replacement for SentenceTransformer.encode() using an HTTP API."""
def __init__(self, url: Optional[str] = None, model: Optional[str] = None):
def __init__(self, url: Optional[str] = None, model: Optional[str] = None, api_key: Optional[str] = None):
self.url = url or os.getenv(
"EMBEDDING_URL",
f"http://{os.getenv('LLM_HOST', 'localhost')}:11434/v1/embeddings",
)
self.model = model or os.getenv("EMBEDDING_MODEL", _DEFAULT_MODEL)
self.api_key = api_key or os.getenv("EMBEDDING_API_KEY")
self._dim: Optional[int] = None
# Short connect timeout so a DOWN embedding endpoint (e.g. Ollama not
# running on :11434) fast-fails to the local FastEmbed fallback instead
@@ -74,6 +75,7 @@ class EmbeddingClient:
batch = texts[i : i + 64]
resp = self._client.post(
self.url,
headers={"Authorization": f"Bearer {self.api_key}"} if self.api_key else {},
json={"input": batch, "model": self.model},
)
resp.raise_for_status()
@@ -222,11 +224,14 @@ def get_embedding_client():
if persisted.get("url"):
url = persisted["url"]
model = persisted.get("model", "")
api_key = persisted.get("api_key", "")
# Also set in env so other code sees it
os.environ["EMBEDDING_URL"] = url
if model:
os.environ["EMBEDDING_MODEL"] = model
if api_key:
from src.secret_storage import decrypt
os.environ["EMBEDDING_API_KEY"] = decrypt(api_key)
# Try the HTTP embedding API — unless we already found it down this process
# (avoids paying the connect timeout again on every RAG/memory/tool probe).
if not _http_embed_down: