mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 01:35:36 -04:00
Add support for EMBEDDING_API_KEY (#2691)
* feat: support for embedding API key * feat: encrypt and decrypt embedding API key * test: add unit tests for EmbeddingClient authorization header behavior
This commit is contained in:
+7
-2
@@ -38,12 +38,13 @@ _DEFAULT_FASTEMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
||||
class EmbeddingClient:
|
||||
"""Drop-in replacement for SentenceTransformer.encode() using an HTTP API."""
|
||||
|
||||
def __init__(self, url: Optional[str] = None, model: Optional[str] = None):
|
||||
def __init__(self, url: Optional[str] = None, model: Optional[str] = None, api_key: Optional[str] = None):
|
||||
self.url = url or os.getenv(
|
||||
"EMBEDDING_URL",
|
||||
f"http://{os.getenv('LLM_HOST', 'localhost')}:11434/v1/embeddings",
|
||||
)
|
||||
self.model = model or os.getenv("EMBEDDING_MODEL", _DEFAULT_MODEL)
|
||||
self.api_key = api_key or os.getenv("EMBEDDING_API_KEY")
|
||||
self._dim: Optional[int] = None
|
||||
# Short connect timeout so a DOWN embedding endpoint (e.g. Ollama not
|
||||
# running on :11434) fast-fails to the local FastEmbed fallback instead
|
||||
@@ -74,6 +75,7 @@ class EmbeddingClient:
|
||||
batch = texts[i : i + 64]
|
||||
resp = self._client.post(
|
||||
self.url,
|
||||
headers={"Authorization": f"Bearer {self.api_key}"} if self.api_key else {},
|
||||
json={"input": batch, "model": self.model},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
@@ -222,11 +224,14 @@ def get_embedding_client():
|
||||
if persisted.get("url"):
|
||||
url = persisted["url"]
|
||||
model = persisted.get("model", "")
|
||||
api_key = persisted.get("api_key", "")
|
||||
# Also set in env so other code sees it
|
||||
os.environ["EMBEDDING_URL"] = url
|
||||
if model:
|
||||
os.environ["EMBEDDING_MODEL"] = model
|
||||
|
||||
if api_key:
|
||||
from src.secret_storage import decrypt
|
||||
os.environ["EMBEDDING_API_KEY"] = decrypt(api_key)
|
||||
# Try the HTTP embedding API — unless we already found it down this process
|
||||
# (avoids paying the connect timeout again on every RAG/memory/tool probe).
|
||||
if not _http_embed_down:
|
||||
|
||||
Reference in New Issue
Block a user