From 0f966d6b9f34ede4469b1bc15d8964111d5dbef2 Mon Sep 17 00:00:00 2001
From: TheDragonTail
Date: Tue, 16 Jun 2026 03:11:48 +0100
Subject: [PATCH] fix(embeddings): fall back to default cache dir when FASTEMBED_CACHE_PATH is empty (#3434)

docker-compose.yml injects FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-},
which sets the variable to an empty string when the host has not defined
it. FASTEMBED_CACHE_DIR used os.getenv("FASTEMBED_CACHE_PATH", default),
and os.getenv only returns the default when the variable is ABSENT -- so
the empty value won and FASTEMBED_CACHE_DIR became "". os.makedirs("")
then raised [Errno 2] No such file or directory: '', FastEmbed failed to
initialise, and every vector feature (RAG, semantic memory, tool index)
silently degraded on the default Docker stack.

Treat an empty value like an absent one via `os.getenv(...) or default`.
Add a regression test covering the empty, unset, and explicit cases.

Co-authored-by: Claude Opus 4.8 (1M context)
---
 src/constants.py                   |  8 +++-
 tests/test_fastembed_cache_path.py | 69 ++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_fastembed_cache_path.py

diff --git a/src/constants.py b/src/constants.py
index 622f7e509..40efbe73a 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -57,7 +57,13 @@ MEMORY_VECTORS_DIR = os.path.join(DATA_DIR, "memory_vectors")
 
 # Paths with an intentional dedicated env override, defaulting under DATA_DIR.
 MAIL_ATTACHMENTS_DIR = os.getenv("ODYSSEUS_MAIL_ATTACHMENTS_DIR", os.path.join(DATA_DIR, "mail-attachments"))
-FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH", os.path.join(DATA_DIR, "fastembed_cache"))
+# `or` (not os.getenv's default arg) so a PRESENT-but-EMPTY value falls back to
+# the default. docker-compose.yml injects `FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}`,
+# which sets the var to "" when the host hasn't defined it. os.getenv(name, default)
+# only returns the default when the var is ABSENT, so the empty string would win →
+# os.makedirs("") raises [Errno 2] No such file or directory: '' → FastEmbed fails to
+# init and all vector features (RAG, semantic memory, tool index) silently degrade.
+FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join(DATA_DIR, "fastembed_cache")
 
 # Agent tool output limits (single source of truth — imported by tool_execution.py,
 # tool_implementations.py, agent_tools.py, and any other module that needs them)
diff --git a/tests/test_fastembed_cache_path.py b/tests/test_fastembed_cache_path.py
new file mode 100644
index 000000000..9dc768333
--- /dev/null
+++ b/tests/test_fastembed_cache_path.py
@@ -0,0 +1,69 @@
+"""Regression: FASTEMBED_CACHE_DIR must tolerate a PRESENT-but-EMPTY
+FASTEMBED_CACHE_PATH.
+
+docker-compose.yml injects ``FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}``,
+which sets the variable to ``""`` when the host has not defined it. The old
+``os.getenv("FASTEMBED_CACHE_PATH", default)`` only used the default when the
+variable was ABSENT, so an empty value made ``FASTEMBED_CACHE_DIR == ""`` →
+``os.makedirs("")`` raised ``[Errno 2] No such file or directory: ''`` →
+FastEmbed failed to initialise and every vector feature (RAG, semantic memory,
+tool index) silently degraded on the default Docker stack.
+
+These tests pin the fix: empty is treated like absent → use the DATA_DIR
+default, while an explicit non-empty override is still honoured.
+"""
+
+from __future__ import annotations
+
+import importlib
+import os
+
+import src.constants as constants
+
+
+def _reload_with(monkeypatch, value):
+    """Reload src.constants with FASTEMBED_CACHE_PATH set to ``value`` (or
+    removed when ``value`` is None) and return the reloaded module."""
+    if value is None:
+        monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False)
+    else:
+        monkeypatch.setenv("FASTEMBED_CACHE_PATH", value)
+    return importlib.reload(constants)
+
+
+def _restore(monkeypatch):
+    """Return the module to its env-default state so reloading it here does
+    not leak a test-specific FASTEMBED_CACHE_DIR into other tests."""
+    monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False)
+    importlib.reload(constants)
+
+
+def test_empty_fastembed_cache_path_falls_back_to_default(monkeypatch):
+    """The bug: an empty FASTEMBED_CACHE_PATH (exactly what Docker injects)
+    must fall back to the DATA_DIR default, never the empty string."""
+    try:
+        mod = _reload_with(monkeypatch, "")
+        assert mod.FASTEMBED_CACHE_DIR, "empty env must not yield an empty path"
+        assert mod.FASTEMBED_CACHE_DIR == os.path.join(mod.DATA_DIR, "fastembed_cache")
+    finally:
+        _restore(monkeypatch)
+
+
+def test_unset_fastembed_cache_path_uses_default(monkeypatch):
+    """Sanity: an absent variable also resolves to the default."""
+    try:
+        mod = _reload_with(monkeypatch, None)
+        assert mod.FASTEMBED_CACHE_DIR == os.path.join(mod.DATA_DIR, "fastembed_cache")
+    finally:
+        _restore(monkeypatch)
+
+
+def test_explicit_fastembed_cache_path_is_respected(monkeypatch):
+    """A real explicit override must still win — the fix only changes the
+    empty-value handling, not the documented FASTEMBED_CACHE_PATH override."""
+    custom = os.path.join("custom", "fastembed-cache")
+    try:
+        mod = _reload_with(monkeypatch, custom)
+        assert mod.FASTEMBED_CACHE_DIR == custom
+    finally:
+        _restore(monkeypatch)