5 Commits

Author SHA1 Message Date
Alexandre Teixeira 68f19a889a test: add fire_and_forget to API chat webhook stub 2026-06-16 03:24:48 +01:00
RaresKeY 422f23fb12 fix(mcp): scope memory server by owner (#4315) 2026-06-16 03:18:17 +01:00
TheDragonTail 0f966d6b9f fix(embeddings): fall back to default cache dir when FASTEMBED_CACHE_PATH is empty (#3434)
docker-compose.yml injects FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-},
which sets the variable to an empty string when the host has not defined it.
FASTEMBED_CACHE_DIR used os.getenv("FASTEMBED_CACHE_PATH", default), and
os.getenv only returns the default when the variable is ABSENT -- so the empty
value won and FASTEMBED_CACHE_DIR became "". os.makedirs("") then raised
[Errno 2] No such file or directory: '', FastEmbed failed to initialise, and
every vector feature (RAG, semantic memory, tool index) silently degraded on
the default Docker stack.

Treat an empty value like an absent one via `os.getenv(...) or default`.
Add a regression test covering the empty, unset, and explicit cases.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-16 03:11:48 +01:00
Afonso Coutinho 7b09491557 fix: check-in calendar digest leaks every user's events (missing owner scope) (#1925)
* fix: check-in calendar digest leaks every user's events (no owner scope)

* Seed dtend on calendar events in digest test so the NOT NULL column is satisfied
2026-06-16 02:42:41 +01:00
Kenny Van de Maele fafaf089c5 refactor(search): centralize the web-scraping User-Agent into one constant (#4325)
The outbound UA for web_fetch / web_search was inlined in four places with
two different values and nothing keeping them current: content.py pinned a
mid-2021 Chrome 91 build, and providers.py sent a bare Mozilla/5.0 in three
spots. Some sites serve a degraded or blocked page to a UA that old.

Add WEB_FETCH_USER_AGENT to src/constants.py (env-overridable, matching the
existing Copilot/Kimi UA-constant pattern) and import it in content.py and
providers.py. Default to a current, common desktop UA so pages return their
normal HTML: the market-leading desktop OS (Windows; NT 10.0 covers Windows
10 and 11) and browser (Chrome) on a current stable build. The version is now
bumped in one place.

Service-specific self-identifying agents (Copilot, Kimi, webhooks, cookbook)
are intentionally left separate. Adds a regression pinning the constant shape,
the env override, and a guard against a new inline Mozilla literal in the
search sources.

Closes #4324
2026-06-16 01:33:47 +00:00
10 changed files with 444 additions and 41 deletions
+90 -29
View File
@@ -6,6 +6,7 @@ Imports MemoryManager and MemoryVectorStore from the Odysseus codebase.
""" """
import asyncio import asyncio
import os
import sys import sys
import time import time
from pathlib import Path from pathlib import Path
@@ -23,6 +24,55 @@ _memory_manager = None
_memory_vector = None _memory_vector = None
_initialized = False _initialized = False
_OWNER_ENV_KEYS = ("ODYSSEUS_MCP_MEMORY_OWNER", "ODYSSEUS_MEMORY_OWNER")
_OWNER_SCOPE_ERROR = (
"Error: Memory MCP owner is not configured for an owner-scoped memory store. "
"Set ODYSSEUS_MCP_MEMORY_OWNER for this server or use the owner-aware native memory tool."
)
def _configured_owner() -> str | None:
for key in _OWNER_ENV_KEYS:
owner = os.environ.get(key, "").strip()
if owner:
return owner
return None
def _entry_owner(entry: dict) -> str | None:
owner = entry.get("owner")
if owner is None:
return None
owner_text = str(owner).strip()
return owner_text or None
def _owner_scoped_store(entries: list[dict]) -> bool:
return any(_entry_owner(entry) for entry in entries if isinstance(entry, dict))
def _scope_entries() -> tuple[str | None, list[dict], list[dict], str | None]:
"""Return configured owner, all entries, visible entries, and optional error."""
entries = _memory_manager.load_all()
owner = _configured_owner()
if owner is None and _owner_scoped_store(entries):
return None, entries, [], _OWNER_SCOPE_ERROR
if owner is None:
visible = [
entry for entry in entries
if isinstance(entry, dict) and _entry_owner(entry) is None
]
else:
visible = [
entry for entry in entries
if isinstance(entry, dict) and _entry_owner(entry) == owner
]
return owner, entries, visible, None
def _text_result(text: str) -> list[TextContent]:
return [TextContent(type="text", text=text)]
def _ensure_init(): def _ensure_init():
"""Lazy-init memory managers on first use.""" """Lazy-init memory managers on first use."""
@@ -75,24 +125,26 @@ async def list_tools() -> list[Tool]:
@server.call_tool() @server.call_tool()
async def call_tool(name: str, arguments: dict) -> list[TextContent]: async def call_tool(name: str, arguments: dict) -> list[TextContent]:
if name != "manage_memory": if name != "manage_memory":
return [TextContent(type="text", text=f"Unknown tool: {name}")] return _text_result(f"Unknown tool: {name}")
_ensure_init() _ensure_init()
if not _memory_manager: if not _memory_manager:
return [TextContent(type="text", text="Error: Memory manager not available")] return _text_result("Error: Memory manager not available")
action = arguments.get("action", "") action = arguments.get("action", "")
if action == "list": if action == "list":
category_filter = arguments.get("category", "") category_filter = arguments.get("category", "")
memories = _memory_manager.load() _owner, _all_memories, memories, scope_error = _scope_entries()
if scope_error:
return _text_result(scope_error)
if category_filter: if category_filter:
memories = [m for m in memories if m.get("category", "").lower() == category_filter.lower()] memories = [m for m in memories if m.get("category", "").lower() == category_filter.lower()]
if not memories: if not memories:
msg = "No memories found" msg = "No memories found"
if category_filter: if category_filter:
msg += f" in category '{category_filter}'" msg += f" in category '{category_filter}'"
return [TextContent(type="text", text=msg + ".")] return _text_result(msg + ".")
lines = [f"Found {len(memories)} memory entries:\n"] lines = [f"Found {len(memories)} memory entries:\n"]
for m in memories: for m in memories:
@@ -102,15 +154,17 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
if len(text) > 150: if len(text) > 150:
text = text[:150] + "..." text = text[:150] + "..."
lines.append(f"- [{cat}] `{mid}` — {text}") lines.append(f"- [{cat}] `{mid}` — {text}")
return [TextContent(type="text", text="\n".join(lines))] return _text_result("\n".join(lines))
elif action == "add": elif action == "add":
text = arguments.get("text", "") text = arguments.get("text", "")
category = arguments.get("category", "fact") category = arguments.get("category", "fact")
if not text: if not text:
return [TextContent(type="text", text="Error: Memory text cannot be empty")] return _text_result("Error: Memory text cannot be empty")
entry = _memory_manager.add_entry(text, source="ai_agent", category=category) owner, memories, _visible, scope_error = _scope_entries()
memories = _memory_manager.load_all() if scope_error:
return _text_result(scope_error)
entry = _memory_manager.add_entry(text, source="ai_agent", category=category, owner=owner)
memories.append(entry) memories.append(entry)
_memory_manager.save(memories) _memory_manager.save(memories)
if _memory_vector and _memory_vector.healthy: if _memory_vector and _memory_vector.healthy:
@@ -118,25 +172,28 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
_memory_vector.add(entry["id"], text) _memory_vector.add(entry["id"], text)
except Exception: except Exception:
pass pass
return [TextContent(type="text", text=f"Memory added: [{category}] {text} (id: {entry['id'][:8]})")] return _text_result(f"Memory added: [{category}] {text} (id: {entry['id'][:8]})")
elif action == "edit": elif action == "edit":
memory_id = arguments.get("memory_id", "") memory_id = arguments.get("memory_id", "")
new_text = arguments.get("text", "") new_text = arguments.get("text", "")
if not memory_id or not new_text: if not memory_id or not new_text:
return [TextContent(type="text", text="Error: edit needs memory_id and text")] return _text_result("Error: edit needs memory_id and text")
memories = _memory_manager.load_all() _owner, memories, visible, scope_error = _scope_entries()
found = False if scope_error:
return _text_result(scope_error)
full_id = None full_id = None
for m in memories: for m in visible:
if m.get("id", "").startswith(memory_id): if m.get("id", "").startswith(memory_id):
m["text"] = new_text
m["timestamp"] = int(time.time())
found = True
full_id = m["id"] full_id = m["id"]
break break
if not found: if not full_id:
return [TextContent(type="text", text=f"Error: Memory '{memory_id}' not found")] return _text_result(f"Error: Memory '{memory_id}' not found")
for m in memories:
if m.get("id") == full_id:
m["text"] = new_text
m["timestamp"] = int(time.time())
break
_memory_manager.save(memories) _memory_manager.save(memories)
if _memory_vector and _memory_vector.healthy and full_id: if _memory_vector and _memory_vector.healthy and full_id:
try: try:
@@ -144,24 +201,26 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
_memory_vector.add(full_id, new_text) _memory_vector.add(full_id, new_text)
except Exception: except Exception:
pass pass
return [TextContent(type="text", text=f"Memory updated: {new_text}")] return _text_result(f"Memory updated: {new_text}")
elif action == "delete": elif action == "delete":
memory_id = arguments.get("memory_id", "") memory_id = arguments.get("memory_id", "")
if not memory_id: if not memory_id:
return [TextContent(type="text", text="Error: delete needs memory_id")] return _text_result("Error: delete needs memory_id")
memories = _memory_manager.load_all() _owner, memories, visible, scope_error = _scope_entries()
if scope_error:
return _text_result(scope_error)
full_id = None full_id = None
deleted_text = "" deleted_text = ""
deleted_category = "" deleted_category = ""
for m in memories: for m in visible:
if m.get("id", "").startswith(memory_id): if m.get("id", "").startswith(memory_id):
full_id = m["id"] full_id = m["id"]
deleted_text = m.get("text", "") deleted_text = m.get("text", "")
deleted_category = m.get("category", "") deleted_category = m.get("category", "")
break break
if not full_id: if not full_id:
return [TextContent(type="text", text=f"Error: Memory '{memory_id}' not found")] return _text_result(f"Error: Memory '{memory_id}' not found")
memories = [m for m in memories if m.get("id") != full_id] memories = [m for m in memories if m.get("id") != full_id]
_memory_manager.save(memories) _memory_manager.save(memories)
if _memory_vector and _memory_vector.healthy and full_id: if _memory_vector and _memory_vector.healthy and full_id:
@@ -171,30 +230,32 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
pass pass
cat = f"[{deleted_category}] " if deleted_category else "" cat = f"[{deleted_category}] " if deleted_category else ""
snippet = deleted_text if len(deleted_text) <= 120 else deleted_text[:117] + "..." snippet = deleted_text if len(deleted_text) <= 120 else deleted_text[:117] + "..."
return [TextContent(type="text", text=f"Memory deleted: {cat}{snippet} (id: {memory_id})")] return _text_result(f"Memory deleted: {cat}{snippet} (id: {memory_id})")
elif action == "search": elif action == "search":
query = arguments.get("text", "") query = arguments.get("text", "")
if not query: if not query:
return [TextContent(type="text", text="Error: search needs text (query)")] return _text_result("Error: search needs text (query)")
memories = _memory_manager.load() _owner, _all_memories, memories, scope_error = _scope_entries()
if scope_error:
return _text_result(scope_error)
if hasattr(_memory_manager, 'get_relevant_memories'): if hasattr(_memory_manager, 'get_relevant_memories'):
results = _memory_manager.get_relevant_memories(query, memories, threshold=0.05, max_items=20) results = _memory_manager.get_relevant_memories(query, memories, threshold=0.05, max_items=20)
else: else:
query_lower = query.lower() query_lower = query.lower()
results = [m for m in memories if query_lower in m.get("text", "").lower()][:20] results = [m for m in memories if query_lower in m.get("text", "").lower()][:20]
if not results: if not results:
return [TextContent(type="text", text=f"No memories found matching '{query}'.")] return _text_result(f"No memories found matching '{query}'.")
lines = [f"Found {len(results)} matching memories:\n"] lines = [f"Found {len(results)} matching memories:\n"]
for m in results: for m in results:
cat = m.get("category", "fact") cat = m.get("category", "fact")
mid = m.get("id", "?")[:8] mid = m.get("id", "?")[:8]
text = m.get("text", "") text = m.get("text", "")
lines.append(f"- [{cat}] `{mid}` — {text}") lines.append(f"- [{cat}] `{mid}` — {text}")
return [TextContent(type="text", text="\n".join(lines))] return _text_result("\n".join(lines))
else: else:
return [TextContent(type="text", text=f"Error: Unknown action '{action}'. Use: list, add, edit, delete, search")] return _text_result(f"Error: Unknown action '{action}'. Use: list, add, edit, delete, search")
async def run(): async def run():
+2 -2
View File
@@ -15,7 +15,7 @@ from urllib.parse import urljoin, urlparse
import httpx import httpx
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from src.constants import WEB_FETCH_SOFT_MAX_BYTES, WEB_FETCH_HARD_MAX_BYTES from src.constants import WEB_FETCH_SOFT_MAX_BYTES, WEB_FETCH_HARD_MAX_BYTES, WEB_FETCH_USER_AGENT
from .analytics import RateLimitError, error_logger from .analytics import RateLimitError, error_logger
from .cache import ( from .cache import (
@@ -369,7 +369,7 @@ def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0,
# Fetch # Fetch
try: try:
headers = { headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", "User-Agent": WEB_FETCH_USER_AGENT,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5", "Accept-Language": "en-US,en;q=0.5",
# identity so the streamed size cap in _get_public_url stays honest # identity so the streamed size cap in _get_public_url stays honest
+4 -4
View File
@@ -9,7 +9,7 @@ from urllib.parse import urljoin, urlparse, parse_qs
import httpx import httpx
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from src.constants import SEARXNG_INSTANCE, REQUEST_TIMEOUT from src.constants import SEARXNG_INSTANCE, REQUEST_TIMEOUT, WEB_FETCH_USER_AGENT
from .analytics import RateLimitError, error_logger from .analytics import RateLimitError, error_logger
from .query import build_enhanced_query from .query import build_enhanced_query
@@ -138,7 +138,7 @@ def searxng_search_api(query: str, count: Optional[int] = None, categories: str
count = count if count is not None else _get_result_count() count = count if count is not None else _get_result_count()
instance = _get_search_instance() instance = _get_search_instance()
api_key = "" api_key = ""
headers = {"User-Agent": "Mozilla/5.0"} headers = {"User-Agent": WEB_FETCH_USER_AGENT}
if api_key: if api_key:
headers["Authorization"] = f"Bearer {api_key}" headers["Authorization"] = f"Bearer {api_key}"
# News/fresh queries do badly in the 'general' category — it favours # News/fresh queries do badly in the 'general' category — it favours
@@ -250,7 +250,7 @@ def searxng_search(query, max_results=10):
"""Search using SearXNG instance - parsing HTML.""" """Search using SearXNG instance - parsing HTML."""
instance = _get_search_instance() instance = _get_search_instance()
api_key = "" api_key = ""
req_headers = {"User-Agent": "Mozilla/5.0"} req_headers = {"User-Agent": WEB_FETCH_USER_AGENT}
if api_key: if api_key:
req_headers["Authorization"] = f"Bearer {api_key}" req_headers["Authorization"] = f"Bearer {api_key}"
try: try:
@@ -389,7 +389,7 @@ def duckduckgo_search(query: str, count: Optional[int] = None, time_filter: Opti
response = httpx.get( response = httpx.get(
"https://html.duckduckgo.com/html/", "https://html.duckduckgo.com/html/",
params={"q": query, "kp": _safesearch_for("duckduckgo_html")}, params={"q": query, "kp": _safesearch_for("duckduckgo_html")},
headers={"User-Agent": "Mozilla/5.0"}, headers={"User-Agent": WEB_FETCH_USER_AGENT},
timeout=REQUEST_TIMEOUT, timeout=REQUEST_TIMEOUT,
) )
response.raise_for_status() response.raise_for_status()
+14 -1
View File
@@ -57,7 +57,13 @@ MEMORY_VECTORS_DIR = os.path.join(DATA_DIR, "memory_vectors")
# Paths with an intentional dedicated env override, defaulting under DATA_DIR. # Paths with an intentional dedicated env override, defaulting under DATA_DIR.
MAIL_ATTACHMENTS_DIR = os.getenv("ODYSSEUS_MAIL_ATTACHMENTS_DIR", os.path.join(DATA_DIR, "mail-attachments")) MAIL_ATTACHMENTS_DIR = os.getenv("ODYSSEUS_MAIL_ATTACHMENTS_DIR", os.path.join(DATA_DIR, "mail-attachments"))
FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH", os.path.join(DATA_DIR, "fastembed_cache")) # `or` (not os.getenv's default arg) so a PRESENT-but-EMPTY value falls back to
# the default. docker-compose.yml injects `FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}`,
# which sets the var to "" when the host hasn't defined it. os.getenv(name, default)
# only returns the default when the var is ABSENT, so the empty string would win →
# os.makedirs("") raises [Errno 2] No such file or directory: '' → FastEmbed fails to
# init and all vector features (RAG, semantic memory, tool index) silently degrade.
FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join(DATA_DIR, "fastembed_cache")
# Agent tool output limits (single source of truth — imported by tool_execution.py, # Agent tool output limits (single source of truth — imported by tool_execution.py,
# tool_implementations.py, agent_tools.py, and any other module that needs them) # tool_implementations.py, agent_tools.py, and any other module that needs them)
@@ -78,6 +84,13 @@ MAX_CONTEXT_MESSAGES = 90
REQUEST_TIMEOUT = 20 REQUEST_TIMEOUT = 20
OPENAI_COMPAT_PATH = "/v1/chat/completions" OPENAI_COMPAT_PATH = "/v1/chat/completions"
# Outbound UA for web_fetch / web_search scraping; common desktop UA so pages serve normal HTML.
WEB_FETCH_USER_AGENT = os.environ.get(
"WEB_FETCH_USER_AGENT",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
)
# Environment variables with defaults # Environment variables with defaults
DEFAULT_HOST = os.getenv("LLM_HOST", "localhost") DEFAULT_HOST = os.getenv("LLM_HOST", "localhost")
LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()] LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()]
+24 -5
View File
@@ -236,6 +236,29 @@ def _digest_windows(now):
] ]
def _checkin_calendar_events(db, owner, start, end):
"""Calendar events in [start, end] for ONE owner, for the check-in digest.
Ownership lives on CalendarCal.owner; events inherit it via calendar_id.
The digest query had no owner scope, so it pulled EVERY user's events into
one user's check-in (a cross-tenant leak of summaries/locations). Scope it
by joining CalendarCal, mirroring routes/calendar_routes.list_events.
"""
from core.database import CalendarEvent as _CE, CalendarCal as _CC
return (
db.query(_CE)
.join(_CC, _CE.calendar_id == _CC.id)
.filter(
_CC.owner == owner,
_CE.dtstart >= start,
_CE.dtstart <= end,
_CE.status != "cancelled",
)
.order_by(_CE.dtstart)
.all()
)
class TaskScheduler: class TaskScheduler:
def __init__(self, session_manager): def __init__(self, session_manager):
self._session_manager = session_manager self._session_manager = session_manager
@@ -1127,11 +1150,7 @@ class TaskScheduler:
# Strip timezone for naive DB comparison # Strip timezone for naive DB comparison
_s = start.replace(tzinfo=None) if start.tzinfo else start _s = start.replace(tzinfo=None) if start.tzinfo else start
_e = end.replace(tzinfo=None) if end.tzinfo else end _e = end.replace(tzinfo=None) if end.tzinfo else end
evs = _db.query(_CE).filter( evs = _checkin_calendar_events(_db, task.owner, _s, _e)
_CE.dtstart >= _s,
_CE.dtstart <= _e,
_CE.status != "cancelled",
).order_by(_CE.dtstart).all()
if not evs: if not evs:
continue continue
# Group by importance for richer output # Group by importance for richer output
+3
View File
@@ -219,6 +219,9 @@ class _WebhookManager:
async def fire(self, event, payload): async def fire(self, event, payload):
return None return None
def fire_and_forget(self, event, payload):
return None
def _install_sync_chat_stubs(monkeypatch): def _install_sync_chat_stubs(monkeypatch):
# FastAPI checks for python_multipart at import time when Form is used; # FastAPI checks for python_multipart at import time when Form is used;
+70
View File
@@ -0,0 +1,70 @@
"""Check-in calendar digest must be scoped to the task owner.
The digest query selected CalendarEvent with no owner scope, so a scheduled
check-in for one user pulled EVERY user's calendar events (summaries,
locations) into their digest — a cross-tenant leak. Ownership lives on
CalendarCal.owner; the query must join it, like routes/calendar_routes.
"""
import tempfile
import uuid
from datetime import datetime
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import NullPool
import core.database as cdb
from core.database import CalendarEvent, CalendarCal
from src.task_scheduler import _checkin_calendar_events
_TMPDB = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
_ENGINE = create_engine(f"sqlite:///{_TMPDB.name}", connect_args={"check_same_thread": False}, poolclass=NullPool)
cdb.Base.metadata.create_all(_ENGINE)
_TS = sessionmaker(bind=_ENGINE, autoflush=False, autocommit=False)
def _seed():
db = _TS()
try:
db.query(CalendarEvent).delete(); db.query(CalendarCal).delete()
db.add(CalendarCal(id="calA", owner="alice", name="A"))
db.add(CalendarCal(id="calB", owner="bob", name="B"))
db.add(CalendarEvent(uid="a1", calendar_id="calA", summary="Alice mtg",
dtstart=datetime(2026, 6, 10, 9, 0),
dtend=datetime(2026, 6, 10, 10, 0), status="confirmed"))
db.add(CalendarEvent(uid="b1", calendar_id="calB", summary="Bob secret",
dtstart=datetime(2026, 6, 10, 10, 0),
dtend=datetime(2026, 6, 10, 11, 0), status="confirmed"))
db.commit()
finally:
db.close()
def test_digest_only_returns_owner_events():
_seed()
db = _TS()
try:
s, e = datetime(2026, 6, 1), datetime(2026, 6, 30)
alice = _checkin_calendar_events(db, "alice", s, e)
assert [ev.summary for ev in alice] == ["Alice mtg"] # not Bob's
bob = _checkin_calendar_events(db, "bob", s, e)
assert [ev.summary for ev in bob] == ["Bob secret"]
finally:
db.close()
def test_cancelled_excluded_and_window_respected():
_seed()
db = _TS()
try:
db2 = _TS()
db2.add(CalendarEvent(uid="a2", calendar_id="calA", summary="cancelled",
dtstart=datetime(2026, 6, 11),
dtend=datetime(2026, 6, 11, 1, 0), status="cancelled"))
db2.commit(); db2.close()
s, e = datetime(2026, 6, 1), datetime(2026, 6, 30)
out = _checkin_calendar_events(db, "alice", s, e)
assert "cancelled" not in [ev.summary for ev in out]
finally:
db.close()
+69
View File
@@ -0,0 +1,69 @@
"""Regression: FASTEMBED_CACHE_DIR must tolerate a PRESENT-but-EMPTY
FASTEMBED_CACHE_PATH.
docker-compose.yml injects ``FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}``,
which sets the variable to ``""`` when the host has not defined it. The old
``os.getenv("FASTEMBED_CACHE_PATH", default)`` only used the default when the
variable was ABSENT, so an empty value made ``FASTEMBED_CACHE_DIR == ""`` →
``os.makedirs("")`` raised ``[Errno 2] No such file or directory: ''`` →
FastEmbed failed to initialise and every vector feature (RAG, semantic memory,
tool index) silently degraded on the default Docker stack.
These tests pin the fix: empty is treated like absent → use the DATA_DIR
default, while an explicit non-empty override is still honoured.
"""
from __future__ import annotations
import importlib
import os
import src.constants as constants
def _reload_with(monkeypatch, value):
"""Reload src.constants with FASTEMBED_CACHE_PATH set to ``value`` (or
removed when ``value`` is None) and return the reloaded module."""
if value is None:
monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False)
else:
monkeypatch.setenv("FASTEMBED_CACHE_PATH", value)
return importlib.reload(constants)
def _restore(monkeypatch):
"""Return the module to its env-default state so reloading it here does
not leak a test-specific FASTEMBED_CACHE_DIR into other tests."""
monkeypatch.delenv("FASTEMBED_CACHE_PATH", raising=False)
importlib.reload(constants)
def test_empty_fastembed_cache_path_falls_back_to_default(monkeypatch):
"""The bug: an empty FASTEMBED_CACHE_PATH (exactly what Docker injects)
must fall back to the DATA_DIR default, never the empty string."""
try:
mod = _reload_with(monkeypatch, "")
assert mod.FASTEMBED_CACHE_DIR, "empty env must not yield an empty path"
assert mod.FASTEMBED_CACHE_DIR == os.path.join(mod.DATA_DIR, "fastembed_cache")
finally:
_restore(monkeypatch)
def test_unset_fastembed_cache_path_uses_default(monkeypatch):
"""Sanity: an absent variable also resolves to the default."""
try:
mod = _reload_with(monkeypatch, None)
assert mod.FASTEMBED_CACHE_DIR == os.path.join(mod.DATA_DIR, "fastembed_cache")
finally:
_restore(monkeypatch)
def test_explicit_fastembed_cache_path_is_respected(monkeypatch):
"""A real explicit override must still win — the fix only changes the
empty-value handling, not the documented FASTEMBED_CACHE_PATH override."""
custom = os.path.join("custom", "fastembed-cache")
try:
mod = _reload_with(monkeypatch, custom)
assert mod.FASTEMBED_CACHE_DIR == custom
finally:
_restore(monkeypatch)
+150
View File
@@ -0,0 +1,150 @@
import asyncio
import mcp_servers.memory_server as memory_server
from src.memory import MemoryManager
class FakeVector:
healthy = True
def __init__(self):
self.added = []
self.removed = []
def add(self, memory_id, text):
self.added.append((memory_id, text))
def remove(self, memory_id):
self.removed.append(memory_id)
def _tool_text(arguments):
result = asyncio.run(memory_server.call_tool("manage_memory", arguments))
return result[0].text
def _entry(manager, text, owner=None, memory_id=None, category="fact"):
entry = manager.add_entry(text, owner=owner, category=category)
if memory_id:
entry["id"] = memory_id
return entry
def _configure_server(monkeypatch, manager, vector=None):
monkeypatch.setattr(memory_server, "_memory_manager", manager)
monkeypatch.setattr(memory_server, "_memory_vector", vector)
monkeypatch.setattr(memory_server, "_initialized", True)
for key in memory_server._OWNER_ENV_KEYS:
monkeypatch.delenv(key, raising=False)
def test_mcp_memory_uses_configured_owner_for_all_operations(monkeypatch, tmp_path):
manager = MemoryManager(str(tmp_path))
vector = FakeVector()
alice = _entry(
manager,
"Alice likes green tea",
owner="alice",
memory_id="aaaaaaaa-0000-0000-0000-000000000000",
)
bob = _entry(
manager,
"Bob likes espresso",
owner="bob",
memory_id="bbbbbbbb-0000-0000-0000-000000000000",
)
manager.save([alice, bob])
_configure_server(monkeypatch, manager, vector)
monkeypatch.setenv("ODYSSEUS_MCP_MEMORY_OWNER", "alice")
list_text = _tool_text({"action": "list"})
assert "Alice likes green tea" in list_text
assert "Bob likes espresso" not in list_text
search_text = _tool_text({"action": "search", "text": "likes"})
assert "Alice likes green tea" in search_text
assert "Bob likes espresso" not in search_text
add_text = _tool_text({
"action": "add",
"text": "Alice prefers concise notes",
"category": "preference",
})
assert "Memory added" in add_text
added = next(
entry for entry in manager.load_all()
if entry["text"] == "Alice prefers concise notes"
)
assert added["owner"] == "alice"
assert vector.added == [(added["id"], "Alice prefers concise notes")]
edit_text = _tool_text({
"action": "edit",
"memory_id": bob["id"][:8],
"text": "Bob changed",
})
assert edit_text == "Error: Memory 'bbbbbbbb' not found"
bob_after_edit = next(
entry for entry in manager.load_all()
if entry["id"] == bob["id"]
)
assert bob_after_edit["text"] == "Bob likes espresso"
delete_text = _tool_text({"action": "delete", "memory_id": bob["id"][:8]})
assert delete_text == "Error: Memory 'bbbbbbbb' not found"
assert any(entry["id"] == bob["id"] for entry in manager.load_all())
def test_mcp_memory_fails_closed_without_owner_for_owner_scoped_store(monkeypatch, tmp_path):
manager = MemoryManager(str(tmp_path))
alice = _entry(manager, "Alice private memory", owner="alice", memory_id="aaaaaaaa-0000")
bob = _entry(manager, "Bob private memory", owner="bob", memory_id="bbbbbbbb-0000")
manager.save([alice, bob])
_configure_server(monkeypatch, manager, FakeVector())
before = manager.load_all()
actions = [
{"action": "list"},
{"action": "search", "text": "private"},
{"action": "add", "text": "new ownerless memory"},
{"action": "edit", "memory_id": alice["id"][:8], "text": "changed"},
{"action": "delete", "memory_id": alice["id"][:8]},
]
for arguments in actions:
assert _tool_text(arguments).startswith("Error: Memory MCP owner is not configured")
assert manager.load_all() == before
def test_mcp_memory_preserves_ownerless_local_behavior(monkeypatch, tmp_path):
manager = MemoryManager(str(tmp_path))
legacy = _entry(
manager,
"Legacy local memory",
memory_id="llllllll-0000-0000-0000-000000000000",
)
manager.save([legacy])
_configure_server(monkeypatch, manager, FakeVector())
assert "Legacy local memory" in _tool_text({"action": "list"})
assert "Legacy local memory" in _tool_text({"action": "search", "text": "legacy"})
add_text = _tool_text({"action": "add", "text": "Another local memory"})
assert "Memory added" in add_text
added = next(
entry for entry in manager.load_all()
if entry["text"] == "Another local memory"
)
assert "owner" not in added
assert _tool_text({
"action": "edit",
"memory_id": legacy["id"][:8],
"text": "Updated local memory",
}) == "Memory updated: Updated local memory"
assert any(entry["text"] == "Updated local memory" for entry in manager.load_all())
delete_text = _tool_text({"action": "delete", "memory_id": legacy["id"][:8]})
assert delete_text.startswith("Memory deleted:")
assert all(entry["id"] != legacy["id"] for entry in manager.load_all())
+18
View File
@@ -0,0 +1,18 @@
"""The web scraping path routes its User-Agent through one constant.
Guards the dedup: web_fetch / web_search outbound UAs go through
WEB_FETCH_USER_AGENT, so a stale or bare Mozilla string cannot be re-inlined in
the search sources.
"""
from pathlib import Path
_SEARCH = Path(__file__).resolve().parent.parent / "services" / "search"
def test_search_sources_have_no_inline_mozilla_ua():
offenders = [
str(py.relative_to(_SEARCH.parent.parent))
for py in _SEARCH.rglob("*.py")
if "Mozilla/" in py.read_text(encoding="utf-8")
]
assert not offenders, f"inline Mozilla UA found; use WEB_FETCH_USER_AGENT: {offenders}"