From ac94885c84b5af072c85d3e3f5e819f0033587e1 Mon Sep 17 00:00:00 2001 From: Mike <586280+dot-mike@users.noreply.github.com> Date: Mon, 8 Jun 2026 09:58:52 +0200 Subject: [PATCH] refactor(constants): single source of truth for data dir (#3368) * refactor(constants): single source of truth for data dir + merge core/src constants Co-Authored-By: Claude Opus 4.8 (1M context) * docs(contributing): use named src.constants for data paths, drop core/constants references Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) --- .env.example | 7 +++ CONTRIBUTING.md | 6 +- app.py | 6 +- companion/pairing.py | 4 +- core/auth.py | 5 +- core/constants.py | 69 ++++------------------- core/database.py | 13 +++-- mcp_servers/email_server.py | 11 ++-- mcp_servers/image_gen_server.py | 4 +- routes/admin_wipe_routes.py | 10 ++-- routes/codex_routes.py | 8 +-- routes/contacts_routes.py | 7 ++- routes/cookbook_routes.py | 5 +- routes/document_routes.py | 6 +- routes/email_helpers.py | 9 +-- routes/email_routes.py | 3 +- routes/embedding_routes.py | 12 +--- routes/emoji_routes.py | 4 +- routes/gallery_routes.py | 7 ++- routes/mcp_routes.py | 4 +- routes/note_routes.py | 5 +- routes/personal_routes.py | 4 +- routes/prefs_routes.py | 3 +- routes/research_routes.py | 17 +++--- routes/task_routes.py | 5 +- routes/vault_routes.py | 3 +- scripts/claim_ownerless.py | 6 +- scripts/index_documents.py | 5 +- scripts/migrate_faiss_to_chroma.py | 6 +- services/docs/service.py | 3 +- services/memory/service.py | 3 +- services/research/research_handler.py | 3 +- services/tts/tts_service.py | 4 +- setup.py | 27 +++++---- src/ai_interaction.py | 6 +- src/bg_jobs.py | 7 ++- src/builtin_actions.py | 17 +++--- src/config.py | 18 +++--- src/constants.py | 59 ++++++++++++++++++- src/cookbook_serve_lifecycle.py | 5 +- src/embeddings.py | 12 ++-- src/event_bus.py | 6 +- src/generated_images.py | 4 +- src/integrations.py | 5 +- src/rag_manager.py | 4 +- src/rag_singleton.py | 5 +- src/rag_vector.py | 4 +- src/research_handler.py | 3 +- src/secret_storage.py | 3 +- src/tool_execution.py | 4 +- src/tool_implementations.py | 8 +-- tests/test_companion_pairing.py | 9 +-- tests/test_cookbook_state_path.py | 29 ---------- tests/test_research_owner_scope_routes.py | 10 ++++ tests/test_security_regressions.py | 8 +-- tests/test_setup_admin_user.py | 2 +- 56 files changed, 279 insertions(+), 243 deletions(-) delete mode 100644 tests/test_cookbook_state_path.py diff --git a/.env.example b/.env.example index 10276433d..63708ea31 100644 --- a/.env.example +++ b/.env.example @@ -56,6 +56,13 @@ SEARXNG_INSTANCE=http://localhost:8080 # SQLite database path (default: sqlite:///./data/app.db) # DATABASE_URL=sqlite:///./data/app.db +# ============================================================ +# Data directory +# ============================================================ +# Move everything that lives under data/ - settings, sessions, database, auth, +# cache, uploads, etc. - to another path: +# ODYSSEUS_DATA_DIR=C:\path\to\dir + # ============================================================ # Auth & Security # ============================================================ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 96fd54781..174a4f2f6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -98,11 +98,11 @@ If you are unsure whether a change is "visual," it is. Default to attaching a sc Don't hardcode values that the project already exposes through a constant or a helper. Hardcoded literals drift out of sync, break on non-default deployments, and reintroduce bugs we've already fixed. -- **Filesystem paths:** never build writable paths from `Path(__file__)...` into the source tree or hardcode `/app/...`. Use `DATA_DIR` (and the other path constants) from `core.constants`, e.g. `Path(DATA_DIR) / "logs" / "x.log"`. The source tree is read-only in Docker, and `/app/...` does not exist on native runs. Guard directory creation so an unwritable path degrades gracefully instead of crashing at import. -- **Internal API / loopback URLs:** don't hardcode `http://localhost:7000`. Use `internal_api_base()` from `core.constants` (it honors `ODYSSEUS_INTERNAL_BASE` / `APP_PORT`). +- **Filesystem paths:** never build writable paths from `Path(__file__)...` into the source tree, hardcode `/app/...`, or use a relative `"data/..."` string. Every persisted file and directory has a named constant in `src/constants.py` (for example `AUTH_FILE`, `USER_PREFS_FILE`, `SETTINGS_FILE`, `TTS_CACHE_DIR`, `CHROMA_DIR`). Import and use that named constant; do not re-derive the path locally with `os.path.join(DATA_DIR, "x.json")` or `DATA_DIR / "x.json"`. `DATA_DIR` is the single place that reads `ODYSSEUS_DATA_DIR`, so use it directly only for dynamic paths that have no fixed name (for example per-owner files). If a data file or directory has no constant yet, add one to `src/constants.py`. The source tree is read-only in Docker and `/app/...` does not exist on native runs; guard directory creation so an unwritable path degrades gracefully instead of crashing at import. +- **Internal API / loopback URLs:** don't hardcode `http://localhost:7000`. Use `internal_api_base()` from `src.constants` (it honors `ODYSSEUS_INTERNAL_BASE` / `APP_PORT`). - **Ports, limits, model lists, and similar:** reuse the existing constant if one exists; if it doesn't and the value is used in more than one place, add a constant rather than copying the literal. -If you need a value that has no constant or helper yet, add one in the appropriate module (usually `core/constants.py` or `src/constants.py`) and import it, rather than repeating a literal across files. +If you need a value that has no constant or helper yet, add it to `src/constants.py` (the single source of truth for paths and config; `core/constants.py` only re-exports it for backward compatibility) and import it, rather than repeating a literal across files. **Commits:** use [Conventional Commits](https://www.conventionalcommits.org), `type(scope): summary` (e.g. `fix(search): ...`, `feat(notes): ...`, `docs(contributing): ...`). Common types: `fix`, `feat`, `refactor`, `docs`, `test`, `chore`, `ci`. Keep the subject short and imperative; put the "why" in the body when it isn't obvious. diff --git a/app.py b/app.py index e57e85706..80e9d9f5b 100644 --- a/app.py +++ b/app.py @@ -51,7 +51,7 @@ from starlette.middleware.base import BaseHTTPMiddleware # Core imports from core.constants import ( BASE_DIR, STATIC_DIR, SESSIONS_FILE, - REQUEST_TIMEOUT, OPENAI_API_KEY, + REQUEST_TIMEOUT, OPENAI_API_KEY, AUTH_FILE, ) from core.database import SessionLocal, ApiToken from core.middleware import SecurityHeadersMiddleware, is_cors_preflight @@ -954,7 +954,7 @@ async def _startup_event(): owners = set() try: import json as _json - auth_path = "data/auth.json" + auth_path = AUTH_FILE with open(auth_path, encoding="utf-8") as f: users = _json.load(f).get("users", {}) owners.update(users.keys()) @@ -1001,7 +1001,7 @@ async def _startup_event(): # does not make an existing library look empty after auth/account changes. try: import json as _json - auth_path = "data/auth.json" + auth_path = AUTH_FILE with open(auth_path, encoding="utf-8") as f: users = _json.load(f).get("users", {}) primary_owner = None diff --git a/companion/pairing.py b/companion/pairing.py index 48197302b..c4ea62345 100644 --- a/companion/pairing.py +++ b/companion/pairing.py @@ -14,6 +14,8 @@ import uuid import bcrypt +from src.constants import AUTH_FILE + PAIRING_VERSION = 1 COMPANION_SCOPE = "chat" @@ -61,7 +63,7 @@ def lan_ip_candidates() -> list[str]: def find_admin_user() -> str | None: """Resolve an admin username from data/auth.json (schema uses is_admin), falling back to the first user.""" - auth_path = os.path.join("data", "auth.json") + auth_path = AUTH_FILE try: with open(auth_path, "r", encoding="utf-8") as f: data = json.load(f) diff --git a/core/auth.py b/core/auth.py index 011b1af2c..80fce1825 100644 --- a/core/auth.py +++ b/core/auth.py @@ -37,9 +37,8 @@ DEFAULT_PRIVILEGES = { ADMIN_PRIVILEGES = {k: (True if isinstance(v, bool) else (0 if isinstance(v, int) else [])) for k, v in DEFAULT_PRIVILEGES.items()} ADMIN_PRIVILEGES["allowed_models_restricted"] = False -DEFAULT_AUTH_PATH = os.path.join( - Path(__file__).parent.parent, "data", "auth.json" -) +from src.constants import AUTH_FILE +DEFAULT_AUTH_PATH = AUTH_FILE TOKEN_TTL = 60 * 60 * 24 * 7 # 7 days # Usernames the auth + middleware layer reserve as internal "synthetic owner" diff --git a/core/constants.py b/core/constants.py index 9c5580b00..d71bb0aed 100644 --- a/core/constants.py +++ b/core/constants.py @@ -1,59 +1,12 @@ -# src/constants.py -"""Application-wide constants and configuration values.""" -import os +# core/constants.py +"""Backward-compatible shim — the single source of truth is src/constants.py. -APP_VERSION = "0.9.1" - -# Base paths -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/" -STATIC_DIR = os.path.join(BASE_DIR, "static") -DATA_DIR = os.path.join(BASE_DIR, "data") - -# Data file paths -SESSIONS_FILE = os.path.join(DATA_DIR, "sessions.json") -MEMORY_FILE = os.path.join(DATA_DIR, "memory.json") -MEMORY_DOC = os.path.join(DATA_DIR, "memory_doc.md") -PERSONAL_DIR = os.path.join(DATA_DIR, "personal_docs") -RUNBOOK_DIR = os.path.join(PERSONAL_DIR, "runbook") -UPLOAD_DIR = os.path.join(DATA_DIR, "uploads") -FEATURES_FILE = os.path.join(DATA_DIR, "features.json") -SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json") - -# API Configuration -MAX_CONTEXT_MESSAGES = 90 -REQUEST_TIMEOUT = 20 -OPENAI_COMPAT_PATH = "/v1/chat/completions" - -# Environment variables with defaults -DEFAULT_HOST = os.getenv("LLM_HOST", "localhost") -LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()] -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") -SEARXNG_INSTANCE = os.getenv('SEARXNG_INSTANCE', 'http://localhost:8080') - - -# Cleanup configuration -CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true" -CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24")) - -# Default parameters -DEFAULT_TEMPERATURE = 1.0 -DEFAULT_MAX_TOKENS = 0 - - -def internal_api_base() -> str: - """Base URL for in-process loopback calls to Odysseus's own API. - - Agent tools and background jobs reach admin-gated routes by calling the - running server over HTTP. Resolution order: - 1. ODYSSEUS_INTERNAL_BASE - explicit override (e.g. behind a TLS proxy). - 2. APP_PORT - http://127.0.0.1:$APP_PORT (docker-compose). - 3. Fallback http://127.0.0.1:7000 - legacy default. - - 127.0.0.1 (not "localhost") avoids IPv6/DNS ambiguity for a strictly-local - call. Without this, loopback tools fail with "All connection attempts - failed" whenever the server is not on port 7000. - """ - override = os.environ.get("ODYSSEUS_INTERNAL_BASE") - if override: - return override.rstrip("/") - return f"http://127.0.0.1:{os.environ.get('APP_PORT', '7000')}" +Historically there were two copies of this module (this one lagged behind at +APP_VERSION 0.9.1 and was missing the consolidated tool-output constants). To +kill the drift, this now simply re-exports everything from src.constants so +there is exactly one place that defines paths and reads ODYSSEUS_DATA_DIR. +internal_api_base() also lives in src.constants now and is re-exported here so +existing `from core.constants import internal_api_base` callers keep working. +""" +from src.constants import * # noqa: F401,F403 +from src.constants import internal_api_base # noqa: F401 (explicit: functions aren't covered by some linters' * checks) diff --git a/core/database.py b/core/database.py index 241f3892b..85692e8c5 100644 --- a/core/database.py +++ b/core/database.py @@ -29,8 +29,9 @@ class TimestampMixin: def updated_at(cls): return Column(DateTime, default=utcnow_naive, onupdate=utcnow_naive, nullable=False) -# Get database URL from environment, default to SQLite -DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./data/app.db") +# Get database URL from environment, default to SQLite in DATA_DIR +from src.constants import DATA_DIR, AUTH_FILE, MEMORY_FILE, USER_PREFS_FILE, SETTINGS_FILE +DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite:///{DATA_DIR}/app.db") # Create engine engine = create_engine( @@ -1065,7 +1066,7 @@ def _migrate_assign_legacy_owner(): # fell through to "first user" every time. auth_path = os.path.join(os.path.dirname(DATABASE_URL.replace("sqlite:///", "")), "auth.json") if not os.path.isabs(auth_path): - auth_path = os.path.join("data", "auth.json") + auth_path = AUTH_FILE admin_user = None try: with open(auth_path, "r", encoding="utf-8") as f: @@ -1118,7 +1119,7 @@ def _migrate_assign_legacy_owner(): logger.warning(f"Legacy owner migration failed: {e}") # Also migrate memory.json - mem_path = os.path.join("data", "memory.json") + mem_path = MEMORY_FILE try: if os.path.exists(mem_path): with open(mem_path, "r", encoding="utf-8") as f: @@ -1136,7 +1137,7 @@ def _migrate_assign_legacy_owner(): logger.warning(f"memory.json legacy migration failed: {e}") # Also migrate user_prefs.json to per-user format - prefs_path = os.path.join("data", "user_prefs.json") + prefs_path = USER_PREFS_FILE try: if os.path.exists(prefs_path): with open(prefs_path, "r", encoding="utf-8") as f: @@ -1530,7 +1531,7 @@ def _migrate_seed_email_account(): import json as _json import uuid as _uuid from pathlib import Path - settings_file = Path("data/settings.json") + settings_file = Path(SETTINGS_FILE) if not settings_file.exists(): return try: diff --git a/mcp_servers/email_server.py b/mcp_servers/email_server.py index 285d928d2..3b1ba84d1 100644 --- a/mcp_servers/email_server.py +++ b/mcp_servers/email_server.py @@ -31,7 +31,8 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) server = Server("email") EMAIL_SOCKET_TIMEOUT = float(os.environ.get("EMAIL_SOCKET_TIMEOUT", "20")) -DATA_DIR = Path(__file__).resolve().parent.parent / "data" +from src.constants import DATA_DIR as _DATA_DIR, APP_DB, EMAIL_CACHE_DB, SETTINGS_FILE as _SETTINGS_FILE, MAIL_ATTACHMENTS_DIR +DATA_DIR = Path(_DATA_DIR) def _b(value) -> bytes: @@ -63,7 +64,7 @@ def _clean_header_value(value) -> str: def _db_path() -> Path: - return DATA_DIR / "app.db" + return Path(APP_DB) def _list_accounts_raw() -> list: @@ -162,7 +163,7 @@ def _load_config(account: str | None = None) -> dict: "trash_folder": os.environ.get("TRASH_FOLDER", "Trash"), "cache_db": os.environ.get( "EMAIL_CACHE_DB", - str(DATA_DIR / "email_cache.db"), + EMAIL_CACHE_DB, ), "account_id": None, "account_name": None, @@ -204,7 +205,7 @@ def _load_config(account: str | None = None) -> dict: else: # Legacy fallback: settings.json flat keys try: - settings_path = Path(__file__).resolve().parent.parent / "data" / "settings.json" + settings_path = Path(_SETTINGS_FILE) if settings_path.exists(): settings = json.loads(settings_path.read_text(encoding="utf-8")) for key in ( @@ -1061,7 +1062,7 @@ def _download_attachment(uid, index, folder="INBOX", account=None): raw = msg_data[0][1] msg = email.message_from_bytes(raw) - target_dir = DATA_DIR / "mail-attachments" / f"{folder}_{uid}" + target_dir = Path(MAIL_ATTACHMENTS_DIR) / f"{folder}_{uid}" filepath = _extract_attachment_to_disk(msg, index, target_dir) if not filepath: return {"error": f"Attachment index {index} not found"} diff --git a/mcp_servers/image_gen_server.py b/mcp_servers/image_gen_server.py index 4607b0834..0c8d3884a 100644 --- a/mcp_servers/image_gen_server.py +++ b/mcp_servers/image_gen_server.py @@ -16,6 +16,8 @@ from mcp.types import Tool, TextContent sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +from src.constants import GENERATED_IMAGES_DIR + server = Server("image_gen") @@ -121,7 +123,7 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: _pub_base = (get_setting("app_public_url", "") or "").rstrip("/") if img.get("b64_json"): - img_dir = Path("data/generated_images") + img_dir = Path(GENERATED_IMAGES_DIR) img_dir.mkdir(parents=True, exist_ok=True) filename = f"{uuid.uuid4().hex[:12]}.png" img_path = img_dir / filename diff --git a/routes/admin_wipe_routes.py b/routes/admin_wipe_routes.py index 01511c373..212e2a768 100644 --- a/routes/admin_wipe_routes.py +++ b/routes/admin_wipe_routes.py @@ -31,7 +31,7 @@ from core.database import ( CalendarEvent, CalendarCal, ) -from src.constants import DATA_DIR +from src.constants import DATA_DIR, SKILLS_DIR, SKILLS_FILE, GALLERY_DIR, GALLERY_UPLOADS_DIR logger = logging.getLogger(__name__) @@ -107,7 +107,7 @@ def setup_admin_wipe_routes(session_manager): # Skills live as SKILL.md files under data/skills/. Drop # the entire directory; the SkillsManager re-creates the # tree on next write. - skills_dir = os.path.join(DATA_DIR, "skills") + skills_dir = SKILLS_DIR count = 0 if os.path.isdir(skills_dir): # Count SKILL.md files for the response — quick walk. @@ -115,7 +115,7 @@ def setup_admin_wipe_routes(session_manager): count += sum(1 for f in files if f == "SKILL.md") _rmtree_quiet(skills_dir) # Legacy fallback file - legacy = os.path.join(DATA_DIR, "skills.json") + legacy = SKILLS_FILE if os.path.exists(legacy): try: os.remove(legacy) @@ -151,8 +151,8 @@ def setup_admin_wipe_routes(session_manager): db.query(GalleryAlbum).delete() db.commit() # Also drop the upload dir so disk doesn't keep orphans. - _rmtree_quiet(os.path.join(DATA_DIR, "gallery")) - _rmtree_quiet(os.path.join(DATA_DIR, "gallery_uploads")) + _rmtree_quiet(GALLERY_DIR) + _rmtree_quiet(GALLERY_UPLOADS_DIR) return {"status": "deleted", "kind": kind, "count": count} if kind == "calendar": diff --git a/routes/codex_routes.py b/routes/codex_routes.py index e1f3b49c0..1afac02b9 100644 --- a/routes/codex_routes.py +++ b/routes/codex_routes.py @@ -17,7 +17,7 @@ from fastapi.responses import StreamingResponse from src.auth_helpers import require_authenticated_request, require_user from src.tool_implementations import do_manage_notes -from core.constants import DATA_DIR +from src.constants import COOKBOOK_STATE_FILE COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"} @@ -425,8 +425,8 @@ def setup_codex_routes( def _read_cookbook_state() -> dict: from pathlib import Path as _Path - import os as _os, json as _json - p = _Path(DATA_DIR) / "cookbook_state.json" + import json as _json + p = _Path(COOKBOOK_STATE_FILE) if not p.exists(): return {} try: @@ -734,7 +734,7 @@ def setup_codex_routes( import time as _t, json as _json from core.atomic_io import atomic_write_json from pathlib import Path as _Path - cookbook_state_path = _Path(DATA_DIR) / "cookbook_state.json" + cookbook_state_path = _Path(COOKBOOK_STATE_FILE) try: state = _json.loads(cookbook_state_path.read_text(encoding="utf-8")) except Exception: diff --git a/routes/contacts_routes.py b/routes/contacts_routes.py index 8a90cf473..e4e8ce759 100644 --- a/routes/contacts_routes.py +++ b/routes/contacts_routes.py @@ -25,9 +25,10 @@ from src.url_safety import check_outbound_url logger = logging.getLogger(__name__) -DATA_DIR = Path(__file__).resolve().parent.parent / "data" -SETTINGS_FILE = DATA_DIR / "settings.json" -LOCAL_CONTACTS_FILE = DATA_DIR / "contacts.json" +from src.constants import DATA_DIR as _DATA_DIR, SETTINGS_FILE as _SETTINGS_FILE, CONTACTS_FILE as _CONTACTS_FILE +DATA_DIR = Path(_DATA_DIR) +SETTINGS_FILE = Path(_SETTINGS_FILE) +LOCAL_CONTACTS_FILE = Path(_CONTACTS_FILE) def _load_settings(): diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index 5974526eb..228660ef3 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -15,6 +15,7 @@ from pathlib import Path from fastapi import APIRouter, HTTPException, Request, Depends from src.auth_helpers import require_user +from src.constants import COOKBOOK_STATE_FILE from pydantic import BaseModel from core.middleware import require_admin @@ -33,7 +34,7 @@ from core.platform_compat import ( get_wsl_windows_user_profile, ) from routes.shell_routes import TMUX_LOG_DIR -from core.constants import DATA_DIR +from src.constants import COOKBOOK_STATE_FILE logger = logging.getLogger(__name__) @@ -61,7 +62,7 @@ _HF_TOKEN_STATUS_SNIPPET = ( def setup_cookbook_routes() -> APIRouter: router = APIRouter(tags=["cookbook"]) - _cookbook_state_path = Path(DATA_DIR) / "cookbook_state.json" + _cookbook_state_path = Path(COOKBOOK_STATE_FILE) def _mask_secret(value: str) -> str: if not value: diff --git a/routes/document_routes.py b/routes/document_routes.py index 09b7d8b1f..cb41108e0 100644 --- a/routes/document_routes.py +++ b/routes/document_routes.py @@ -11,6 +11,7 @@ from sqlalchemy import case, func, or_ from core.database import SessionLocal, Document, DocumentVersion from core.database import Session as DbSession from src.auth_helpers import get_current_user +from src.constants import MAIL_ATTACHMENTS_DIR logger = logging.getLogger(__name__) @@ -1542,10 +1543,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: # don't import from a routes file (cycle-prone). Same env override # as email_routes (ODYSSEUS_MAIL_ATTACHMENTS_DIR). from pathlib import Path as _Path - import os as _os - _DATA_DIR = _Path(__file__).resolve().parent.parent / "data" - _BASE = _os.environ.get("ODYSSEUS_MAIL_ATTACHMENTS_DIR", str(_DATA_DIR / "mail-attachments")) - _COMPOSE_DIR = _Path(_BASE) / "_compose" + _COMPOSE_DIR = _Path(MAIL_ATTACHMENTS_DIR) / "_compose" _COMPOSE_DIR.mkdir(parents=True, exist_ok=True) user = get_current_user(request) diff --git a/routes/email_helpers.py b/routes/email_helpers.py index e973a6b73..816aeea8e 100644 --- a/routes/email_helpers.py +++ b/routes/email_helpers.py @@ -254,16 +254,17 @@ def _cleanup_compose_uploads(tokens) -> None: pass -DATA_DIR = Path(__file__).resolve().parent.parent / "data" -SETTINGS_FILE = DATA_DIR / "settings.json" +from src.constants import DATA_DIR as _DATA_DIR, MAIL_ATTACHMENTS_DIR, SETTINGS_FILE as _SETTINGS_FILE, SCHEDULED_EMAILS_DB +DATA_DIR = Path(_DATA_DIR) +SETTINGS_FILE = Path(_SETTINGS_FILE) # Override at deploy time via ODYSSEUS_MAIL_ATTACHMENTS_DIR. Defaults to a # subdir of the install's data/ tree so the app works out-of-the-box without # a hardcoded /home// path. -ATTACHMENTS_DIR = Path(os.environ.get("ODYSSEUS_MAIL_ATTACHMENTS_DIR", str(DATA_DIR / "mail-attachments"))) +ATTACHMENTS_DIR = Path(MAIL_ATTACHMENTS_DIR) ATTACHMENTS_DIR.mkdir(parents=True, exist_ok=True) COMPOSE_UPLOADS_DIR = ATTACHMENTS_DIR / "_compose" COMPOSE_UPLOADS_DIR.mkdir(parents=True, exist_ok=True) -SCHEDULED_DB = DATA_DIR / "scheduled_emails.db" +SCHEDULED_DB = Path(SCHEDULED_EMAILS_DB) OWNER_SCOPED_EMAIL_CACHE_TABLES = { diff --git a/routes/email_routes.py b/routes/email_routes.py index 7ab033b04..1a1f9b701 100644 --- a/routes/email_routes.py +++ b/routes/email_routes.py @@ -32,6 +32,7 @@ from email.mime.multipart import MIMEMultipart from fastapi import APIRouter, Query, UploadFile, File, BackgroundTasks, HTTPException, Depends, Request from fastapi.responses import FileResponse +from src.constants import DATA_DIR from src.llm_core import llm_call_async from src.upload_limits import read_upload_limited @@ -2904,7 +2905,7 @@ def setup_email_routes(): from pathlib import Path as _P import json as _json _slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default")) - path = _P(f"data/email_urgency_state_{_slug}.json") + path = _P(DATA_DIR) / f"email_urgency_state_{_slug}.json" if not path.exists(): return {"total_unread": 0, "total_urgent": 0, "max_score": 0, "per_uid": {}} try: diff --git a/routes/embedding_routes.py b/routes/embedding_routes.py index d79fe91f1..a237e0b4c 100644 --- a/routes/embedding_routes.py +++ b/routes/embedding_routes.py @@ -7,12 +7,12 @@ import logging import asyncio from pathlib import Path from fastapi import APIRouter, HTTPException, Form, Depends -from core.constants import BASE_DIR +from core.constants import EMBEDDING_ENDPOINT_FILE, FASTEMBED_CACHE_DIR from core.middleware import require_admin logger = logging.getLogger(__name__) -_ENDPOINT_FILE = os.path.join(BASE_DIR, "data", "embedding_endpoint.json") +_ENDPOINT_FILE = EMBEDDING_ENDPOINT_FILE # Track in-progress downloads _downloading: dict = {} @@ -35,13 +35,7 @@ def _cache_dir() -> str: default lived in /tmp, which many systems wipe on reboot — forcing a full re-download of the embedding model after every restart. """ - env = os.environ.get("FASTEMBED_CACHE_PATH") - if env: - return env - return os.path.join( - os.path.dirname(os.path.dirname(os.path.abspath(__file__))), - "data", "fastembed_cache", - ) + return FASTEMBED_CACHE_DIR def _model_cache_name(hf_source: str) -> str: diff --git a/routes/emoji_routes.py b/routes/emoji_routes.py index 76f6abad1..57fd0338f 100644 --- a/routes/emoji_routes.py +++ b/routes/emoji_routes.py @@ -18,9 +18,11 @@ import httpx from fastapi import APIRouter from fastapi.responses import Response +from src.constants import EMOJI_CACHE_DIR + logger = logging.getLogger(__name__) -_CACHE_DIR = Path(__file__).resolve().parent.parent / "data" / "emoji_cache" +_CACHE_DIR = Path(EMOJI_CACHE_DIR) # OpenMoji "black" set = monochrome line-art SVGs. Filenames are the codepoints # in UPPERCASE (FE0F dropped, same as we compute), '-' joined. _OPENMOJI_BASE = "https://cdn.jsdelivr.net/npm/openmoji@15.0.0/black/svg" diff --git a/routes/gallery_routes.py b/routes/gallery_routes.py index 0e3c68fa0..6f3427eed 100644 --- a/routes/gallery_routes.py +++ b/routes/gallery_routes.py @@ -14,6 +14,7 @@ from core.database import SessionLocal, GalleryImage, GalleryAlbum, ModelEndpoin from core.database import Session as DbSession from src.auth_helpers import get_current_user, require_privilege from src.upload_limits import read_upload_limited +from src.constants import GENERATED_IMAGES_DIR from routes.gallery_helpers import ( GalleryPatch, _extract_exif, _image_to_dict, _owner_filter, _human_size, @@ -33,7 +34,7 @@ def _sanitize_gallery_filename(filename: str) -> str: return safe_name -GALLERY_IMAGE_DIR = Path("data/generated_images") +GALLERY_IMAGE_DIR = Path(GENERATED_IMAGES_DIR) def _gallery_image_path(filename: str) -> Path: @@ -133,7 +134,7 @@ def setup_gallery_routes() -> APIRouter: return {"ok": False, "duplicate": True, "filename": existing.filename, "id": existing.id, "message": "Duplicate photo skipped"} - img_dir = Path("data/generated_images") + img_dir = Path(GENERATED_IMAGES_DIR) img_dir.mkdir(parents=True, exist_ok=True) ext = file.filename.rsplit(".", 1)[-1].lower() if "." in file.filename else "png" @@ -199,7 +200,7 @@ def setup_gallery_routes() -> APIRouter: raise HTTPException(400, "No image provided") content = await read_upload_limited(file, GALLERY_UPLOAD_MAX_BYTES, "Gallery replacement") - img_dir = Path("data/generated_images") + img_dir = Path(GENERATED_IMAGES_DIR) img_dir.mkdir(parents=True, exist_ok=True) img_path = img_dir / _sanitize_gallery_filename(img.filename) img_path.write_bytes(content) diff --git a/routes/mcp_routes.py b/routes/mcp_routes.py index e3a73c8fa..ca2722b5b 100644 --- a/routes/mcp_routes.py +++ b/routes/mcp_routes.py @@ -13,7 +13,7 @@ import httpx from core.database import McpServer, SessionLocal from core.middleware import require_admin -from src.constants import DATA_DIR +from src.constants import DATA_DIR, MCP_OAUTH_DIR from src.mcp_manager import McpManager logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ router = APIRouter(prefix="/api/mcp", tags=["mcp"]) def _mcp_oauth_base_dir() -> Path: """Directory that may contain OAuth files managed by Odysseus.""" - return (Path(DATA_DIR) / "mcp_oauth").resolve(strict=False) + return Path(MCP_OAUTH_DIR).resolve(strict=False) def _resolve_mcp_oauth_path(raw_path, field_name: str) -> str: diff --git a/routes/note_routes.py b/routes/note_routes.py index 3332c1b78..22449f1e4 100644 --- a/routes/note_routes.py +++ b/routes/note_routes.py @@ -11,6 +11,7 @@ from pydantic import BaseModel from core.database import SessionLocal, Note from src.auth_helpers import get_current_user +from src.constants import DATA_DIR from sqlalchemy.orm.attributes import flag_modified logger = logging.getLogger(__name__) @@ -170,7 +171,7 @@ async def dispatch_reminder( from datetime import datetime as _dt, timezone as _tz, timedelta as _td from pathlib import Path as _P _slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default")) - cache_path = _P(f"data/note_pings_{_slug}.json") + cache_path = _P(DATA_DIR) / f"note_pings_{_slug}.json" if cache_path.exists(): cache = _json.loads(cache_path.read_text(encoding="utf-8")) last = cache.get(cache_key) @@ -523,7 +524,7 @@ async def dispatch_reminder( _STATE = cache_path if _STATE is None: _slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default")) - _STATE = _P(f"data/note_pings_{_slug}.json") + _STATE = _P(DATA_DIR) / f"note_pings_{_slug}.json" _STATE.parent.mkdir(parents=True, exist_ok=True) try: _cache = cache or (_json.loads(_STATE.read_text(encoding="utf-8")) if _STATE.exists() else {}) diff --git a/routes/personal_routes.py b/routes/personal_routes.py index e47fbbd7c..4ef3219fc 100644 --- a/routes/personal_routes.py +++ b/routes/personal_routes.py @@ -6,13 +6,13 @@ import uuid from typing import List, Tuple from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends from src.request_models import DirectoryRequest -from core.constants import BASE_DIR, PERSONAL_DIR +from core.constants import BASE_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR from src.rag_singleton import get_rag_manager from src.auth_helpers import require_privilege, require_user from core.middleware import require_admin from src.upload_handler import secure_filename -UPLOADS_DIR = os.path.join(BASE_DIR, "data", "personal_uploads") +UPLOADS_DIR = PERSONAL_UPLOADS_DIR MAX_PERSONAL_UPLOAD_BYTES = int( os.getenv("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024)) ) diff --git a/routes/prefs_routes.py b/routes/prefs_routes.py index f58049c26..f2a778c2d 100644 --- a/routes/prefs_routes.py +++ b/routes/prefs_routes.py @@ -4,8 +4,9 @@ import os from typing import Optional from fastapi import APIRouter, Request from src.auth_helpers import get_current_user +from src.constants import USER_PREFS_FILE -PREFS_FILE = os.path.join("data", "user_prefs.json") +PREFS_FILE = USER_PREFS_FILE def _load(): diff --git a/routes/research_routes.py b/routes/research_routes.py index 569dad3e9..c48ba3b5d 100644 --- a/routes/research_routes.py +++ b/routes/research_routes.py @@ -14,6 +14,7 @@ from fastapi.responses import HTMLResponse, StreamingResponse from pydantic import BaseModel, Field from src.endpoint_resolver import resolve_endpoint from src.auth_helpers import _auth_disabled, get_current_user +from src.constants import DEEP_RESEARCH_DIR _SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$") @@ -100,7 +101,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: if entry is not None: return entry.get("owner", "") == user # Task no longer in memory — check the persisted JSON. - path = Path("data/deep_research") / f"{session_id}.json" + path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json" if not path.exists(): return False try: @@ -164,7 +165,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: def _assert_owns_research(session_id: str, user: str) -> None: """404-not-403 ownership gate for a research session's on-disk JSON. Use BEFORE returning any data or mutating the file.""" - path = Path("data/deep_research") / f"{session_id}.json" + path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json" if not path.exists(): raise HTTPException(404, "Research not found") try: @@ -227,7 +228,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: ): user = _require_user(request) """List all completed research for the Library panel.""" - data_dir = Path("data/deep_research") + data_dir = Path(DEEP_RESEARCH_DIR) items = [] for p in data_dir.glob("*.json"): try: @@ -277,7 +278,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: summary, stats — used by the Library preview panel.""" user = _require_user(request) _validate_session_id(session_id) - path = Path("data/deep_research") / f"{session_id}.json" + path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json" if not path.exists(): raise HTTPException(404, "Research not found") try: @@ -294,7 +295,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: """Soft-archive / restore a research report (sets `archived` in its JSON).""" user = _require_user(request) _validate_session_id(session_id) - path = Path("data/deep_research") / f"{session_id}.json" + path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json" if not path.exists(): raise HTTPException(404, "Research not found") try: @@ -314,7 +315,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: """Delete a research result from disk.""" user = _require_user(request) _validate_session_id(session_id) - data_dir = Path("data/deep_research") + data_dir = Path(DEEP_RESEARCH_DIR) json_path = data_dir / f"{session_id}.json" deleted = False if json_path.exists(): @@ -496,7 +497,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: raise HTTPException(404, "No research found for this session") result = research_handler.get_result(session_id) if result is None: - p = Path("data/deep_research") / f"{session_id}.json" + p = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json" if p.exists(): d = json.loads(p.read_text(encoding="utf-8")) return { @@ -536,7 +537,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: sources = research_handler.get_sources(session_id) or [] query = "" - path = Path("data/deep_research") / f"{session_id}.json" + path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json" if path.exists(): try: disk = json.loads(path.read_text(encoding="utf-8")) diff --git a/routes/task_routes.py b/routes/task_routes.py index bd2090a57..eef0351fc 100644 --- a/routes/task_routes.py +++ b/routes/task_routes.py @@ -13,6 +13,7 @@ from pydantic import BaseModel from core.database import SessionLocal, ScheduledTask, TaskRun from core.constants import internal_api_base from src.auth_helpers import get_current_user +from src.constants import DATA_DIR, EMAIL_URGENCY_CACHE_DIR from src.task_scheduler import compute_next_run, HOUSEKEEPING_DEFAULTS from routes.prefs_routes import _load_for_user, _save_for_user @@ -621,7 +622,7 @@ def setup_task_routes(task_scheduler) -> APIRouter: removed_files = 0 if action == "check_email_urgency": - cache_dir = Path("data/email_urgency_cache") + cache_dir = Path(EMAIL_URGENCY_CACHE_DIR) if cache_dir.exists(): for child in cache_dir.glob("*.json"): try: @@ -630,7 +631,7 @@ def setup_task_routes(task_scheduler) -> APIRouter: except Exception: pass owner_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (user or "default")) - for state_path in [Path(f"data/email_urgency_state_{owner_slug}.json")]: + for state_path in [Path(DATA_DIR) / f"email_urgency_state_{owner_slug}.json"]: try: if state_path.exists(): state_path.unlink() diff --git a/routes/vault_routes.py b/routes/vault_routes.py index c6258bb5c..7e97500f0 100644 --- a/routes/vault_routes.py +++ b/routes/vault_routes.py @@ -17,10 +17,11 @@ from pydantic import BaseModel from core.middleware import require_admin from core.platform_compat import IS_WINDOWS, safe_chmod, which_tool +from src.constants import VAULT_FILE as _VAULT_FILE logger = logging.getLogger(__name__) -VAULT_FILE = Path("data/vault.json") +VAULT_FILE = Path(_VAULT_FILE) def _find_bw() -> str: diff --git a/scripts/claim_ownerless.py b/scripts/claim_ownerless.py index fd275229d..1682db11b 100644 --- a/scripts/claim_ownerless.py +++ b/scripts/claim_ownerless.py @@ -13,6 +13,8 @@ import json sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from src.constants import MEMORY_FILE, SKILLS_FILE + def claim_json_entries(entries, owner): count = 0 @@ -35,8 +37,8 @@ def main(): # 1. Memories (JSON files) for label, path in [ - ("memory.json", "data/memory.json"), - ("skills.json", "data/skills.json"), + ("memory.json", MEMORY_FILE), + ("skills.json", SKILLS_FILE), ]: if not os.path.exists(path): print(f" {label}: not found, skipping") diff --git a/scripts/index_documents.py b/scripts/index_documents.py index 4117e586e..009212879 100644 --- a/scripts/index_documents.py +++ b/scripts/index_documents.py @@ -19,6 +19,9 @@ import sys from pathlib import Path from typing import List, Tuple +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from src.constants import PERSONAL_DIR + # Configure logging for the script logging.basicConfig( level=logging.INFO, @@ -45,7 +48,7 @@ def main(): rag_manager = RAGManager() # Directory to scan - docs_directory = "data/personal_docs" + docs_directory = PERSONAL_DIR directory_path = Path(docs_directory) # Check if directory exists diff --git a/scripts/migrate_faiss_to_chroma.py b/scripts/migrate_faiss_to_chroma.py index 68f3dcb1b..02fc5f9a2 100644 --- a/scripts/migrate_faiss_to_chroma.py +++ b/scripts/migrate_faiss_to_chroma.py @@ -63,10 +63,10 @@ def migrate_memories(): """Migrate memory vectors from FAISS to ChromaDB.""" from src.chroma_client import get_chroma_client from src.embeddings import get_embedding_client - from src.constants import DATA_DIR + from src.constants import MEMORY_VECTORS_DIR, MEMORY_FILE - ids_path = os.path.join(DATA_DIR, "memory_vectors", "ids.json") - memory_path = os.path.join(DATA_DIR, "memory.json") + ids_path = os.path.join(MEMORY_VECTORS_DIR, "ids.json") + memory_path = MEMORY_FILE if not os.path.exists(ids_path): logger.info("No memory FAISS index found, skipping memory migration") diff --git a/services/docs/service.py b/services/docs/service.py index 29a515842..5242aa5ce 100644 --- a/services/docs/service.py +++ b/services/docs/service.py @@ -5,6 +5,7 @@ from dataclasses import dataclass from typing import List, Dict, Any from src.rag_manager import RAGManager +from src.constants import CHROMA_DIR @dataclass @@ -34,7 +35,7 @@ class DocsService: results = await service.query("what is async await?") """ - def __init__(self, persist_dir: str = "data/chroma"): + def __init__(self, persist_dir: str = CHROMA_DIR): self.rag = RAGManager(persist_directory=persist_dir) async def query(self, query: str, top_k: int = 5) -> List[DocChunk]: diff --git a/services/memory/service.py b/services/memory/service.py index 0a5b9b555..faf74ae13 100644 --- a/services/memory/service.py +++ b/services/memory/service.py @@ -8,6 +8,7 @@ import os from .memory import MemoryManager from .memory_vector import MemoryVectorStore from src.memory_provider import MemoryRecord, NativeMemoryProvider +from src.constants import DATA_DIR @dataclass @@ -38,7 +39,7 @@ class MemoryService: results = await service.recall("preferences") """ - def __init__(self, data_dir: str = "data"): + def __init__(self, data_dir: str = DATA_DIR): self.manager = MemoryManager(data_dir) self.vector_store = MemoryVectorStore(data_dir) if os.path.exists( os.path.join(data_dir, "memory_vectors") diff --git a/services/research/research_handler.py b/services/research/research_handler.py index 0a49c7230..bd4c6bb15 100644 --- a/services/research/research_handler.py +++ b/services/research/research_handler.py @@ -15,10 +15,11 @@ from pathlib import Path from typing import Optional, Dict from src.research_utils import is_low_quality +from src.constants import DEEP_RESEARCH_DIR logger = logging.getLogger(__name__) -RESEARCH_DATA_DIR = Path("data/deep_research") +RESEARCH_DATA_DIR = Path(DEEP_RESEARCH_DIR) class ResearchHandler: diff --git a/services/tts/tts_service.py b/services/tts/tts_service.py index 10b993f4a..e724434cb 100644 --- a/services/tts/tts_service.py +++ b/services/tts/tts_service.py @@ -9,6 +9,8 @@ import httpx from pathlib import Path from typing import Optional, Dict, Any +from src.constants import TTS_CACHE_DIR + logger = logging.getLogger(__name__) @@ -35,7 +37,7 @@ class TTSService: "endpoint:" — OpenAI-compatible /audio/speech via ModelEndpoint """ - def __init__(self, cache_dir: str = "data/tts_cache"): + def __init__(self, cache_dir: str = TTS_CACHE_DIR): self.cache_dir = Path(cache_dir) self.cache_dir.mkdir(parents=True, exist_ok=True) self._kokoro = None # lazy-init diff --git a/setup.py b/setup.py index b904e8670..81fcc87ab 100644 --- a/setup.py +++ b/setup.py @@ -12,19 +12,24 @@ import subprocess import sys BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -DATA_DIR = os.path.join(BASE_DIR, "data") +sys.path.insert(0, BASE_DIR) +from src.constants import ( + DATA_DIR, AUTH_FILE, UPLOAD_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR, + TTS_CACHE_DIR, GENERATED_IMAGES_DIR, DEEP_RESEARCH_DIR, CHROMA_DIR, + RAG_DIR, MEMORY_VECTORS_DIR, +) DIRS = [ DATA_DIR, - os.path.join(DATA_DIR, "uploads"), - os.path.join(DATA_DIR, "personal_docs"), - os.path.join(DATA_DIR, "personal_uploads"), - os.path.join(DATA_DIR, "tts_cache"), - os.path.join(DATA_DIR, "generated_images"), - os.path.join(DATA_DIR, "deep_research"), - os.path.join(DATA_DIR, "chroma"), - os.path.join(DATA_DIR, "rag"), - os.path.join(DATA_DIR, "memory_vectors"), + UPLOAD_DIR, + PERSONAL_DIR, + PERSONAL_UPLOADS_DIR, + TTS_CACHE_DIR, + GENERATED_IMAGES_DIR, + DEEP_RESEARCH_DIR, + CHROMA_DIR, + RAG_DIR, + MEMORY_VECTORS_DIR, os.path.join(BASE_DIR, "logs"), ] @@ -74,7 +79,7 @@ def _prompt_admin_credentials(): def create_default_admin(): """Create an initial admin user if none exists.""" - auth_path = os.path.join(DATA_DIR, "auth.json") + auth_path = AUTH_FILE if os.path.exists(auth_path): print(" [skip] auth.json already exists") return "exists" diff --git a/src/ai_interaction.py b/src/ai_interaction.py index 383560eed..a03a5b0ac 100644 --- a/src/ai_interaction.py +++ b/src/ai_interaction.py @@ -14,6 +14,8 @@ import uuid import time from typing import Dict, Optional, Tuple +from src.constants import GENERATED_IMAGES_DIR + logger = logging.getLogger(__name__) AI_CHAT_TIMEOUT = 120 # seconds for a single LLM call @@ -1715,7 +1717,7 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne # GPT image models always return b64_json; DALL-E may return url if img.get("b64_json"): - img_dir = Path("data/generated_images") + img_dir = Path(GENERATED_IMAGES_DIR) img_dir.mkdir(parents=True, exist_ok=True) filename = f"{uuid.uuid4().hex[:12]}.png" img_path = img_dir / filename @@ -1728,7 +1730,7 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne try: dl_resp = httpx.get(img["url"], timeout=60) if dl_resp.status_code == 200: - img_dir = Path("data/generated_images") + img_dir = Path(GENERATED_IMAGES_DIR) img_dir.mkdir(parents=True, exist_ok=True) filename = f"{uuid.uuid4().hex[:12]}.png" img_path = img_dir / filename diff --git a/src/bg_jobs.py b/src/bg_jobs.py index c103dfdfc..8e452106b 100644 --- a/src/bg_jobs.py +++ b/src/bg_jobs.py @@ -38,9 +38,10 @@ from core.platform_compat import ( pid_alive, ) -_DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) -_JOBS_DIR = _DATA_DIR / "bg_jobs" -_STORE = _DATA_DIR / "bg_jobs.json" +from src.constants import BG_JOBS_DIR, BG_JOBS_FILE + +_JOBS_DIR = Path(BG_JOBS_DIR) +_STORE = Path(BG_JOBS_FILE) # A job that runs longer than this is presumed stuck and reaped (the agent # still gets a "timed out" follow-up so nothing hangs forever). diff --git a/src/builtin_actions.py b/src/builtin_actions.py index 1e0c681fe..3fdeedc71 100644 --- a/src/builtin_actions.py +++ b/src/builtin_actions.py @@ -12,7 +12,8 @@ from typing import Tuple from src.auth_helpers import owner_filter from core.platform_compat import IS_WINDOWS, find_bash -from core.constants import DATA_DIR, internal_api_base +from core.constants import internal_api_base +from src.constants import DATA_DIR, DEEP_RESEARCH_DIR, TIDY_CALENDAR_STATE_FILE, EMAIL_URGENCY_CACHE_DIR, COOKBOOK_STATE_FILE logger = logging.getLogger(__name__) @@ -349,7 +350,7 @@ async def action_tidy_research(owner: str, **kwargs) -> Tuple[str, bool]: try: from pathlib import Path import json as _json - research_dir = Path("data/deep_research") + research_dir = Path(DEEP_RESEARCH_DIR) if not research_dir.exists(): raise TaskNoop("no research directory") files = list(research_dir.glob("*.json")) @@ -387,7 +388,7 @@ async def action_tidy_calendar(owner: str, **kwargs) -> Tuple[str, bool]: from core.database import SessionLocal, CalendarEvent from sqlalchemy import func - STATE_FILE = Path("data/tidy_calendar_state.json") + STATE_FILE = Path(TIDY_CALENDAR_STATE_FILE) last_watermark = None try: if STATE_FILE.exists(): @@ -1304,12 +1305,12 @@ async def action_ping_notes(owner: str, **kwargs) -> Tuple[str, bool]: # users' entries (review C4). Legacy path kept as fallback so a # single-user install (empty owner) doesn't lose its history. _owner_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default")) - STATE = _P(f"data/note_pings_{_owner_slug}.json") + STATE = _P(DATA_DIR) / f"note_pings_{_owner_slug}.json" STATE.parent.mkdir(parents=True, exist_ok=True) # One-time migration: if legacy global file exists and per-owner file # doesn't, seed from global (entries for OTHER owners still get pruned # on their first run — acceptable, prevents silent loss). - _legacy = _P("data/note_pings.json") + _legacy = _P(DATA_DIR) / "note_pings.json" if _legacy.exists() and not STATE.exists(): try: STATE.write_text(_legacy.read_text(encoding="utf-8"), encoding="utf-8") @@ -1466,8 +1467,8 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]: # notified_uids / urgency counts. Empty owner falls back to a generic # filename for single-user installs (matches prior behaviour). _owner_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default")) - STATE_PATH = _P(f"data/email_urgency_state_{_owner_slug}.json") - CACHE_DIR = _P("data/email_urgency_cache") + STATE_PATH = _P(DATA_DIR) / f"email_urgency_state_{_owner_slug}.json" + CACHE_DIR = _P(EMAIL_URGENCY_CACHE_DIR) CACHE_DIR.mkdir(parents=True, exist_ok=True) STATE_PATH.parent.mkdir(parents=True, exist_ok=True) AGE_CUTOFF = _dt.utcnow() - _td(days=7) @@ -2043,7 +2044,7 @@ async def action_cookbook_serve( except Exception: end_after_min = 0 - state_path = Path(DATA_DIR) / "cookbook_state.json" + state_path = Path(COOKBOOK_STATE_FILE) try: state = json.loads(state_path.read_text(encoding="utf-8")) if state_path.exists() else {} except Exception: diff --git a/src/config.py b/src/config.py index 58a5c466e..8b9bd5148 100644 --- a/src/config.py +++ b/src/config.py @@ -4,6 +4,8 @@ from typing import List, Optional from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic import Field, field_validator +from src.constants import DATA_DIR as _DATA_DIR_CONST + # Cross-platform OS flag, exposed here so callers can `from src.config import # IS_WINDOWS`. Defined locally (a trivial `os.name == "nt"`) rather than imported # from core.platform_compat, to keep this dependency-light config module from @@ -20,13 +22,13 @@ class DataConfig(BaseSettings): base_dir: Path = Field(default=Path(__file__).parent.parent, description="Base directory for the application") # Data paths - data_dir: Path = Field(default=Path("data"), description="Main data directory") - uploads_dir: Path = Field(default=Path("data/uploads"), description="Directory for uploaded files") - sessions_file: Path = Field(default=Path("data/sessions.json"), description="Sessions storage file") - memory_file: Path = Field(default=Path("data/memory.json"), description="Memory storage file") - memory_doc: Path = Field(default=Path("data/memory_doc.md"), description="Memory document file") - personal_dir: Path = Field(default=Path("data/personal_docs"), description="Personal documents directory") - runbook_dir: Path = Field(default=Path("data/personal_docs/runbook"), description="Runbook directory") + data_dir: Path = Field(default=Path(_DATA_DIR_CONST), description="Main data directory") + uploads_dir: Path = Field(default=Path(_DATA_DIR_CONST) / "uploads", description="Directory for uploaded files") + sessions_file: Path = Field(default=Path(_DATA_DIR_CONST) / "sessions.json", description="Sessions storage file") + memory_file: Path = Field(default=Path(_DATA_DIR_CONST) / "memory.json", description="Memory storage file") + memory_doc: Path = Field(default=Path(_DATA_DIR_CONST) / "memory_doc.md", description="Memory document file") + personal_dir: Path = Field(default=Path(_DATA_DIR_CONST) / "personal_docs", description="Personal documents directory") + runbook_dir: Path = Field(default=Path(_DATA_DIR_CONST) / "personal_docs" / "runbook", description="Runbook directory") # Upload settings max_upload_size: int = Field(default=10 * 1024 * 1024, description="Maximum upload size in bytes (10MB)") @@ -139,7 +141,7 @@ class AppConfig(BaseSettings): base_dir = Path(__file__).parent.parent # Convert string paths to Path objects relative to base_dir - data_dir = base_dir / "data" + data_dir = Path(_DATA_DIR_CONST) # Get values from the input dict or use defaults max_upload_size = v.get("max_upload_size", 10 * 1024 * 1024) if isinstance(v, dict) else 10 * 1024 * 1024 diff --git a/src/constants.py b/src/constants.py index afe9db88a..3f58eba26 100644 --- a/src/constants.py +++ b/src/constants.py @@ -7,9 +7,12 @@ APP_VERSION = "1.0.0" # Base paths BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/" STATIC_DIR = os.path.join(BASE_DIR, "static") -DATA_DIR = os.path.join(BASE_DIR, "data") +DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR", os.path.join(BASE_DIR, "data")) # Data file paths +# Single source of truth: every persisted file/dir lives under DATA_DIR, which +# is the ONLY place ODYSSEUS_DATA_DIR is read. Import these constants instead of +# re-deriving paths from __file__ or a relative "data" literal. SESSIONS_FILE = os.path.join(DATA_DIR, "sessions.json") MEMORY_FILE = os.path.join(DATA_DIR, "memory.json") MEMORY_DOC = os.path.join(DATA_DIR, "memory_doc.md") @@ -18,6 +21,41 @@ RUNBOOK_DIR = os.path.join(PERSONAL_DIR, "runbook") UPLOAD_DIR = os.path.join(DATA_DIR, "uploads") FEATURES_FILE = os.path.join(DATA_DIR, "features.json") SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json") +AUTH_FILE = os.path.join(DATA_DIR, "auth.json") +USER_PREFS_FILE = os.path.join(DATA_DIR, "user_prefs.json") +PRESETS_FILE = os.path.join(DATA_DIR, "presets.json") +INTEGRATIONS_FILE = os.path.join(DATA_DIR, "integrations.json") +CONTACTS_FILE = os.path.join(DATA_DIR, "contacts.json") +APP_KEY_FILE = os.path.join(DATA_DIR, ".app_key") +EMBEDDING_ENDPOINT_FILE = os.path.join(DATA_DIR, "embedding_endpoint.json") +COOKBOOK_STATE_FILE = os.path.join(DATA_DIR, "cookbook_state.json") +BG_JOBS_FILE = os.path.join(DATA_DIR, "bg_jobs.json") +VAULT_FILE = os.path.join(DATA_DIR, "vault.json") +TIDY_CALENDAR_STATE_FILE = os.path.join(DATA_DIR, "tidy_calendar_state.json") +SKILLS_FILE = os.path.join(DATA_DIR, "skills.json") +APP_DB = os.path.join(DATA_DIR, "app.db") +SCHEDULED_EMAILS_DB = os.path.join(DATA_DIR, "scheduled_emails.db") +EMAIL_CACHE_DB = os.path.join(DATA_DIR, "email_cache.db") + +# Data subdirectories +PERSONAL_UPLOADS_DIR = os.path.join(DATA_DIR, "personal_uploads") +EMOJI_CACHE_DIR = os.path.join(DATA_DIR, "emoji_cache") +RAG_DIR = os.path.join(DATA_DIR, "rag") +CHROMA_DIR = os.path.join(DATA_DIR, "chroma") +BG_JOBS_DIR = os.path.join(DATA_DIR, "bg_jobs") +DEEP_RESEARCH_DIR = os.path.join(DATA_DIR, "deep_research") +MCP_OAUTH_DIR = os.path.join(DATA_DIR, "mcp_oauth") +GENERATED_IMAGES_DIR = os.path.join(DATA_DIR, "generated_images") +TTS_CACHE_DIR = os.path.join(DATA_DIR, "tts_cache") +EMAIL_URGENCY_CACHE_DIR = os.path.join(DATA_DIR, "email_urgency_cache") +SKILLS_DIR = os.path.join(DATA_DIR, "skills") +GALLERY_DIR = os.path.join(DATA_DIR, "gallery") +GALLERY_UPLOADS_DIR = os.path.join(DATA_DIR, "gallery_uploads") +MEMORY_VECTORS_DIR = os.path.join(DATA_DIR, "memory_vectors") + +# Paths with an intentional dedicated env override, defaulting under DATA_DIR. +MAIL_ATTACHMENTS_DIR = os.getenv("ODYSSEUS_MAIL_ATTACHMENTS_DIR", os.path.join(DATA_DIR, "mail-attachments")) +FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH", os.path.join(DATA_DIR, "fastembed_cache")) # Agent tool output limits (single source of truth — imported by tool_execution.py, # tool_implementations.py, agent_tools.py, and any other module that needs them) @@ -44,3 +82,22 @@ CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24")) # Default parameters DEFAULT_TEMPERATURE = 1.0 DEFAULT_MAX_TOKENS = 0 + + +def internal_api_base() -> str: + """Base URL for in-process loopback calls to Odysseus's own API. + + Agent tools and background jobs reach admin-gated routes by calling the + running server over HTTP. Resolution order: + 1. ODYSSEUS_INTERNAL_BASE - explicit override (e.g. behind a TLS proxy). + 2. APP_PORT - http://127.0.0.1:$APP_PORT (docker-compose). + 3. Fallback http://127.0.0.1:7000 - legacy default. + + 127.0.0.1 (not "localhost") avoids IPv6/DNS ambiguity for a strictly-local + call. Without this, loopback tools fail with "All connection attempts + failed" whenever the server is not on port 7000. + """ + override = os.environ.get("ODYSSEUS_INTERNAL_BASE") + if override: + return override.rstrip("/") + return f"http://127.0.0.1:{os.environ.get('APP_PORT', '7000')}" diff --git a/src/cookbook_serve_lifecycle.py b/src/cookbook_serve_lifecycle.py index 6948763b7..e30ddfd09 100644 --- a/src/cookbook_serve_lifecycle.py +++ b/src/cookbook_serve_lifecycle.py @@ -19,7 +19,8 @@ import time from pathlib import Path import httpx -from core.constants import DATA_DIR, internal_api_base +from core.constants import internal_api_base +from src.constants import COOKBOOK_STATE_FILE logger = logging.getLogger(__name__) @@ -130,7 +131,7 @@ async def _stop_serve(session_id: str, remote_host: str = "", ssh_port: str = "" async def _tick() -> None: - state_path = Path(DATA_DIR) / "cookbook_state.json" + state_path = Path(COOKBOOK_STATE_FILE) if not state_path.exists(): return try: diff --git a/src/embeddings.py b/src/embeddings.py index f2d0c5934..85a55c386 100644 --- a/src/embeddings.py +++ b/src/embeddings.py @@ -14,6 +14,8 @@ Set EMBEDDING_URL in .env, e.g.: import os +from src.constants import FASTEMBED_CACHE_DIR, EMBEDDING_ENDPOINT_FILE + # Windows: force HuggingFace/fastembed to COPY model files rather than symlink # them. On a network-share/UNC cache dir Windows can't follow HF's symlinks # ([WinError 1463] "symbolic link cannot be followed"), so ONNX fails to load the @@ -117,10 +119,7 @@ class FastEmbedClient: # Persistent cache under data/ so the model survives reboots and so # the download lands exactly where the admin panel's _is_downloaded() # check looks (both default to this same path). - cache_dir = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join( - os.path.dirname(os.path.dirname(os.path.abspath(__file__))), - "data", "fastembed_cache", - ) + cache_dir = FASTEMBED_CACHE_DIR os.makedirs(cache_dir, exist_ok=True) # Windows self-heal: the HuggingFace-hub cache stores model files as # symlinks (snapshots//model.onnx -> ../../blobs/). On a @@ -188,10 +187,7 @@ class FastEmbedClient: def _load_persisted_endpoint() -> dict: """Load the custom embedding endpoint saved from the admin panel.""" try: - endpoint_file = os.path.join( - os.path.dirname(os.path.dirname(os.path.abspath(__file__))), - "data", "embedding_endpoint.json", - ) + endpoint_file = EMBEDDING_ENDPOINT_FILE if os.path.exists(endpoint_file): import json data = json.loads(open(endpoint_file, encoding="utf-8").read()) diff --git a/src/event_bus.py b/src/event_bus.py index 8bdb889a0..9b22d7821 100644 --- a/src/event_bus.py +++ b/src/event_bus.py @@ -12,6 +12,8 @@ import os from datetime import datetime from typing import Optional +from src.constants import AUTH_FILE + logger = logging.getLogger(__name__) _task_scheduler = None @@ -54,9 +56,7 @@ def _resolve_event_owner(owner: Optional[str]) -> Optional[str]: return owner try: - from src.constants import DATA_DIR - - auth_path = os.path.join(DATA_DIR, "auth.json") + auth_path = AUTH_FILE with open(auth_path, "r", encoding="utf-8") as f: users = (json.load(f).get("users") or {}) for username, data in users.items(): diff --git a/src/generated_images.py b/src/generated_images.py index 2e7994175..d40022d60 100644 --- a/src/generated_images.py +++ b/src/generated_images.py @@ -4,8 +4,10 @@ from pathlib import Path from fastapi import HTTPException +from src.constants import GENERATED_IMAGES_DIR -GENERATED_IMAGE_DIR = Path("data/generated_images") + +GENERATED_IMAGE_DIR = Path(GENERATED_IMAGES_DIR) GENERATED_IMAGE_RE = re.compile( r"^[a-f0-9]{8,64}\.(png|jpg|jpeg|webp|gif|mp4|mov|webm|mkv|m4v)$" ) diff --git a/src/integrations.py b/src/integrations.py index 8ff0aa065..aeeb6795d 100644 --- a/src/integrations.py +++ b/src/integrations.py @@ -10,10 +10,11 @@ import httpx from core.atomic_io import atomic_write_json from core.platform_compat import safe_chmod from src.secret_storage import decrypt, encrypt, is_encrypted +from src.constants import DATA_DIR, INTEGRATIONS_FILE, SETTINGS_FILE log = logging.getLogger(__name__) -DATA_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "integrations.json") +DATA_FILE = INTEGRATIONS_FILE # --------------------------------------------------------------------------- # Presets @@ -471,7 +472,7 @@ def get_integrations_prompt() -> str: def migrate_from_settings() -> None: """If data/settings.json has miniflux_url and miniflux_api_key, create a Miniflux integration and clear those keys from settings.""" - settings_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "settings.json") + settings_path = SETTINGS_FILE if not os.path.exists(settings_path): return diff --git a/src/rag_manager.py b/src/rag_manager.py index 8a7767761..a41608ecf 100644 --- a/src/rag_manager.py +++ b/src/rag_manager.py @@ -7,6 +7,8 @@ A thin wrapper around VectorRAG for backward compatibility and additional featur import logging from typing import List, Dict, Any, Optional +from src.constants import CHROMA_DIR + # Try to import from different possible locations try: from rag_vector import VectorRAG @@ -24,7 +26,7 @@ class RAGManager: Most methods delegate directly to VectorRAG. """ - def __init__(self, persist_directory: str = "data/chroma"): + def __init__(self, persist_directory: str = CHROMA_DIR): """Initialize the RAGManager with VectorRAG.""" self.vector_rag = VectorRAG(persist_directory=persist_directory) logger.info("RAGManager initialized as wrapper for VectorRAG") diff --git a/src/rag_singleton.py b/src/rag_singleton.py index eb90e847a..7bc5d74b4 100644 --- a/src/rag_singleton.py +++ b/src/rag_singleton.py @@ -6,6 +6,8 @@ import logging import time from pathlib import Path +from src.constants import RAG_DIR + logger = logging.getLogger(__name__) rag_instance = None @@ -41,8 +43,7 @@ def get_rag_manager(): try: from src.rag_vector import VectorRAG - base_dir = Path(__file__).parent.parent - persist_dir = os.path.join(base_dir, "data", "rag") + persist_dir = RAG_DIR rag_instance = VectorRAG(persist_directory=persist_dir) if not rag_instance.healthy: diff --git a/src/rag_vector.py b/src/rag_vector.py index b10680c45..fc66c82e1 100644 --- a/src/rag_vector.py +++ b/src/rag_vector.py @@ -12,6 +12,8 @@ import re import logging import numpy as np from typing import List, Dict, Any, Optional, Set + +from src.constants import CHROMA_DIR from pathlib import Path from src.embedding_lanes import ( @@ -51,7 +53,7 @@ def _generate_doc_id(text: str, owner: str = "") -> str: class VectorRAG: """RAG system using ChromaDB vector storage with hybrid search.""" - def __init__(self, persist_directory: str = "data/chroma"): + def __init__(self, persist_directory: str = CHROMA_DIR): self.persist_directory = persist_directory self._collection = None self._model = None diff --git a/src/research_handler.py b/src/research_handler.py index 2fc369195..b996f089f 100644 --- a/src/research_handler.py +++ b/src/research_handler.py @@ -16,10 +16,11 @@ from pathlib import Path from typing import Optional, Dict from src.research_utils import strip_thinking, is_low_quality +from src.constants import DEEP_RESEARCH_DIR logger = logging.getLogger(__name__) -RESEARCH_DATA_DIR = Path("data/deep_research") +RESEARCH_DATA_DIR = Path(DEEP_RESEARCH_DIR) _RESEARCH_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9-]{1,128}$") diff --git a/src/secret_storage.py b/src/secret_storage.py index 15f02f26a..c4a08be1d 100644 --- a/src/secret_storage.py +++ b/src/secret_storage.py @@ -25,10 +25,11 @@ from pathlib import Path from cryptography.fernet import Fernet, InvalidToken from core.platform_compat import safe_chmod +from src.constants import APP_KEY_FILE logger = logging.getLogger(__name__) -_KEY_PATH = Path(__file__).resolve().parent.parent / "data" / ".app_key" +_KEY_PATH = Path(APP_KEY_FILE) _PREFIX = "enc:" _fernet: Fernet | None = None diff --git a/src/tool_execution.py b/src/tool_execution.py index b804376c7..1f8fa5c92 100644 --- a/src/tool_execution.py +++ b/src/tool_execution.py @@ -20,14 +20,14 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Tuple from src.tool_security import is_public_blocked_tool, owner_is_admin_or_single_user from src.tool_policy import ToolPolicy -from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS, MAX_DIFF_LINES +from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS, MAX_DIFF_LINES, DATA_DIR # Persistent working directory for agent subprocesses. # Resolves to /data, which is the bind-mounted volume in Docker # (/app/data) and the local data directory for manual installs. # Using this as cwd and HOME prevents the agent from silently creating files # in ephemeral container layers that are lost on the next rebuild. -_AGENT_WORKDIR = str(pathlib.Path(__file__).parent.parent / "data") +_AGENT_WORKDIR = DATA_DIR def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]: diff --git a/src/tool_implementations.py b/src/tool_implementations.py index 1c1302042..81b7054c6 100644 --- a/src/tool_implementations.py +++ b/src/tool_implementations.py @@ -12,7 +12,7 @@ import os import re from typing import Any, Dict, List, Optional -from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS +from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS, DEEP_RESEARCH_DIR, VAULT_FILE from core.constants import internal_api_base @@ -4057,7 +4057,7 @@ async def do_manage_research(content: str, owner: Optional[str] = None) -> Dict: args = {} action = (args.get("action") or "list").lower() rid = (args.get("id") or args.get("session_id") or args.get("research_id") or "").strip() - data_dir = _Path("data/deep_research") + data_dir = _Path(DEEP_RESEARCH_DIR) # SECURITY: the research id is interpolated straight into a filesystem # path (data/deep_research/.json) for read AND delete. Without this @@ -4302,7 +4302,7 @@ async def do_manage_contact(content: str, owner: Optional[str] = None) -> Dict: def _load_vault_config() -> Dict: """Load Vaultwarden config from data/vault.json.""" from pathlib import Path - p = Path("data/vault.json") + p = Path(VAULT_FILE) if p.exists(): try: return json.loads(p.read_text(encoding="utf-8")) @@ -4456,7 +4456,7 @@ async def do_vault_unlock(content: str, owner: Optional[str] = None) -> Dict: # Save session to vault.json from pathlib import Path - p = Path("data/vault.json") + p = Path(VAULT_FILE) cfg = {} if p.exists(): try: diff --git a/tests/test_companion_pairing.py b/tests/test_companion_pairing.py index c4abcd51c..8121ee76f 100644 --- a/tests/test_companion_pairing.py +++ b/tests/test_companion_pairing.py @@ -118,10 +118,11 @@ def test_pairing_payload_shape(): @pytest.mark.parametrize("payload", ["[]", '{"users": []}']) def test_find_admin_user_ignores_invalid_auth_shape(tmp_path, monkeypatch, payload): - data_dir = tmp_path / "data" - data_dir.mkdir() - (data_dir / "auth.json").write_text(payload) - monkeypatch.chdir(tmp_path) + auth_file = tmp_path / "auth.json" + auth_file.write_text(payload) + # find_admin_user reads the import-time AUTH_FILE constant, so redirect that + # rather than relying on cwd. + monkeypatch.setattr(P, "AUTH_FILE", str(auth_file)) assert P.find_admin_user() is None diff --git a/tests/test_cookbook_state_path.py b/tests/test_cookbook_state_path.py deleted file mode 100644 index eb239988e..000000000 --- a/tests/test_cookbook_state_path.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Guard: cookbook_state.json must be located via DATA_DIR, not hardcoded /app/data -(which breaks native runs) or a relative os.environ fallback.""" -import pathlib - -ROOT = pathlib.Path(__file__).resolve().parent.parent -FILES = [ - "src/cookbook_serve_lifecycle.py", - "src/builtin_actions.py", - "routes/codex_routes.py", - "routes/cookbook_routes.py", -] - - -def test_no_hardcoded_app_data_cookbook_state(): - for rel in FILES: - text = (ROOT / rel).read_text(encoding="utf-8") - for ln in text.splitlines(): - if ln.strip().startswith("#"): - continue - assert "/app/data/cookbook_state" not in ln, f"{rel}: hardcoded /app/data: {ln.strip()}" - assert 'os.environ.get("DATA_DIR"' not in ln, f"{rel}: relative DATA_DIR env fallback: {ln.strip()}" - - -def test_cookbook_state_uses_datadir_constant(): - # Each file that references cookbook_state.json should import the DATA_DIR constant. - for rel in FILES: - text = (ROOT / rel).read_text(encoding="utf-8") - if "cookbook_state.json" in text: - assert "from core.constants import DATA_DIR" in text, f"{rel}: missing DATA_DIR import" diff --git a/tests/test_research_owner_scope_routes.py b/tests/test_research_owner_scope_routes.py index 06253ab7a..18eef3311 100644 --- a/tests/test_research_owner_scope_routes.py +++ b/tests/test_research_owner_scope_routes.py @@ -11,6 +11,16 @@ from fastapi import HTTPException from routes.research_routes import setup_research_routes +@pytest.fixture(autouse=True) +def _redirect_research_dir(tmp_path, monkeypatch): + # Deep-research paths are resolved from an import-time constant now, so chdir + # no longer redirects them. Point the constant the routes read at the temp dir. + monkeypatch.setattr( + "routes.research_routes.DEEP_RESEARCH_DIR", + str(tmp_path / "data" / "deep_research"), + ) + + def _request(user: str): return SimpleNamespace(state=SimpleNamespace(current_user=user)) diff --git a/tests/test_security_regressions.py b/tests/test_security_regressions.py index 2ca468fc7..57b495dba 100644 --- a/tests/test_security_regressions.py +++ b/tests/test_security_regressions.py @@ -946,7 +946,7 @@ def _import_mcp_routes(): def test_mcp_oauth_paths_resolve_under_data_dir(tmp_path, monkeypatch): mcp_routes = _import_mcp_routes() - monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data")) + monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth")) resolved = Path(mcp_routes._resolve_mcp_oauth_path("gmail/credentials.json", "token_file")) @@ -963,7 +963,7 @@ def test_mcp_oauth_paths_reject_escapes(tmp_path, monkeypatch, raw_path): from fastapi import HTTPException mcp_routes = _import_mcp_routes() - monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data")) + monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth")) with pytest.raises(HTTPException) as exc: mcp_routes._resolve_mcp_oauth_path(raw_path, "token_file") @@ -974,7 +974,7 @@ def test_mcp_oauth_filename_join_cannot_escape_base(tmp_path, monkeypatch): from fastapi import HTTPException mcp_routes = _import_mcp_routes() - monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data")) + monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth")) safe_dir = mcp_routes._resolve_mcp_oauth_path("gmail", "dir") with pytest.raises(HTTPException): @@ -983,7 +983,7 @@ def test_mcp_oauth_filename_join_cannot_escape_base(tmp_path, monkeypatch): def test_mcp_oauth_config_sanitizes_paths_and_env(tmp_path, monkeypatch): mcp_routes = _import_mcp_routes() - monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data")) + monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth")) cfg = mcp_routes._sanitize_mcp_oauth_config({ "provider": "google", diff --git a/tests/test_setup_admin_user.py b/tests/test_setup_admin_user.py index f3edda53a..9ecfb416b 100644 --- a/tests/test_setup_admin_user.py +++ b/tests/test_setup_admin_user.py @@ -13,7 +13,7 @@ def _load_setup_module(): def test_create_default_admin_normalizes_env_username(tmp_path, monkeypatch): setup_module = _load_setup_module() - monkeypatch.setattr(setup_module, "DATA_DIR", str(tmp_path)) + monkeypatch.setattr(setup_module, "AUTH_FILE", str(tmp_path / "auth.json")) monkeypatch.setenv("ODYSSEUS_ADMIN_USER", " AdminUser ") monkeypatch.setenv("ODYSSEUS_ADMIN_PASSWORD", "temporary-password")