refactor(constants): single source of truth for data dir (#3368)

* refactor(constants): single source of truth for data dir + merge core/src constants

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* docs(contributing): use named src.constants for data paths, drop core/constants references

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mike
2026-06-08 09:58:52 +02:00
committed by GitHub
parent adc6ac9394
commit ac94885c84
56 changed files with 279 additions and 243 deletions
+7
View File
@@ -56,6 +56,13 @@ SEARXNG_INSTANCE=http://localhost:8080
# SQLite database path (default: sqlite:///./data/app.db)
# DATABASE_URL=sqlite:///./data/app.db
# ============================================================
# Data directory
# ============================================================
# Move everything that lives under data/ - settings, sessions, database, auth,
# cache, uploads, etc. - to another path:
# ODYSSEUS_DATA_DIR=C:\path\to\dir
# ============================================================
# Auth & Security
# ============================================================
+3 -3
View File
@@ -98,11 +98,11 @@ If you are unsure whether a change is "visual," it is. Default to attaching a sc
Don't hardcode values that the project already exposes through a constant or a helper. Hardcoded literals drift out of sync, break on non-default deployments, and reintroduce bugs we've already fixed.
- **Filesystem paths:** never build writable paths from `Path(__file__)...` into the source tree or hardcode `/app/...`. Use `DATA_DIR` (and the other path constants) from `core.constants`, e.g. `Path(DATA_DIR) / "logs" / "x.log"`. The source tree is read-only in Docker, and `/app/...` does not exist on native runs. Guard directory creation so an unwritable path degrades gracefully instead of crashing at import.
- **Internal API / loopback URLs:** don't hardcode `http://localhost:7000`. Use `internal_api_base()` from `core.constants` (it honors `ODYSSEUS_INTERNAL_BASE` / `APP_PORT`).
- **Filesystem paths:** never build writable paths from `Path(__file__)...` into the source tree, hardcode `/app/...`, or use a relative `"data/..."` string. Every persisted file and directory has a named constant in `src/constants.py` (for example `AUTH_FILE`, `USER_PREFS_FILE`, `SETTINGS_FILE`, `TTS_CACHE_DIR`, `CHROMA_DIR`). Import and use that named constant; do not re-derive the path locally with `os.path.join(DATA_DIR, "x.json")` or `DATA_DIR / "x.json"`. `DATA_DIR` is the single place that reads `ODYSSEUS_DATA_DIR`, so use it directly only for dynamic paths that have no fixed name (for example per-owner files). If a data file or directory has no constant yet, add one to `src/constants.py`. The source tree is read-only in Docker and `/app/...` does not exist on native runs; guard directory creation so an unwritable path degrades gracefully instead of crashing at import.
- **Internal API / loopback URLs:** don't hardcode `http://localhost:7000`. Use `internal_api_base()` from `src.constants` (it honors `ODYSSEUS_INTERNAL_BASE` / `APP_PORT`).
- **Ports, limits, model lists, and similar:** reuse the existing constant if one exists; if it doesn't and the value is used in more than one place, add a constant rather than copying the literal.
If you need a value that has no constant or helper yet, add one in the appropriate module (usually `core/constants.py` or `src/constants.py`) and import it, rather than repeating a literal across files.
If you need a value that has no constant or helper yet, add it to `src/constants.py` (the single source of truth for paths and config; `core/constants.py` only re-exports it for backward compatibility) and import it, rather than repeating a literal across files.
**Commits:** use [Conventional Commits](https://www.conventionalcommits.org), `type(scope): summary` (e.g. `fix(search): ...`, `feat(notes): ...`, `docs(contributing): ...`). Common types: `fix`, `feat`, `refactor`, `docs`, `test`, `chore`, `ci`. Keep the subject short and imperative; put the "why" in the body when it isn't obvious.
+3 -3
View File
@@ -51,7 +51,7 @@ from starlette.middleware.base import BaseHTTPMiddleware
# Core imports
from core.constants import (
BASE_DIR, STATIC_DIR, SESSIONS_FILE,
REQUEST_TIMEOUT, OPENAI_API_KEY,
REQUEST_TIMEOUT, OPENAI_API_KEY, AUTH_FILE,
)
from core.database import SessionLocal, ApiToken
from core.middleware import SecurityHeadersMiddleware, is_cors_preflight
@@ -954,7 +954,7 @@ async def _startup_event():
owners = set()
try:
import json as _json
auth_path = "data/auth.json"
auth_path = AUTH_FILE
with open(auth_path, encoding="utf-8") as f:
users = _json.load(f).get("users", {})
owners.update(users.keys())
@@ -1001,7 +1001,7 @@ async def _startup_event():
# does not make an existing library look empty after auth/account changes.
try:
import json as _json
auth_path = "data/auth.json"
auth_path = AUTH_FILE
with open(auth_path, encoding="utf-8") as f:
users = _json.load(f).get("users", {})
primary_owner = None
+3 -1
View File
@@ -14,6 +14,8 @@ import uuid
import bcrypt
from src.constants import AUTH_FILE
PAIRING_VERSION = 1
COMPANION_SCOPE = "chat"
@@ -61,7 +63,7 @@ def lan_ip_candidates() -> list[str]:
def find_admin_user() -> str | None:
"""Resolve an admin username from the auth file at AUTH_FILE (schema uses
is_admin), falling back to the first user."""
auth_path = os.path.join("data", "auth.json")
auth_path = AUTH_FILE
try:
with open(auth_path, "r", encoding="utf-8") as f:
data = json.load(f)
+2 -3
View File
@@ -37,9 +37,8 @@ DEFAULT_PRIVILEGES = {
ADMIN_PRIVILEGES = {k: (True if isinstance(v, bool) else (0 if isinstance(v, int) else [])) for k, v in DEFAULT_PRIVILEGES.items()}
ADMIN_PRIVILEGES["allowed_models_restricted"] = False
DEFAULT_AUTH_PATH = os.path.join(
Path(__file__).parent.parent, "data", "auth.json"
)
from src.constants import AUTH_FILE
DEFAULT_AUTH_PATH = AUTH_FILE
TOKEN_TTL = 60 * 60 * 24 * 7 # 7 days
# Usernames the auth + middleware layer reserve as internal "synthetic owner"
+10 -57
View File
@@ -1,59 +1,12 @@
# src/constants.py
"""Application-wide constants and configuration values."""
import os
# core/constants.py
"""Backward-compatible shim — the single source of truth is src/constants.py.
APP_VERSION = "0.9.1"
# Base paths
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/"
STATIC_DIR = os.path.join(BASE_DIR, "static")
DATA_DIR = os.path.join(BASE_DIR, "data")
# Data file paths
SESSIONS_FILE = os.path.join(DATA_DIR, "sessions.json")
MEMORY_FILE = os.path.join(DATA_DIR, "memory.json")
MEMORY_DOC = os.path.join(DATA_DIR, "memory_doc.md")
PERSONAL_DIR = os.path.join(DATA_DIR, "personal_docs")
RUNBOOK_DIR = os.path.join(PERSONAL_DIR, "runbook")
UPLOAD_DIR = os.path.join(DATA_DIR, "uploads")
FEATURES_FILE = os.path.join(DATA_DIR, "features.json")
SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json")
# API Configuration
MAX_CONTEXT_MESSAGES = 90
REQUEST_TIMEOUT = 20
OPENAI_COMPAT_PATH = "/v1/chat/completions"
# Environment variables with defaults
DEFAULT_HOST = os.getenv("LLM_HOST", "localhost")
LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()]
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
SEARXNG_INSTANCE = os.getenv('SEARXNG_INSTANCE', 'http://localhost:8080')
# Cleanup configuration
CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true"
CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24"))
# Default parameters
DEFAULT_TEMPERATURE = 1.0
DEFAULT_MAX_TOKENS = 0
def internal_api_base() -> str:
"""Base URL for in-process loopback calls to Odysseus's own API.
Agent tools and background jobs reach admin-gated routes by calling the
running server over HTTP. Resolution order:
1. ODYSSEUS_INTERNAL_BASE - explicit override (e.g. behind a TLS proxy).
2. APP_PORT - http://127.0.0.1:$APP_PORT (docker-compose).
3. Fallback http://127.0.0.1:7000 - legacy default.
127.0.0.1 (not "localhost") avoids IPv6/DNS ambiguity for a strictly-local
call. Without this, loopback tools fail with "All connection attempts
failed" whenever the server is not on port 7000.
Historically there were two copies of this module (this one lagged behind at
APP_VERSION 0.9.1 and was missing the consolidated tool-output constants). To
kill the drift, this now simply re-exports everything from src.constants so
there is exactly one place that defines paths and reads ODYSSEUS_DATA_DIR.
internal_api_base() also lives in src.constants now and is re-exported here so
existing `from core.constants import internal_api_base` callers keep working.
"""
override = os.environ.get("ODYSSEUS_INTERNAL_BASE")
if override:
return override.rstrip("/")
return f"http://127.0.0.1:{os.environ.get('APP_PORT', '7000')}"
from src.constants import * # noqa: F401,F403
from src.constants import internal_api_base # noqa: F401 (explicit: functions aren't covered by some linters' * checks)
+7 -6
View File
@@ -29,8 +29,9 @@ class TimestampMixin:
def updated_at(cls):
return Column(DateTime, default=utcnow_naive, onupdate=utcnow_naive, nullable=False)
# Get database URL from environment, default to SQLite
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./data/app.db")
# Get database URL from environment, default to SQLite in DATA_DIR
from src.constants import DATA_DIR, AUTH_FILE, MEMORY_FILE, USER_PREFS_FILE, SETTINGS_FILE
DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite:///{DATA_DIR}/app.db")
# Create engine
engine = create_engine(
@@ -1065,7 +1066,7 @@ def _migrate_assign_legacy_owner():
# fell through to "first user" every time.
auth_path = os.path.join(os.path.dirname(DATABASE_URL.replace("sqlite:///", "")), "auth.json")
if not os.path.isabs(auth_path):
auth_path = os.path.join("data", "auth.json")
auth_path = AUTH_FILE
admin_user = None
try:
with open(auth_path, "r", encoding="utf-8") as f:
@@ -1118,7 +1119,7 @@ def _migrate_assign_legacy_owner():
logger.warning(f"Legacy owner migration failed: {e}")
# Also migrate memory.json
mem_path = os.path.join("data", "memory.json")
mem_path = MEMORY_FILE
try:
if os.path.exists(mem_path):
with open(mem_path, "r", encoding="utf-8") as f:
@@ -1136,7 +1137,7 @@ def _migrate_assign_legacy_owner():
logger.warning(f"memory.json legacy migration failed: {e}")
# Also migrate user_prefs.json to per-user format
prefs_path = os.path.join("data", "user_prefs.json")
prefs_path = USER_PREFS_FILE
try:
if os.path.exists(prefs_path):
with open(prefs_path, "r", encoding="utf-8") as f:
@@ -1530,7 +1531,7 @@ def _migrate_seed_email_account():
import json as _json
import uuid as _uuid
from pathlib import Path
settings_file = Path("data/settings.json")
settings_file = Path(SETTINGS_FILE)
if not settings_file.exists():
return
try:
+6 -5
View File
@@ -31,7 +31,8 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
server = Server("email")
EMAIL_SOCKET_TIMEOUT = float(os.environ.get("EMAIL_SOCKET_TIMEOUT", "20"))
DATA_DIR = Path(__file__).resolve().parent.parent / "data"
from src.constants import DATA_DIR as _DATA_DIR, APP_DB, EMAIL_CACHE_DB, SETTINGS_FILE as _SETTINGS_FILE, MAIL_ATTACHMENTS_DIR
DATA_DIR = Path(_DATA_DIR)
def _b(value) -> bytes:
@@ -63,7 +64,7 @@ def _clean_header_value(value) -> str:
def _db_path() -> Path:
return DATA_DIR / "app.db"
return Path(APP_DB)
def _list_accounts_raw() -> list:
@@ -162,7 +163,7 @@ def _load_config(account: str | None = None) -> dict:
"trash_folder": os.environ.get("TRASH_FOLDER", "Trash"),
"cache_db": os.environ.get(
"EMAIL_CACHE_DB",
str(DATA_DIR / "email_cache.db"),
EMAIL_CACHE_DB,
),
"account_id": None,
"account_name": None,
@@ -204,7 +205,7 @@ def _load_config(account: str | None = None) -> dict:
else:
# Legacy fallback: settings.json flat keys
try:
settings_path = Path(__file__).resolve().parent.parent / "data" / "settings.json"
settings_path = Path(_SETTINGS_FILE)
if settings_path.exists():
settings = json.loads(settings_path.read_text(encoding="utf-8"))
for key in (
@@ -1061,7 +1062,7 @@ def _download_attachment(uid, index, folder="INBOX", account=None):
raw = msg_data[0][1]
msg = email.message_from_bytes(raw)
target_dir = DATA_DIR / "mail-attachments" / f"{folder}_{uid}"
target_dir = Path(MAIL_ATTACHMENTS_DIR) / f"{folder}_{uid}"
filepath = _extract_attachment_to_disk(msg, index, target_dir)
if not filepath:
return {"error": f"Attachment index {index} not found"}
+3 -1
View File
@@ -16,6 +16,8 @@ from mcp.types import Tool, TextContent
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from src.constants import GENERATED_IMAGES_DIR
server = Server("image_gen")
@@ -121,7 +123,7 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
_pub_base = (get_setting("app_public_url", "") or "").rstrip("/")
if img.get("b64_json"):
img_dir = Path("data/generated_images")
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
filename = f"{uuid.uuid4().hex[:12]}.png"
img_path = img_dir / filename
+5 -5
View File
@@ -31,7 +31,7 @@ from core.database import (
CalendarEvent,
CalendarCal,
)
from src.constants import DATA_DIR
from src.constants import DATA_DIR, SKILLS_DIR, SKILLS_FILE, GALLERY_DIR, GALLERY_UPLOADS_DIR
logger = logging.getLogger(__name__)
@@ -107,7 +107,7 @@ def setup_admin_wipe_routes(session_manager):
# Skills live as SKILL.md files under data/skills/. Drop
# the entire directory; the SkillsManager re-creates the
# tree on next write.
skills_dir = os.path.join(DATA_DIR, "skills")
skills_dir = SKILLS_DIR
count = 0
if os.path.isdir(skills_dir):
# Count SKILL.md files for the response — quick walk.
@@ -115,7 +115,7 @@ def setup_admin_wipe_routes(session_manager):
count += sum(1 for f in files if f == "SKILL.md")
_rmtree_quiet(skills_dir)
# Legacy fallback file
legacy = os.path.join(DATA_DIR, "skills.json")
legacy = SKILLS_FILE
if os.path.exists(legacy):
try:
os.remove(legacy)
@@ -151,8 +151,8 @@ def setup_admin_wipe_routes(session_manager):
db.query(GalleryAlbum).delete()
db.commit()
# Also drop the upload dir so disk doesn't keep orphans.
_rmtree_quiet(os.path.join(DATA_DIR, "gallery"))
_rmtree_quiet(os.path.join(DATA_DIR, "gallery_uploads"))
_rmtree_quiet(GALLERY_DIR)
_rmtree_quiet(GALLERY_UPLOADS_DIR)
return {"status": "deleted", "kind": kind, "count": count}
if kind == "calendar":
+4 -4
View File
@@ -17,7 +17,7 @@ from fastapi.responses import StreamingResponse
from src.auth_helpers import require_authenticated_request, require_user
from src.tool_implementations import do_manage_notes
from core.constants import DATA_DIR
from src.constants import COOKBOOK_STATE_FILE
COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"}
@@ -425,8 +425,8 @@ def setup_codex_routes(
def _read_cookbook_state() -> dict:
from pathlib import Path as _Path
import os as _os, json as _json
p = _Path(DATA_DIR) / "cookbook_state.json"
import json as _json
p = _Path(COOKBOOK_STATE_FILE)
if not p.exists():
return {}
try:
@@ -734,7 +734,7 @@ def setup_codex_routes(
import time as _t, json as _json
from core.atomic_io import atomic_write_json
from pathlib import Path as _Path
cookbook_state_path = _Path(DATA_DIR) / "cookbook_state.json"
cookbook_state_path = _Path(COOKBOOK_STATE_FILE)
try:
state = _json.loads(cookbook_state_path.read_text(encoding="utf-8"))
except Exception:
+4 -3
View File
@@ -25,9 +25,10 @@ from src.url_safety import check_outbound_url
logger = logging.getLogger(__name__)
DATA_DIR = Path(__file__).resolve().parent.parent / "data"
SETTINGS_FILE = DATA_DIR / "settings.json"
LOCAL_CONTACTS_FILE = DATA_DIR / "contacts.json"
from src.constants import DATA_DIR as _DATA_DIR, SETTINGS_FILE as _SETTINGS_FILE, CONTACTS_FILE as _CONTACTS_FILE
DATA_DIR = Path(_DATA_DIR)
SETTINGS_FILE = Path(_SETTINGS_FILE)
LOCAL_CONTACTS_FILE = Path(_CONTACTS_FILE)
def _load_settings():
+3 -2
View File
@@ -15,6 +15,7 @@ from pathlib import Path
from fastapi import APIRouter, HTTPException, Request, Depends
from src.auth_helpers import require_user
from src.constants import COOKBOOK_STATE_FILE
from pydantic import BaseModel
from core.middleware import require_admin
@@ -33,7 +34,7 @@ from core.platform_compat import (
get_wsl_windows_user_profile,
)
from routes.shell_routes import TMUX_LOG_DIR
from core.constants import DATA_DIR
from src.constants import COOKBOOK_STATE_FILE
logger = logging.getLogger(__name__)
@@ -61,7 +62,7 @@ _HF_TOKEN_STATUS_SNIPPET = (
def setup_cookbook_routes() -> APIRouter:
router = APIRouter(tags=["cookbook"])
_cookbook_state_path = Path(DATA_DIR) / "cookbook_state.json"
_cookbook_state_path = Path(COOKBOOK_STATE_FILE)
def _mask_secret(value: str) -> str:
if not value:
+2 -4
View File
@@ -11,6 +11,7 @@ from sqlalchemy import case, func, or_
from core.database import SessionLocal, Document, DocumentVersion
from core.database import Session as DbSession
from src.auth_helpers import get_current_user
from src.constants import MAIL_ATTACHMENTS_DIR
logger = logging.getLogger(__name__)
@@ -1542,10 +1543,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
# don't import from a routes file (cycle-prone). Same env override
# as email_routes (ODYSSEUS_MAIL_ATTACHMENTS_DIR).
from pathlib import Path as _Path
import os as _os
_DATA_DIR = _Path(__file__).resolve().parent.parent / "data"
_BASE = _os.environ.get("ODYSSEUS_MAIL_ATTACHMENTS_DIR", str(_DATA_DIR / "mail-attachments"))
_COMPOSE_DIR = _Path(_BASE) / "_compose"
_COMPOSE_DIR = _Path(MAIL_ATTACHMENTS_DIR) / "_compose"
_COMPOSE_DIR.mkdir(parents=True, exist_ok=True)
user = get_current_user(request)
+5 -4
View File
@@ -254,16 +254,17 @@ def _cleanup_compose_uploads(tokens) -> None:
pass
DATA_DIR = Path(__file__).resolve().parent.parent / "data"
SETTINGS_FILE = DATA_DIR / "settings.json"
from src.constants import DATA_DIR as _DATA_DIR, MAIL_ATTACHMENTS_DIR, SETTINGS_FILE as _SETTINGS_FILE, SCHEDULED_EMAILS_DB
DATA_DIR = Path(_DATA_DIR)
SETTINGS_FILE = Path(_SETTINGS_FILE)
# MAIL_ATTACHMENTS_DIR is resolved in src.constants (presumably still honoring
# the ODYSSEUS_MAIL_ATTACHMENTS_DIR deploy-time override; verify there). It
# defaults to a subdir of the install's data/ tree so the app works
# out-of-the-box without a hardcoded /home/<user>/ path.
ATTACHMENTS_DIR = Path(os.environ.get("ODYSSEUS_MAIL_ATTACHMENTS_DIR", str(DATA_DIR / "mail-attachments")))
ATTACHMENTS_DIR = Path(MAIL_ATTACHMENTS_DIR)
ATTACHMENTS_DIR.mkdir(parents=True, exist_ok=True)
COMPOSE_UPLOADS_DIR = ATTACHMENTS_DIR / "_compose"
COMPOSE_UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
SCHEDULED_DB = DATA_DIR / "scheduled_emails.db"
SCHEDULED_DB = Path(SCHEDULED_EMAILS_DB)
OWNER_SCOPED_EMAIL_CACHE_TABLES = {
+2 -1
View File
@@ -32,6 +32,7 @@ from email.mime.multipart import MIMEMultipart
from fastapi import APIRouter, Query, UploadFile, File, BackgroundTasks, HTTPException, Depends, Request
from fastapi.responses import FileResponse
from src.constants import DATA_DIR
from src.llm_core import llm_call_async
from src.upload_limits import read_upload_limited
@@ -2904,7 +2905,7 @@ def setup_email_routes():
from pathlib import Path as _P
import json as _json
_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
path = _P(f"data/email_urgency_state_{_slug}.json")
path = _P(DATA_DIR) / f"email_urgency_state_{_slug}.json"
if not path.exists():
return {"total_unread": 0, "total_urgent": 0, "max_score": 0, "per_uid": {}}
try:
+3 -9
View File
@@ -7,12 +7,12 @@ import logging
import asyncio
from pathlib import Path
from fastapi import APIRouter, HTTPException, Form, Depends
from core.constants import BASE_DIR
from core.constants import EMBEDDING_ENDPOINT_FILE, FASTEMBED_CACHE_DIR
from core.middleware import require_admin
logger = logging.getLogger(__name__)
_ENDPOINT_FILE = os.path.join(BASE_DIR, "data", "embedding_endpoint.json")
_ENDPOINT_FILE = EMBEDDING_ENDPOINT_FILE
# Track in-progress downloads
_downloading: dict = {}
@@ -35,13 +35,7 @@ def _cache_dir() -> str:
default lived in /tmp, which many systems wipe on reboot forcing a
full re-download of the embedding model after every restart.
"""
env = os.environ.get("FASTEMBED_CACHE_PATH")
if env:
return env
return os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"data", "fastembed_cache",
)
return FASTEMBED_CACHE_DIR
def _model_cache_name(hf_source: str) -> str:
+3 -1
View File
@@ -18,9 +18,11 @@ import httpx
from fastapi import APIRouter
from fastapi.responses import Response
from src.constants import EMOJI_CACHE_DIR
logger = logging.getLogger(__name__)
_CACHE_DIR = Path(__file__).resolve().parent.parent / "data" / "emoji_cache"
_CACHE_DIR = Path(EMOJI_CACHE_DIR)
# OpenMoji "black" set = monochrome line-art SVGs. Filenames are the codepoints
# in UPPERCASE (FE0F dropped, same as we compute), '-' joined.
_OPENMOJI_BASE = "https://cdn.jsdelivr.net/npm/openmoji@15.0.0/black/svg"
+4 -3
View File
@@ -14,6 +14,7 @@ from core.database import SessionLocal, GalleryImage, GalleryAlbum, ModelEndpoin
from core.database import Session as DbSession
from src.auth_helpers import get_current_user, require_privilege
from src.upload_limits import read_upload_limited
from src.constants import GENERATED_IMAGES_DIR
from routes.gallery_helpers import (
GalleryPatch, _extract_exif, _image_to_dict, _owner_filter, _human_size,
@@ -33,7 +34,7 @@ def _sanitize_gallery_filename(filename: str) -> str:
return safe_name
GALLERY_IMAGE_DIR = Path("data/generated_images")
GALLERY_IMAGE_DIR = Path(GENERATED_IMAGES_DIR)
def _gallery_image_path(filename: str) -> Path:
@@ -133,7 +134,7 @@ def setup_gallery_routes() -> APIRouter:
return {"ok": False, "duplicate": True, "filename": existing.filename,
"id": existing.id, "message": "Duplicate photo skipped"}
img_dir = Path("data/generated_images")
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
ext = file.filename.rsplit(".", 1)[-1].lower() if "." in file.filename else "png"
@@ -199,7 +200,7 @@ def setup_gallery_routes() -> APIRouter:
raise HTTPException(400, "No image provided")
content = await read_upload_limited(file, GALLERY_UPLOAD_MAX_BYTES, "Gallery replacement")
img_dir = Path("data/generated_images")
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
img_path = img_dir / _sanitize_gallery_filename(img.filename)
img_path.write_bytes(content)
+2 -2
View File
@@ -13,7 +13,7 @@ import httpx
from core.database import McpServer, SessionLocal
from core.middleware import require_admin
from src.constants import DATA_DIR
from src.constants import DATA_DIR, MCP_OAUTH_DIR
from src.mcp_manager import McpManager
logger = logging.getLogger(__name__)
@@ -23,7 +23,7 @@ router = APIRouter(prefix="/api/mcp", tags=["mcp"])
def _mcp_oauth_base_dir() -> Path:
"""Directory that may contain OAuth files managed by Odysseus."""
return (Path(DATA_DIR) / "mcp_oauth").resolve(strict=False)
return Path(MCP_OAUTH_DIR).resolve(strict=False)
def _resolve_mcp_oauth_path(raw_path, field_name: str) -> str:
+3 -2
View File
@@ -11,6 +11,7 @@ from pydantic import BaseModel
from core.database import SessionLocal, Note
from src.auth_helpers import get_current_user
from src.constants import DATA_DIR
from sqlalchemy.orm.attributes import flag_modified
logger = logging.getLogger(__name__)
@@ -170,7 +171,7 @@ async def dispatch_reminder(
from datetime import datetime as _dt, timezone as _tz, timedelta as _td
from pathlib import Path as _P
_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
cache_path = _P(f"data/note_pings_{_slug}.json")
cache_path = _P(DATA_DIR) / f"note_pings_{_slug}.json"
if cache_path.exists():
cache = _json.loads(cache_path.read_text(encoding="utf-8"))
last = cache.get(cache_key)
@@ -523,7 +524,7 @@ async def dispatch_reminder(
_STATE = cache_path
if _STATE is None:
_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
_STATE = _P(f"data/note_pings_{_slug}.json")
_STATE = _P(DATA_DIR) / f"note_pings_{_slug}.json"
_STATE.parent.mkdir(parents=True, exist_ok=True)
try:
_cache = cache or (_json.loads(_STATE.read_text(encoding="utf-8")) if _STATE.exists() else {})
+2 -2
View File
@@ -6,13 +6,13 @@ import uuid
from typing import List, Tuple
from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends
from src.request_models import DirectoryRequest
from core.constants import BASE_DIR, PERSONAL_DIR
from core.constants import BASE_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR
from src.rag_singleton import get_rag_manager
from src.auth_helpers import require_privilege, require_user
from core.middleware import require_admin
from src.upload_handler import secure_filename
UPLOADS_DIR = os.path.join(BASE_DIR, "data", "personal_uploads")
UPLOADS_DIR = PERSONAL_UPLOADS_DIR
MAX_PERSONAL_UPLOAD_BYTES = int(
os.getenv("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024))
)
+2 -1
View File
@@ -4,8 +4,9 @@ import os
from typing import Optional
from fastapi import APIRouter, Request
from src.auth_helpers import get_current_user
from src.constants import USER_PREFS_FILE
PREFS_FILE = os.path.join("data", "user_prefs.json")
PREFS_FILE = USER_PREFS_FILE
def _load():
+9 -8
View File
@@ -14,6 +14,7 @@ from fastapi.responses import HTMLResponse, StreamingResponse
from pydantic import BaseModel, Field
from src.endpoint_resolver import resolve_endpoint
from src.auth_helpers import _auth_disabled, get_current_user
from src.constants import DEEP_RESEARCH_DIR
_SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$")
@@ -100,7 +101,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
if entry is not None:
return entry.get("owner", "") == user
# Task no longer in memory — check the persisted JSON.
path = Path("data/deep_research") / f"{session_id}.json"
path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if not path.exists():
return False
try:
@@ -164,7 +165,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
def _assert_owns_research(session_id: str, user: str) -> None:
"""404-not-403 ownership gate for a research session's on-disk JSON.
Use BEFORE returning any data or mutating the file."""
path = Path("data/deep_research") / f"{session_id}.json"
path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if not path.exists():
raise HTTPException(404, "Research not found")
try:
@@ -227,7 +228,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
):
user = _require_user(request)
"""List all completed research for the Library panel."""
data_dir = Path("data/deep_research")
data_dir = Path(DEEP_RESEARCH_DIR)
items = []
for p in data_dir.glob("*.json"):
try:
@@ -277,7 +278,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
summary, stats used by the Library preview panel."""
user = _require_user(request)
_validate_session_id(session_id)
path = Path("data/deep_research") / f"{session_id}.json"
path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if not path.exists():
raise HTTPException(404, "Research not found")
try:
@@ -294,7 +295,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
"""Soft-archive / restore a research report (sets `archived` in its JSON)."""
user = _require_user(request)
_validate_session_id(session_id)
path = Path("data/deep_research") / f"{session_id}.json"
path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if not path.exists():
raise HTTPException(404, "Research not found")
try:
@@ -314,7 +315,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
"""Delete a research result from disk."""
user = _require_user(request)
_validate_session_id(session_id)
data_dir = Path("data/deep_research")
data_dir = Path(DEEP_RESEARCH_DIR)
json_path = data_dir / f"{session_id}.json"
deleted = False
if json_path.exists():
@@ -496,7 +497,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
raise HTTPException(404, "No research found for this session")
result = research_handler.get_result(session_id)
if result is None:
p = Path("data/deep_research") / f"{session_id}.json"
p = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if p.exists():
d = json.loads(p.read_text(encoding="utf-8"))
return {
@@ -536,7 +537,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
sources = research_handler.get_sources(session_id) or []
query = ""
path = Path("data/deep_research") / f"{session_id}.json"
path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if path.exists():
try:
disk = json.loads(path.read_text(encoding="utf-8"))
+3 -2
View File
@@ -13,6 +13,7 @@ from pydantic import BaseModel
from core.database import SessionLocal, ScheduledTask, TaskRun
from core.constants import internal_api_base
from src.auth_helpers import get_current_user
from src.constants import DATA_DIR, EMAIL_URGENCY_CACHE_DIR
from src.task_scheduler import compute_next_run, HOUSEKEEPING_DEFAULTS
from routes.prefs_routes import _load_for_user, _save_for_user
@@ -621,7 +622,7 @@ def setup_task_routes(task_scheduler) -> APIRouter:
removed_files = 0
if action == "check_email_urgency":
cache_dir = Path("data/email_urgency_cache")
cache_dir = Path(EMAIL_URGENCY_CACHE_DIR)
if cache_dir.exists():
for child in cache_dir.glob("*.json"):
try:
@@ -630,7 +631,7 @@ def setup_task_routes(task_scheduler) -> APIRouter:
except Exception:
pass
owner_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (user or "default"))
for state_path in [Path(f"data/email_urgency_state_{owner_slug}.json")]:
for state_path in [Path(DATA_DIR) / f"email_urgency_state_{owner_slug}.json"]:
try:
if state_path.exists():
state_path.unlink()
+2 -1
View File
@@ -17,10 +17,11 @@ from pydantic import BaseModel
from core.middleware import require_admin
from core.platform_compat import IS_WINDOWS, safe_chmod, which_tool
from src.constants import VAULT_FILE as _VAULT_FILE
logger = logging.getLogger(__name__)
VAULT_FILE = Path("data/vault.json")
VAULT_FILE = Path(_VAULT_FILE)
def _find_bw() -> str:
+4 -2
View File
@@ -13,6 +13,8 @@ import json
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.constants import MEMORY_FILE, SKILLS_FILE
def claim_json_entries(entries, owner):
count = 0
@@ -35,8 +37,8 @@ def main():
# 1. Memories (JSON files)
for label, path in [
("memory.json", "data/memory.json"),
("skills.json", "data/skills.json"),
("memory.json", MEMORY_FILE),
("skills.json", SKILLS_FILE),
]:
if not os.path.exists(path):
print(f" {label}: not found, skipping")
+4 -1
View File
@@ -19,6 +19,9 @@ import sys
from pathlib import Path
from typing import List, Tuple
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.constants import PERSONAL_DIR
# Configure logging for the script
logging.basicConfig(
level=logging.INFO,
@@ -45,7 +48,7 @@ def main():
rag_manager = RAGManager()
# Directory to scan
docs_directory = "data/personal_docs"
docs_directory = PERSONAL_DIR
directory_path = Path(docs_directory)
# Check if directory exists
+3 -3
View File
@@ -63,10 +63,10 @@ def migrate_memories():
"""Migrate memory vectors from FAISS to ChromaDB."""
from src.chroma_client import get_chroma_client
from src.embeddings import get_embedding_client
from src.constants import DATA_DIR
from src.constants import MEMORY_VECTORS_DIR, MEMORY_FILE
ids_path = os.path.join(DATA_DIR, "memory_vectors", "ids.json")
memory_path = os.path.join(DATA_DIR, "memory.json")
ids_path = os.path.join(MEMORY_VECTORS_DIR, "ids.json")
memory_path = MEMORY_FILE
if not os.path.exists(ids_path):
logger.info("No memory FAISS index found, skipping memory migration")
+2 -1
View File
@@ -5,6 +5,7 @@ from dataclasses import dataclass
from typing import List, Dict, Any
from src.rag_manager import RAGManager
from src.constants import CHROMA_DIR
@dataclass
@@ -34,7 +35,7 @@ class DocsService:
results = await service.query("what is async await?")
"""
def __init__(self, persist_dir: str = "data/chroma"):
def __init__(self, persist_dir: str = CHROMA_DIR):
self.rag = RAGManager(persist_directory=persist_dir)
async def query(self, query: str, top_k: int = 5) -> List[DocChunk]:
+2 -1
View File
@@ -8,6 +8,7 @@ import os
from .memory import MemoryManager
from .memory_vector import MemoryVectorStore
from src.memory_provider import MemoryRecord, NativeMemoryProvider
from src.constants import DATA_DIR
@dataclass
@@ -38,7 +39,7 @@ class MemoryService:
results = await service.recall("preferences")
"""
def __init__(self, data_dir: str = "data"):
def __init__(self, data_dir: str = DATA_DIR):
self.manager = MemoryManager(data_dir)
self.vector_store = MemoryVectorStore(data_dir) if os.path.exists(
os.path.join(data_dir, "memory_vectors")
+2 -1
View File
@@ -15,10 +15,11 @@ from pathlib import Path
from typing import Optional, Dict
from src.research_utils import is_low_quality
from src.constants import DEEP_RESEARCH_DIR
logger = logging.getLogger(__name__)
RESEARCH_DATA_DIR = Path("data/deep_research")
RESEARCH_DATA_DIR = Path(DEEP_RESEARCH_DIR)
class ResearchHandler:
+3 -1
View File
@@ -9,6 +9,8 @@ import httpx
from pathlib import Path
from typing import Optional, Dict, Any
from src.constants import TTS_CACHE_DIR
logger = logging.getLogger(__name__)
@@ -35,7 +37,7 @@ class TTSService:
"endpoint:<id>" OpenAI-compatible /audio/speech via ModelEndpoint
"""
def __init__(self, cache_dir: str = "data/tts_cache"):
def __init__(self, cache_dir: str = TTS_CACHE_DIR):
self.cache_dir = Path(cache_dir)
self.cache_dir.mkdir(parents=True, exist_ok=True)
self._kokoro = None # lazy-init
+16 -11
View File
@@ -12,19 +12,24 @@ import subprocess
import sys
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
sys.path.insert(0, BASE_DIR)
from src.constants import (
DATA_DIR, AUTH_FILE, UPLOAD_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR,
TTS_CACHE_DIR, GENERATED_IMAGES_DIR, DEEP_RESEARCH_DIR, CHROMA_DIR,
RAG_DIR, MEMORY_VECTORS_DIR,
)
DIRS = [
DATA_DIR,
os.path.join(DATA_DIR, "uploads"),
os.path.join(DATA_DIR, "personal_docs"),
os.path.join(DATA_DIR, "personal_uploads"),
os.path.join(DATA_DIR, "tts_cache"),
os.path.join(DATA_DIR, "generated_images"),
os.path.join(DATA_DIR, "deep_research"),
os.path.join(DATA_DIR, "chroma"),
os.path.join(DATA_DIR, "rag"),
os.path.join(DATA_DIR, "memory_vectors"),
UPLOAD_DIR,
PERSONAL_DIR,
PERSONAL_UPLOADS_DIR,
TTS_CACHE_DIR,
GENERATED_IMAGES_DIR,
DEEP_RESEARCH_DIR,
CHROMA_DIR,
RAG_DIR,
MEMORY_VECTORS_DIR,
os.path.join(BASE_DIR, "logs"),
]
@@ -74,7 +79,7 @@ def _prompt_admin_credentials():
def create_default_admin():
"""Create an initial admin user if none exists."""
auth_path = os.path.join(DATA_DIR, "auth.json")
auth_path = AUTH_FILE
if os.path.exists(auth_path):
print(" [skip] auth.json already exists")
return "exists"
+4 -2
View File
@@ -14,6 +14,8 @@ import uuid
import time
from typing import Dict, Optional, Tuple
from src.constants import GENERATED_IMAGES_DIR
logger = logging.getLogger(__name__)
AI_CHAT_TIMEOUT = 120 # seconds for a single LLM call
@@ -1715,7 +1717,7 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
# GPT image models always return b64_json; DALL-E may return url
if img.get("b64_json"):
img_dir = Path("data/generated_images")
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
filename = f"{uuid.uuid4().hex[:12]}.png"
img_path = img_dir / filename
@@ -1728,7 +1730,7 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
try:
dl_resp = httpx.get(img["url"], timeout=60)
if dl_resp.status_code == 200:
img_dir = Path("data/generated_images")
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
filename = f"{uuid.uuid4().hex[:12]}.png"
img_path = img_dir / filename
+4 -3
View File
@@ -38,9 +38,10 @@ from core.platform_compat import (
pid_alive,
)
_DATA_DIR = Path(os.environ.get("DATA_DIR", "data"))
_JOBS_DIR = _DATA_DIR / "bg_jobs"
_STORE = _DATA_DIR / "bg_jobs.json"
from src.constants import BG_JOBS_DIR, BG_JOBS_FILE
_JOBS_DIR = Path(BG_JOBS_DIR)
_STORE = Path(BG_JOBS_FILE)
# A job that runs longer than this is presumed stuck and reaped (the agent
# still gets a "timed out" follow-up so nothing hangs forever).
+9 -8
View File
@@ -12,7 +12,8 @@ from typing import Tuple
from src.auth_helpers import owner_filter
from core.platform_compat import IS_WINDOWS, find_bash
from core.constants import DATA_DIR, internal_api_base
from core.constants import internal_api_base
from src.constants import DATA_DIR, DEEP_RESEARCH_DIR, TIDY_CALENDAR_STATE_FILE, EMAIL_URGENCY_CACHE_DIR, COOKBOOK_STATE_FILE
logger = logging.getLogger(__name__)
@@ -349,7 +350,7 @@ async def action_tidy_research(owner: str, **kwargs) -> Tuple[str, bool]:
try:
from pathlib import Path
import json as _json
research_dir = Path("data/deep_research")
research_dir = Path(DEEP_RESEARCH_DIR)
if not research_dir.exists():
raise TaskNoop("no research directory")
files = list(research_dir.glob("*.json"))
@@ -387,7 +388,7 @@ async def action_tidy_calendar(owner: str, **kwargs) -> Tuple[str, bool]:
from core.database import SessionLocal, CalendarEvent
from sqlalchemy import func
STATE_FILE = Path("data/tidy_calendar_state.json")
STATE_FILE = Path(TIDY_CALENDAR_STATE_FILE)
last_watermark = None
try:
if STATE_FILE.exists():
@@ -1304,12 +1305,12 @@ async def action_ping_notes(owner: str, **kwargs) -> Tuple[str, bool]:
# users' entries (review C4). Legacy path kept as fallback so a
# single-user install (empty owner) doesn't lose its history.
_owner_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
STATE = _P(f"data/note_pings_{_owner_slug}.json")
STATE = _P(DATA_DIR) / f"note_pings_{_owner_slug}.json"
STATE.parent.mkdir(parents=True, exist_ok=True)
# One-time migration: if legacy global file exists and per-owner file
# doesn't, seed from global (entries for OTHER owners still get pruned
# on their first run — acceptable, prevents silent loss).
_legacy = _P("data/note_pings.json")
_legacy = _P(DATA_DIR) / "note_pings.json"
if _legacy.exists() and not STATE.exists():
try:
STATE.write_text(_legacy.read_text(encoding="utf-8"), encoding="utf-8")
@@ -1466,8 +1467,8 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
# notified_uids / urgency counts. Empty owner falls back to a generic
# filename for single-user installs (matches prior behaviour).
_owner_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
STATE_PATH = _P(f"data/email_urgency_state_{_owner_slug}.json")
CACHE_DIR = _P("data/email_urgency_cache")
STATE_PATH = _P(DATA_DIR) / f"email_urgency_state_{_owner_slug}.json"
CACHE_DIR = _P(EMAIL_URGENCY_CACHE_DIR)
CACHE_DIR.mkdir(parents=True, exist_ok=True)
STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
AGE_CUTOFF = _dt.utcnow() - _td(days=7)
@@ -2043,7 +2044,7 @@ async def action_cookbook_serve(
except Exception:
end_after_min = 0
state_path = Path(DATA_DIR) / "cookbook_state.json"
state_path = Path(COOKBOOK_STATE_FILE)
try:
state = json.loads(state_path.read_text(encoding="utf-8")) if state_path.exists() else {}
except Exception:
+10 -8
View File
@@ -4,6 +4,8 @@ from typing import List, Optional
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field, field_validator
from src.constants import DATA_DIR as _DATA_DIR_CONST
# Cross-platform OS flag, exposed here so callers can `from src.config import
# IS_WINDOWS`. Defined locally (a trivial `os.name == "nt"`) rather than imported
# from core.platform_compat, to keep this dependency-light config module from
@@ -20,13 +22,13 @@ class DataConfig(BaseSettings):
base_dir: Path = Field(default=Path(__file__).parent.parent, description="Base directory for the application")
# Data paths
data_dir: Path = Field(default=Path("data"), description="Main data directory")
uploads_dir: Path = Field(default=Path("data/uploads"), description="Directory for uploaded files")
sessions_file: Path = Field(default=Path("data/sessions.json"), description="Sessions storage file")
memory_file: Path = Field(default=Path("data/memory.json"), description="Memory storage file")
memory_doc: Path = Field(default=Path("data/memory_doc.md"), description="Memory document file")
personal_dir: Path = Field(default=Path("data/personal_docs"), description="Personal documents directory")
runbook_dir: Path = Field(default=Path("data/personal_docs/runbook"), description="Runbook directory")
data_dir: Path = Field(default=Path(_DATA_DIR_CONST), description="Main data directory")
uploads_dir: Path = Field(default=Path(_DATA_DIR_CONST) / "uploads", description="Directory for uploaded files")
sessions_file: Path = Field(default=Path(_DATA_DIR_CONST) / "sessions.json", description="Sessions storage file")
memory_file: Path = Field(default=Path(_DATA_DIR_CONST) / "memory.json", description="Memory storage file")
memory_doc: Path = Field(default=Path(_DATA_DIR_CONST) / "memory_doc.md", description="Memory document file")
personal_dir: Path = Field(default=Path(_DATA_DIR_CONST) / "personal_docs", description="Personal documents directory")
runbook_dir: Path = Field(default=Path(_DATA_DIR_CONST) / "personal_docs" / "runbook", description="Runbook directory")
# Upload settings
max_upload_size: int = Field(default=10 * 1024 * 1024, description="Maximum upload size in bytes (10MB)")
@@ -139,7 +141,7 @@ class AppConfig(BaseSettings):
base_dir = Path(__file__).parent.parent
# Convert string paths to Path objects relative to base_dir
data_dir = base_dir / "data"
data_dir = Path(_DATA_DIR_CONST)
# Get values from the input dict or use defaults
max_upload_size = v.get("max_upload_size", 10 * 1024 * 1024) if isinstance(v, dict) else 10 * 1024 * 1024
+58 -1
View File
@@ -7,9 +7,12 @@ APP_VERSION = "1.0.0"
# Base paths
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/"
STATIC_DIR = os.path.join(BASE_DIR, "static")
DATA_DIR = os.path.join(BASE_DIR, "data")
# An ODYSSEUS_DATA_DIR that is set but empty (e.g. `ODYSSEUS_DATA_DIR=` left in
# .env) is treated as unset. With a plain getenv default it would yield "",
# and every path derived from DATA_DIR would silently become relative to the
# current working directory.
DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR") or os.path.join(BASE_DIR, "data")
# Data file paths
# Single source of truth: by default every persisted file/dir lives under
# DATA_DIR, and the DATA_DIR assignment is the ONLY place ODYSSEUS_DATA_DIR is
# read. Import these constants instead of re-deriving paths from __file__ or a
# relative "data" literal.
SESSIONS_FILE = os.path.join(DATA_DIR, "sessions.json")
MEMORY_FILE = os.path.join(DATA_DIR, "memory.json")
MEMORY_DOC = os.path.join(DATA_DIR, "memory_doc.md")
@@ -18,6 +21,41 @@ RUNBOOK_DIR = os.path.join(PERSONAL_DIR, "runbook")
UPLOAD_DIR = os.path.join(DATA_DIR, "uploads")
FEATURES_FILE = os.path.join(DATA_DIR, "features.json")
SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json")
AUTH_FILE = os.path.join(DATA_DIR, "auth.json")
USER_PREFS_FILE = os.path.join(DATA_DIR, "user_prefs.json")
PRESETS_FILE = os.path.join(DATA_DIR, "presets.json")
INTEGRATIONS_FILE = os.path.join(DATA_DIR, "integrations.json")
CONTACTS_FILE = os.path.join(DATA_DIR, "contacts.json")
APP_KEY_FILE = os.path.join(DATA_DIR, ".app_key")
EMBEDDING_ENDPOINT_FILE = os.path.join(DATA_DIR, "embedding_endpoint.json")
COOKBOOK_STATE_FILE = os.path.join(DATA_DIR, "cookbook_state.json")
BG_JOBS_FILE = os.path.join(DATA_DIR, "bg_jobs.json")
VAULT_FILE = os.path.join(DATA_DIR, "vault.json")
TIDY_CALENDAR_STATE_FILE = os.path.join(DATA_DIR, "tidy_calendar_state.json")
SKILLS_FILE = os.path.join(DATA_DIR, "skills.json")
APP_DB = os.path.join(DATA_DIR, "app.db")
SCHEDULED_EMAILS_DB = os.path.join(DATA_DIR, "scheduled_emails.db")
EMAIL_CACHE_DB = os.path.join(DATA_DIR, "email_cache.db")
# Data subdirectories
PERSONAL_UPLOADS_DIR = os.path.join(DATA_DIR, "personal_uploads")
EMOJI_CACHE_DIR = os.path.join(DATA_DIR, "emoji_cache")
RAG_DIR = os.path.join(DATA_DIR, "rag")
CHROMA_DIR = os.path.join(DATA_DIR, "chroma")
BG_JOBS_DIR = os.path.join(DATA_DIR, "bg_jobs")
DEEP_RESEARCH_DIR = os.path.join(DATA_DIR, "deep_research")
MCP_OAUTH_DIR = os.path.join(DATA_DIR, "mcp_oauth")
GENERATED_IMAGES_DIR = os.path.join(DATA_DIR, "generated_images")
TTS_CACHE_DIR = os.path.join(DATA_DIR, "tts_cache")
EMAIL_URGENCY_CACHE_DIR = os.path.join(DATA_DIR, "email_urgency_cache")
SKILLS_DIR = os.path.join(DATA_DIR, "skills")
GALLERY_DIR = os.path.join(DATA_DIR, "gallery")
GALLERY_UPLOADS_DIR = os.path.join(DATA_DIR, "gallery_uploads")
MEMORY_VECTORS_DIR = os.path.join(DATA_DIR, "memory_vectors")
# Paths with an intentional dedicated env override, defaulting under DATA_DIR.
# `or` is used instead of a getenv default so that a variable which is set but
# empty falls back to the default rather than producing "" (os.makedirs("")
# raises FileNotFoundError).
MAIL_ATTACHMENTS_DIR = os.getenv("ODYSSEUS_MAIL_ATTACHMENTS_DIR") or os.path.join(DATA_DIR, "mail-attachments")
FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join(DATA_DIR, "fastembed_cache")
# Agent tool output limits (single source of truth — imported by tool_execution.py,
# tool_implementations.py, agent_tools.py, and any other module that needs them)
@@ -44,3 +82,22 @@ CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24"))
# Default parameters
DEFAULT_TEMPERATURE = 1.0
DEFAULT_MAX_TOKENS = 0
def internal_api_base() -> str:
    """Return the base URL for loopback calls to Odysseus's own API.

    Agent tools and background jobs reach admin-gated routes by calling the
    running server over HTTP. Resolution order:

    1. ODYSSEUS_INTERNAL_BASE - explicit override (e.g. behind a TLS proxy);
       trailing slashes are stripped.
    2. APP_PORT - http://127.0.0.1:$APP_PORT (docker-compose).
    3. Fallback http://127.0.0.1:7000 - legacy default.

    A variable that is set but empty or whitespace-only is treated as unset,
    so an empty APP_PORT cannot produce the invalid URL "http://127.0.0.1:".

    127.0.0.1 (not "localhost") avoids IPv6/DNS ambiguity for a strictly-local
    call. Without this, loopback tools fail with "All connection attempts
    failed" whenever the server is not on port 7000.
    """
    override = (os.environ.get("ODYSSEUS_INTERNAL_BASE") or "").strip()
    if override:
        return override.rstrip("/")
    # Blank APP_PORT falls through to the legacy default port.
    port = (os.environ.get("APP_PORT") or "").strip() or "7000"
    return f"http://127.0.0.1:{port}"
+3 -2
View File
@@ -19,7 +19,8 @@ import time
from pathlib import Path
import httpx
from core.constants import DATA_DIR, internal_api_base
from core.constants import internal_api_base
from src.constants import COOKBOOK_STATE_FILE
logger = logging.getLogger(__name__)
@@ -130,7 +131,7 @@ async def _stop_serve(session_id: str, remote_host: str = "", ssh_port: str = ""
async def _tick() -> None:
state_path = Path(DATA_DIR) / "cookbook_state.json"
state_path = Path(COOKBOOK_STATE_FILE)
if not state_path.exists():
return
try:
+4 -8
View File
@@ -14,6 +14,8 @@ Set EMBEDDING_URL in .env, e.g.:
import os
from src.constants import FASTEMBED_CACHE_DIR, EMBEDDING_ENDPOINT_FILE
# Windows: force HuggingFace/fastembed to COPY model files rather than symlink
# them. On a network-share/UNC cache dir Windows can't follow HF's symlinks
# ([WinError 1463] "symbolic link cannot be followed"), so ONNX fails to load the
@@ -117,10 +119,7 @@ class FastEmbedClient:
# Persistent cache under data/ so the model survives reboots and so
# the download lands exactly where the admin panel's _is_downloaded()
# check looks (both default to this same path).
cache_dir = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"data", "fastembed_cache",
)
cache_dir = FASTEMBED_CACHE_DIR
os.makedirs(cache_dir, exist_ok=True)
# Windows self-heal: the HuggingFace-hub cache stores model files as
# symlinks (snapshots/<rev>/model.onnx -> ../../blobs/<hash>). On a
@@ -188,10 +187,7 @@ class FastEmbedClient:
def _load_persisted_endpoint() -> dict:
"""Load the custom embedding endpoint saved from the admin panel."""
try:
endpoint_file = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"data", "embedding_endpoint.json",
)
endpoint_file = EMBEDDING_ENDPOINT_FILE
if os.path.exists(endpoint_file):
import json
data = json.loads(open(endpoint_file, encoding="utf-8").read())
+3 -3
View File
@@ -12,6 +12,8 @@ import os
from datetime import datetime
from typing import Optional
from src.constants import AUTH_FILE
logger = logging.getLogger(__name__)
_task_scheduler = None
@@ -54,9 +56,7 @@ def _resolve_event_owner(owner: Optional[str]) -> Optional[str]:
return owner
try:
from src.constants import DATA_DIR
auth_path = os.path.join(DATA_DIR, "auth.json")
auth_path = AUTH_FILE
with open(auth_path, "r", encoding="utf-8") as f:
users = (json.load(f).get("users") or {})
for username, data in users.items():
+3 -1
View File
@@ -4,8 +4,10 @@ from pathlib import Path
from fastapi import HTTPException
from src.constants import GENERATED_IMAGES_DIR
GENERATED_IMAGE_DIR = Path("data/generated_images")
GENERATED_IMAGE_DIR = Path(GENERATED_IMAGES_DIR)
GENERATED_IMAGE_RE = re.compile(
r"^[a-f0-9]{8,64}\.(png|jpg|jpeg|webp|gif|mp4|mov|webm|mkv|m4v)$"
)
+3 -2
View File
@@ -10,10 +10,11 @@ import httpx
from core.atomic_io import atomic_write_json
from core.platform_compat import safe_chmod
from src.secret_storage import decrypt, encrypt, is_encrypted
from src.constants import DATA_DIR, INTEGRATIONS_FILE, SETTINGS_FILE
log = logging.getLogger(__name__)
DATA_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "integrations.json")
DATA_FILE = INTEGRATIONS_FILE
# ---------------------------------------------------------------------------
# Presets
@@ -471,7 +472,7 @@ def get_integrations_prompt() -> str:
def migrate_from_settings() -> None:
"""If data/settings.json has miniflux_url and miniflux_api_key, create a
Miniflux integration and clear those keys from settings."""
settings_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "settings.json")
settings_path = SETTINGS_FILE
if not os.path.exists(settings_path):
return
+3 -1
View File
@@ -7,6 +7,8 @@ A thin wrapper around VectorRAG for backward compatibility and additional featur
import logging
from typing import List, Dict, Any, Optional
from src.constants import CHROMA_DIR
# Try to import from different possible locations
try:
from rag_vector import VectorRAG
@@ -24,7 +26,7 @@ class RAGManager:
Most methods delegate directly to VectorRAG.
"""
def __init__(self, persist_directory: str = "data/chroma"):
def __init__(self, persist_directory: str = CHROMA_DIR):
"""Initialize the RAGManager with VectorRAG."""
self.vector_rag = VectorRAG(persist_directory=persist_directory)
logger.info("RAGManager initialized as wrapper for VectorRAG")
+3 -2
View File
@@ -6,6 +6,8 @@ import logging
import time
from pathlib import Path
from src.constants import RAG_DIR
logger = logging.getLogger(__name__)
rag_instance = None
@@ -41,8 +43,7 @@ def get_rag_manager():
try:
from src.rag_vector import VectorRAG
base_dir = Path(__file__).parent.parent
persist_dir = os.path.join(base_dir, "data", "rag")
persist_dir = RAG_DIR
rag_instance = VectorRAG(persist_directory=persist_dir)
if not rag_instance.healthy:
+3 -1
View File
@@ -12,6 +12,8 @@ import re
import logging
import numpy as np
from typing import List, Dict, Any, Optional, Set
from src.constants import CHROMA_DIR
from pathlib import Path
from src.embedding_lanes import (
@@ -51,7 +53,7 @@ def _generate_doc_id(text: str, owner: str = "") -> str:
class VectorRAG:
"""RAG system using ChromaDB vector storage with hybrid search."""
def __init__(self, persist_directory: str = "data/chroma"):
def __init__(self, persist_directory: str = CHROMA_DIR):
self.persist_directory = persist_directory
self._collection = None
self._model = None
+2 -1
View File
@@ -16,10 +16,11 @@ from pathlib import Path
from typing import Optional, Dict
from src.research_utils import strip_thinking, is_low_quality
from src.constants import DEEP_RESEARCH_DIR
logger = logging.getLogger(__name__)
RESEARCH_DATA_DIR = Path("data/deep_research")
RESEARCH_DATA_DIR = Path(DEEP_RESEARCH_DIR)
_RESEARCH_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9-]{1,128}$")
+2 -1
View File
@@ -25,10 +25,11 @@ from pathlib import Path
from cryptography.fernet import Fernet, InvalidToken
from core.platform_compat import safe_chmod
from src.constants import APP_KEY_FILE
logger = logging.getLogger(__name__)
_KEY_PATH = Path(__file__).resolve().parent.parent / "data" / ".app_key"
_KEY_PATH = Path(APP_KEY_FILE)
_PREFIX = "enc:"
_fernet: Fernet | None = None
+2 -2
View File
@@ -20,14 +20,14 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Tuple
from src.tool_security import is_public_blocked_tool, owner_is_admin_or_single_user
from src.tool_policy import ToolPolicy
from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS, MAX_DIFF_LINES
from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS, MAX_DIFF_LINES, DATA_DIR
# Persistent working directory for agent subprocesses.
# Resolves to DATA_DIR: <repo_root>/data by default, or ODYSSEUS_DATA_DIR when
# set. This is the bind-mounted volume in Docker (/app/data) and the local data
# directory for manual installs.
# Using this as cwd and HOME prevents the agent from silently creating files
# in ephemeral container layers that are lost on the next rebuild.
_AGENT_WORKDIR = str(pathlib.Path(__file__).parent.parent / "data")
_AGENT_WORKDIR = DATA_DIR
def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]:
+4 -4
View File
@@ -12,7 +12,7 @@ import os
import re
from typing import Any, Dict, List, Optional
from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS
from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS, DEEP_RESEARCH_DIR, VAULT_FILE
from core.constants import internal_api_base
@@ -4057,7 +4057,7 @@ async def do_manage_research(content: str, owner: Optional[str] = None) -> Dict:
args = {}
action = (args.get("action") or "list").lower()
rid = (args.get("id") or args.get("session_id") or args.get("research_id") or "").strip()
data_dir = _Path("data/deep_research")
data_dir = _Path(DEEP_RESEARCH_DIR)
# SECURITY: the research id is interpolated straight into a filesystem
# path (data/deep_research/<rid>.json) for read AND delete. Without this
@@ -4302,7 +4302,7 @@ async def do_manage_contact(content: str, owner: Optional[str] = None) -> Dict:
def _load_vault_config() -> Dict:
"""Load Vaultwarden config from data/vault.json."""
from pathlib import Path
p = Path("data/vault.json")
p = Path(VAULT_FILE)
if p.exists():
try:
return json.loads(p.read_text(encoding="utf-8"))
@@ -4456,7 +4456,7 @@ async def do_vault_unlock(content: str, owner: Optional[str] = None) -> Dict:
# Save session to vault.json
from pathlib import Path
p = Path("data/vault.json")
p = Path(VAULT_FILE)
cfg = {}
if p.exists():
try:
+5 -4
View File
@@ -118,10 +118,11 @@ def test_pairing_payload_shape():
@pytest.mark.parametrize("payload", ["[]", '{"users": []}'])
def test_find_admin_user_ignores_invalid_auth_shape(tmp_path, monkeypatch, payload):
data_dir = tmp_path / "data"
data_dir.mkdir()
(data_dir / "auth.json").write_text(payload)
monkeypatch.chdir(tmp_path)
auth_file = tmp_path / "auth.json"
auth_file.write_text(payload)
# find_admin_user reads the import-time AUTH_FILE constant, so redirect that
# rather than relying on cwd.
monkeypatch.setattr(P, "AUTH_FILE", str(auth_file))
assert P.find_admin_user() is None
-29
View File
@@ -1,29 +0,0 @@
"""Guard: cookbook_state.json must be located via DATA_DIR, not hardcoded /app/data
(which breaks native runs) or a relative os.environ fallback."""
import pathlib
ROOT = pathlib.Path(__file__).resolve().parent.parent
FILES = [
"src/cookbook_serve_lifecycle.py",
"src/builtin_actions.py",
"routes/codex_routes.py",
"routes/cookbook_routes.py",
]
def test_no_hardcoded_app_data_cookbook_state():
for rel in FILES:
text = (ROOT / rel).read_text(encoding="utf-8")
for ln in text.splitlines():
if ln.strip().startswith("#"):
continue
assert "/app/data/cookbook_state" not in ln, f"{rel}: hardcoded /app/data: {ln.strip()}"
assert 'os.environ.get("DATA_DIR"' not in ln, f"{rel}: relative DATA_DIR env fallback: {ln.strip()}"
def test_cookbook_state_uses_datadir_constant():
# Each file that references cookbook_state.json should import the DATA_DIR constant.
for rel in FILES:
text = (ROOT / rel).read_text(encoding="utf-8")
if "cookbook_state.json" in text:
assert "from core.constants import DATA_DIR" in text, f"{rel}: missing DATA_DIR import"
+10
View File
@@ -11,6 +11,16 @@ from fastapi import HTTPException
from routes.research_routes import setup_research_routes
@pytest.fixture(autouse=True)
def _redirect_research_dir(tmp_path, monkeypatch):
    """Point the routes' deep-research directory at a per-test temp path.

    Deep-research paths are resolved from an import-time constant, so changing
    the working directory no longer redirects them; the constant the routes
    read is patched instead.
    """
    temp_research_dir = tmp_path / "data" / "deep_research"
    monkeypatch.setattr(
        "routes.research_routes.DEEP_RESEARCH_DIR",
        str(temp_research_dir),
    )
def _request(user: str):
return SimpleNamespace(state=SimpleNamespace(current_user=user))
+4 -4
View File
@@ -946,7 +946,7 @@ def _import_mcp_routes():
def test_mcp_oauth_paths_resolve_under_data_dir(tmp_path, monkeypatch):
mcp_routes = _import_mcp_routes()
monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data"))
monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth"))
resolved = Path(mcp_routes._resolve_mcp_oauth_path("gmail/credentials.json", "token_file"))
@@ -963,7 +963,7 @@ def test_mcp_oauth_paths_reject_escapes(tmp_path, monkeypatch, raw_path):
from fastapi import HTTPException
mcp_routes = _import_mcp_routes()
monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data"))
monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth"))
with pytest.raises(HTTPException) as exc:
mcp_routes._resolve_mcp_oauth_path(raw_path, "token_file")
@@ -974,7 +974,7 @@ def test_mcp_oauth_filename_join_cannot_escape_base(tmp_path, monkeypatch):
from fastapi import HTTPException
mcp_routes = _import_mcp_routes()
monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data"))
monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth"))
safe_dir = mcp_routes._resolve_mcp_oauth_path("gmail", "dir")
with pytest.raises(HTTPException):
@@ -983,7 +983,7 @@ def test_mcp_oauth_filename_join_cannot_escape_base(tmp_path, monkeypatch):
def test_mcp_oauth_config_sanitizes_paths_and_env(tmp_path, monkeypatch):
mcp_routes = _import_mcp_routes()
monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data"))
monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth"))
cfg = mcp_routes._sanitize_mcp_oauth_config({
"provider": "google",
+1 -1
View File
@@ -13,7 +13,7 @@ def _load_setup_module():
def test_create_default_admin_normalizes_env_username(tmp_path, monkeypatch):
setup_module = _load_setup_module()
monkeypatch.setattr(setup_module, "DATA_DIR", str(tmp_path))
monkeypatch.setattr(setup_module, "AUTH_FILE", str(tmp_path / "auth.json"))
monkeypatch.setenv("ODYSSEUS_ADMIN_USER", " AdminUser ")
monkeypatch.setenv("ODYSSEUS_ADMIN_PASSWORD", "temporary-password")