refactor(constants): single source of truth for data dir (#3368)

* refactor(constants): single source of truth for data dir + merge core/src constants

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* docs(contributing): use named src.constants for data paths, drop core/constants references

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mike
2026-06-08 09:58:52 +02:00
committed by GitHub
parent adc6ac9394
commit ac94885c84
56 changed files with 279 additions and 243 deletions
+7
View File
@@ -56,6 +56,13 @@ SEARXNG_INSTANCE=http://localhost:8080
# SQLite database path (default: app.db inside the data directory, i.e. sqlite:///<data dir>/app.db - see ODYSSEUS_DATA_DIR below)
# DATABASE_URL=sqlite:///./data/app.db
# ============================================================
# Data directory
# ============================================================
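# Move everything that lives under data/ - settings, sessions, database, auth,
# cache, uploads, etc. - to another path. An explicit DATABASE_URL (above) still
# takes precedence for the database location.
# ODYSSEUS_DATA_DIR=/path/to/dir
# ODYSSEUS_DATA_DIR=C:\path\to\dir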
# ============================================================
# Auth & Security
# ============================================================
+3 -3
View File
@@ -98,11 +98,11 @@ If you are unsure whether a change is "visual," it is. Default to attaching a sc
Don't hardcode values that the project already exposes through a constant or a helper. Hardcoded literals drift out of sync, break on non-default deployments, and reintroduce bugs we've already fixed.
- **Filesystem paths:** never build writable paths from `Path(__file__)...` into the source tree or hardcode `/app/...`. Use `DATA_DIR` (and the other path constants) from `core.constants`, e.g. `Path(DATA_DIR) / "logs" / "x.log"`. The source tree is read-only in Docker, and `/app/...` does not exist on native runs. Guard directory creation so an unwritable path degrades gracefully instead of crashing at import.
- **Internal API / loopback URLs:** don't hardcode `http://localhost:7000`. Use `internal_api_base()` from `core.constants` (it honors `ODYSSEUS_INTERNAL_BASE` / `APP_PORT`).
- **Filesystem paths:** never build writable paths from `Path(__file__)...` into the source tree, hardcode `/app/...`, or use a relative `"data/..."` string. Every persisted file and directory has a named constant in `src/constants.py` (for example `AUTH_FILE`, `USER_PREFS_FILE`, `SETTINGS_FILE`, `TTS_CACHE_DIR`, `CHROMA_DIR`). Import and use that named constant; do not re-derive the path locally with `os.path.join(DATA_DIR, "x.json")` or `DATA_DIR / "x.json"`. `DATA_DIR` is the single place that reads `ODYSSEUS_DATA_DIR`, so use it directly only for dynamic paths that have no fixed name (for example per-owner files). If a data file or directory has no constant yet, add one to `src/constants.py`. The source tree is read-only in Docker and `/app/...` does not exist on native runs; guard directory creation so an unwritable path degrades gracefully instead of crashing at import.
- **Internal API / loopback URLs:** don't hardcode `http://localhost:7000`. Use `internal_api_base()` from `src.constants` (it honors `ODYSSEUS_INTERNAL_BASE` / `APP_PORT`).
- **Ports, limits, model lists, and similar:** reuse the existing constant if one exists; if it doesn't and the value is used in more than one place, add a constant rather than copying the literal.
If you need a value that has no constant or helper yet, add one in the appropriate module (usually `core/constants.py` or `src/constants.py`) and import it, rather than repeating a literal across files.
If you need a value that has no constant or helper yet, add it to `src/constants.py` (the single source of truth for paths and config; `core/constants.py` only re-exports it for backward compatibility) and import it, rather than repeating a literal across files.
**Commits:** use [Conventional Commits](https://www.conventionalcommits.org), `type(scope): summary` (e.g. `fix(search): ...`, `feat(notes): ...`, `docs(contributing): ...`). Common types: `fix`, `feat`, `refactor`, `docs`, `test`, `chore`, `ci`. Keep the subject short and imperative; put the "why" in the body when it isn't obvious.
+3 -3
View File
@@ -51,7 +51,7 @@ from starlette.middleware.base import BaseHTTPMiddleware
# Core imports
from core.constants import (
BASE_DIR, STATIC_DIR, SESSIONS_FILE,
REQUEST_TIMEOUT, OPENAI_API_KEY,
REQUEST_TIMEOUT, OPENAI_API_KEY, AUTH_FILE,
)
from core.database import SessionLocal, ApiToken
from core.middleware import SecurityHeadersMiddleware, is_cors_preflight
@@ -954,7 +954,7 @@ async def _startup_event():
owners = set()
try:
import json as _json
auth_path = "data/auth.json"
auth_path = AUTH_FILE
with open(auth_path, encoding="utf-8") as f:
users = _json.load(f).get("users", {})
owners.update(users.keys())
@@ -1001,7 +1001,7 @@ async def _startup_event():
# does not make an existing library look empty after auth/account changes.
try:
import json as _json
auth_path = "data/auth.json"
auth_path = AUTH_FILE
with open(auth_path, encoding="utf-8") as f:
users = _json.load(f).get("users", {})
primary_owner = None
+3 -1
View File
@@ -14,6 +14,8 @@ import uuid
import bcrypt
from src.constants import AUTH_FILE
PAIRING_VERSION = 1
COMPANION_SCOPE = "chat"
@@ -61,7 +63,7 @@ def lan_ip_candidates() -> list[str]:
def find_admin_user() -> str | None:
"""Resolve an admin username from data/auth.json (schema uses is_admin),
falling back to the first user."""
auth_path = os.path.join("data", "auth.json")
auth_path = AUTH_FILE
try:
with open(auth_path, "r", encoding="utf-8") as f:
data = json.load(f)
+2 -3
View File
@@ -37,9 +37,8 @@ DEFAULT_PRIVILEGES = {
ADMIN_PRIVILEGES = {k: (True if isinstance(v, bool) else (0 if isinstance(v, int) else [])) for k, v in DEFAULT_PRIVILEGES.items()}
ADMIN_PRIVILEGES["allowed_models_restricted"] = False
DEFAULT_AUTH_PATH = os.path.join(
Path(__file__).parent.parent, "data", "auth.json"
)
from src.constants import AUTH_FILE
DEFAULT_AUTH_PATH = AUTH_FILE
TOKEN_TTL = 60 * 60 * 24 * 7 # 7 days
# Usernames the auth + middleware layer reserve as internal "synthetic owner"
+11 -58
View File
@@ -1,59 +1,12 @@
# src/constants.py
"""Application-wide constants and configuration values."""
import os
# core/constants.py
"""Backward-compatible shim — the single source of truth is src/constants.py.
APP_VERSION = "0.9.1"
# Base paths
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/"
STATIC_DIR = os.path.join(BASE_DIR, "static")
DATA_DIR = os.path.join(BASE_DIR, "data")
# Data file paths
SESSIONS_FILE = os.path.join(DATA_DIR, "sessions.json")
MEMORY_FILE = os.path.join(DATA_DIR, "memory.json")
MEMORY_DOC = os.path.join(DATA_DIR, "memory_doc.md")
PERSONAL_DIR = os.path.join(DATA_DIR, "personal_docs")
RUNBOOK_DIR = os.path.join(PERSONAL_DIR, "runbook")
UPLOAD_DIR = os.path.join(DATA_DIR, "uploads")
FEATURES_FILE = os.path.join(DATA_DIR, "features.json")
SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json")
# API Configuration
MAX_CONTEXT_MESSAGES = 90
REQUEST_TIMEOUT = 20
OPENAI_COMPAT_PATH = "/v1/chat/completions"
# Environment variables with defaults
DEFAULT_HOST = os.getenv("LLM_HOST", "localhost")
LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()]
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
SEARXNG_INSTANCE = os.getenv('SEARXNG_INSTANCE', 'http://localhost:8080')
# Cleanup configuration
CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true"
CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24"))
# Default parameters
DEFAULT_TEMPERATURE = 1.0
DEFAULT_MAX_TOKENS = 0
def internal_api_base() -> str:
    """Base URL for in-process loopback calls to Odysseus's own API.

    Agent tools and background jobs reach admin-gated routes by calling the
    running server over HTTP. Resolution order:

    1. ODYSSEUS_INTERNAL_BASE - explicit override (e.g. behind a TLS proxy).
    2. APP_PORT - http://127.0.0.1:$APP_PORT (docker-compose).
    3. Fallback http://127.0.0.1:7000 - legacy default.

    127.0.0.1 (not "localhost") avoids IPv6/DNS ambiguity for a strictly-local
    call. Without this, loopback tools fail with "All connection attempts
    failed" whenever the server is not on port 7000.

    Blank or whitespace-only values of either variable are treated as unset,
    and surrounding whitespace is stripped, so a bare ``APP_PORT=`` line in an
    env file falls back to the default instead of producing the invalid URL
    ``http://127.0.0.1:``.

    Returns:
        The base URL with any trailing slash on the override removed.
    """
    override = os.environ.get("ODYSSEUS_INTERNAL_BASE", "").strip()
    if override:
        return override.rstrip("/")
    # `or` (not a .get() default) so that a set-but-empty APP_PORT also falls
    # back to the legacy port.
    port = os.environ.get("APP_PORT", "").strip() or "7000"
    return f"http://127.0.0.1:{port}"
Historically there were two copies of this module (this one lagged behind at
APP_VERSION 0.9.1 and was missing the consolidated tool-output constants). To
kill the drift, this now simply re-exports everything from src.constants so
there is exactly one place that defines paths and reads ODYSSEUS_DATA_DIR.
internal_api_base() also lives in src.constants now and is re-exported here so
existing `from core.constants import internal_api_base` callers keep working.
"""
from src.constants import * # noqa: F401,F403
from src.constants import internal_api_base # noqa: F401 (explicit: functions aren't covered by some linters' * checks)
+7 -6
View File
@@ -29,8 +29,9 @@ class TimestampMixin:
def updated_at(cls):
return Column(DateTime, default=utcnow_naive, onupdate=utcnow_naive, nullable=False)
# Get database URL from environment, default to SQLite
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./data/app.db")
# Get database URL from environment, default to SQLite in DATA_DIR
from src.constants import DATA_DIR, AUTH_FILE, MEMORY_FILE, USER_PREFS_FILE, SETTINGS_FILE
DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite:///{DATA_DIR}/app.db")
# Create engine
engine = create_engine(
@@ -1065,7 +1066,7 @@ def _migrate_assign_legacy_owner():
# fell through to "first user" every time.
auth_path = os.path.join(os.path.dirname(DATABASE_URL.replace("sqlite:///", "")), "auth.json")
if not os.path.isabs(auth_path):
auth_path = os.path.join("data", "auth.json")
auth_path = AUTH_FILE
admin_user = None
try:
with open(auth_path, "r", encoding="utf-8") as f:
@@ -1118,7 +1119,7 @@ def _migrate_assign_legacy_owner():
logger.warning(f"Legacy owner migration failed: {e}")
# Also migrate memory.json
mem_path = os.path.join("data", "memory.json")
mem_path = MEMORY_FILE
try:
if os.path.exists(mem_path):
with open(mem_path, "r", encoding="utf-8") as f:
@@ -1136,7 +1137,7 @@ def _migrate_assign_legacy_owner():
logger.warning(f"memory.json legacy migration failed: {e}")
# Also migrate user_prefs.json to per-user format
prefs_path = os.path.join("data", "user_prefs.json")
prefs_path = USER_PREFS_FILE
try:
if os.path.exists(prefs_path):
with open(prefs_path, "r", encoding="utf-8") as f:
@@ -1530,7 +1531,7 @@ def _migrate_seed_email_account():
import json as _json
import uuid as _uuid
from pathlib import Path
settings_file = Path("data/settings.json")
settings_file = Path(SETTINGS_FILE)
if not settings_file.exists():
return
try:
+6 -5
View File
@@ -31,7 +31,8 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
server = Server("email")
EMAIL_SOCKET_TIMEOUT = float(os.environ.get("EMAIL_SOCKET_TIMEOUT", "20"))
DATA_DIR = Path(__file__).resolve().parent.parent / "data"
from src.constants import DATA_DIR as _DATA_DIR, APP_DB, EMAIL_CACHE_DB, SETTINGS_FILE as _SETTINGS_FILE, MAIL_ATTACHMENTS_DIR
DATA_DIR = Path(_DATA_DIR)
def _b(value) -> bytes:
@@ -63,7 +64,7 @@ def _clean_header_value(value) -> str:
def _db_path() -> Path:
return DATA_DIR / "app.db"
return Path(APP_DB)
def _list_accounts_raw() -> list:
@@ -162,7 +163,7 @@ def _load_config(account: str | None = None) -> dict:
"trash_folder": os.environ.get("TRASH_FOLDER", "Trash"),
"cache_db": os.environ.get(
"EMAIL_CACHE_DB",
str(DATA_DIR / "email_cache.db"),
EMAIL_CACHE_DB,
),
"account_id": None,
"account_name": None,
@@ -204,7 +205,7 @@ def _load_config(account: str | None = None) -> dict:
else:
# Legacy fallback: settings.json flat keys
try:
settings_path = Path(__file__).resolve().parent.parent / "data" / "settings.json"
settings_path = Path(_SETTINGS_FILE)
if settings_path.exists():
settings = json.loads(settings_path.read_text(encoding="utf-8"))
for key in (
@@ -1061,7 +1062,7 @@ def _download_attachment(uid, index, folder="INBOX", account=None):
raw = msg_data[0][1]
msg = email.message_from_bytes(raw)
target_dir = DATA_DIR / "mail-attachments" / f"{folder}_{uid}"
target_dir = Path(MAIL_ATTACHMENTS_DIR) / f"{folder}_{uid}"
filepath = _extract_attachment_to_disk(msg, index, target_dir)
if not filepath:
return {"error": f"Attachment index {index} not found"}
+3 -1
View File
@@ -16,6 +16,8 @@ from mcp.types import Tool, TextContent
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from src.constants import GENERATED_IMAGES_DIR
server = Server("image_gen")
@@ -121,7 +123,7 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
_pub_base = (get_setting("app_public_url", "") or "").rstrip("/")
if img.get("b64_json"):
img_dir = Path("data/generated_images")
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
filename = f"{uuid.uuid4().hex[:12]}.png"
img_path = img_dir / filename
+5 -5
View File
@@ -31,7 +31,7 @@ from core.database import (
CalendarEvent,
CalendarCal,
)
from src.constants import DATA_DIR
from src.constants import DATA_DIR, SKILLS_DIR, SKILLS_FILE, GALLERY_DIR, GALLERY_UPLOADS_DIR
logger = logging.getLogger(__name__)
@@ -107,7 +107,7 @@ def setup_admin_wipe_routes(session_manager):
# Skills live as SKILL.md files under data/skills/. Drop
# the entire directory; the SkillsManager re-creates the
# tree on next write.
skills_dir = os.path.join(DATA_DIR, "skills")
skills_dir = SKILLS_DIR
count = 0
if os.path.isdir(skills_dir):
# Count SKILL.md files for the response — quick walk.
@@ -115,7 +115,7 @@ def setup_admin_wipe_routes(session_manager):
count += sum(1 for f in files if f == "SKILL.md")
_rmtree_quiet(skills_dir)
# Legacy fallback file
legacy = os.path.join(DATA_DIR, "skills.json")
legacy = SKILLS_FILE
if os.path.exists(legacy):
try:
os.remove(legacy)
@@ -151,8 +151,8 @@ def setup_admin_wipe_routes(session_manager):
db.query(GalleryAlbum).delete()
db.commit()
# Also drop the upload dir so disk doesn't keep orphans.
_rmtree_quiet(os.path.join(DATA_DIR, "gallery"))
_rmtree_quiet(os.path.join(DATA_DIR, "gallery_uploads"))
_rmtree_quiet(GALLERY_DIR)
_rmtree_quiet(GALLERY_UPLOADS_DIR)
return {"status": "deleted", "kind": kind, "count": count}
if kind == "calendar":
+4 -4
View File
@@ -17,7 +17,7 @@ from fastapi.responses import StreamingResponse
from src.auth_helpers import require_authenticated_request, require_user
from src.tool_implementations import do_manage_notes
from core.constants import DATA_DIR
from src.constants import COOKBOOK_STATE_FILE
COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"}
@@ -425,8 +425,8 @@ def setup_codex_routes(
def _read_cookbook_state() -> dict:
from pathlib import Path as _Path
import os as _os, json as _json
p = _Path(DATA_DIR) / "cookbook_state.json"
import json as _json
p = _Path(COOKBOOK_STATE_FILE)
if not p.exists():
return {}
try:
@@ -734,7 +734,7 @@ def setup_codex_routes(
import time as _t, json as _json
from core.atomic_io import atomic_write_json
from pathlib import Path as _Path
cookbook_state_path = _Path(DATA_DIR) / "cookbook_state.json"
cookbook_state_path = _Path(COOKBOOK_STATE_FILE)
try:
state = _json.loads(cookbook_state_path.read_text(encoding="utf-8"))
except Exception:
+4 -3
View File
@@ -25,9 +25,10 @@ from src.url_safety import check_outbound_url
logger = logging.getLogger(__name__)
DATA_DIR = Path(__file__).resolve().parent.parent / "data"
SETTINGS_FILE = DATA_DIR / "settings.json"
LOCAL_CONTACTS_FILE = DATA_DIR / "contacts.json"
from src.constants import DATA_DIR as _DATA_DIR, SETTINGS_FILE as _SETTINGS_FILE, CONTACTS_FILE as _CONTACTS_FILE
DATA_DIR = Path(_DATA_DIR)
SETTINGS_FILE = Path(_SETTINGS_FILE)
LOCAL_CONTACTS_FILE = Path(_CONTACTS_FILE)
def _load_settings():
+3 -2
View File
@@ -15,6 +15,7 @@ from pathlib import Path
from fastapi import APIRouter, HTTPException, Request, Depends
from src.auth_helpers import require_user
from src.constants import COOKBOOK_STATE_FILE
from pydantic import BaseModel
from core.middleware import require_admin
@@ -33,7 +34,7 @@ from core.platform_compat import (
get_wsl_windows_user_profile,
)
from routes.shell_routes import TMUX_LOG_DIR
from core.constants import DATA_DIR
from src.constants import COOKBOOK_STATE_FILE
logger = logging.getLogger(__name__)
@@ -61,7 +62,7 @@ _HF_TOKEN_STATUS_SNIPPET = (
def setup_cookbook_routes() -> APIRouter:
router = APIRouter(tags=["cookbook"])
_cookbook_state_path = Path(DATA_DIR) / "cookbook_state.json"
_cookbook_state_path = Path(COOKBOOK_STATE_FILE)
def _mask_secret(value: str) -> str:
if not value:
+2 -4
View File
@@ -11,6 +11,7 @@ from sqlalchemy import case, func, or_
from core.database import SessionLocal, Document, DocumentVersion
from core.database import Session as DbSession
from src.auth_helpers import get_current_user
from src.constants import MAIL_ATTACHMENTS_DIR
logger = logging.getLogger(__name__)
@@ -1542,10 +1543,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
# don't import from a routes file (cycle-prone). Same env override
# as email_routes (ODYSSEUS_MAIL_ATTACHMENTS_DIR).
from pathlib import Path as _Path
import os as _os
_DATA_DIR = _Path(__file__).resolve().parent.parent / "data"
_BASE = _os.environ.get("ODYSSEUS_MAIL_ATTACHMENTS_DIR", str(_DATA_DIR / "mail-attachments"))
_COMPOSE_DIR = _Path(_BASE) / "_compose"
_COMPOSE_DIR = _Path(MAIL_ATTACHMENTS_DIR) / "_compose"
_COMPOSE_DIR.mkdir(parents=True, exist_ok=True)
user = get_current_user(request)
+5 -4
View File
@@ -254,16 +254,17 @@ def _cleanup_compose_uploads(tokens) -> None:
pass
DATA_DIR = Path(__file__).resolve().parent.parent / "data"
SETTINGS_FILE = DATA_DIR / "settings.json"
from src.constants import DATA_DIR as _DATA_DIR, MAIL_ATTACHMENTS_DIR, SETTINGS_FILE as _SETTINGS_FILE, SCHEDULED_EMAILS_DB
DATA_DIR = Path(_DATA_DIR)
SETTINGS_FILE = Path(_SETTINGS_FILE)
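# Attachments directory, taken from src.constants.MAIL_ATTACHMENTS_DIR. The
# ODYSSEUS_MAIL_ATTACHMENTS_DIR deploy-time override is no longer read in this
# module (presumably src.constants honors it - verify there). The default is a
# subdir of the data directory so the app works out-of-the-box without
# a hardcoded /home/<user>/ path.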
ATTACHMENTS_DIR = Path(os.environ.get("ODYSSEUS_MAIL_ATTACHMENTS_DIR", str(DATA_DIR / "mail-attachments")))
ATTACHMENTS_DIR = Path(MAIL_ATTACHMENTS_DIR)
ATTACHMENTS_DIR.mkdir(parents=True, exist_ok=True)
COMPOSE_UPLOADS_DIR = ATTACHMENTS_DIR / "_compose"
COMPOSE_UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
SCHEDULED_DB = DATA_DIR / "scheduled_emails.db"
SCHEDULED_DB = Path(SCHEDULED_EMAILS_DB)
OWNER_SCOPED_EMAIL_CACHE_TABLES = {
+2 -1
View File
@@ -32,6 +32,7 @@ from email.mime.multipart import MIMEMultipart
from fastapi import APIRouter, Query, UploadFile, File, BackgroundTasks, HTTPException, Depends, Request
from fastapi.responses import FileResponse
from src.constants import DATA_DIR
from src.llm_core import llm_call_async
from src.upload_limits import read_upload_limited
@@ -2904,7 +2905,7 @@ def setup_email_routes():
from pathlib import Path as _P
import json as _json
_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
path = _P(f"data/email_urgency_state_{_slug}.json")
path = _P(DATA_DIR) / f"email_urgency_state_{_slug}.json"
if not path.exists():
return {"total_unread": 0, "total_urgent": 0, "max_score": 0, "per_uid": {}}
try:
+3 -9
View File
@@ -7,12 +7,12 @@ import logging
import asyncio
from pathlib import Path
from fastapi import APIRouter, HTTPException, Form, Depends
from core.constants import BASE_DIR
from core.constants import EMBEDDING_ENDPOINT_FILE, FASTEMBED_CACHE_DIR
from core.middleware import require_admin
logger = logging.getLogger(__name__)
_ENDPOINT_FILE = os.path.join(BASE_DIR, "data", "embedding_endpoint.json")
_ENDPOINT_FILE = EMBEDDING_ENDPOINT_FILE
# Track in-progress downloads
_downloading: dict = {}
@@ -35,13 +35,7 @@ def _cache_dir() -> str:
default lived in /tmp, which many systems wipe on reboot forcing a
full re-download of the embedding model after every restart.
"""
env = os.environ.get("FASTEMBED_CACHE_PATH")
if env:
return env
return os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"data", "fastembed_cache",
)
return FASTEMBED_CACHE_DIR
def _model_cache_name(hf_source: str) -> str:
+3 -1
View File
@@ -18,9 +18,11 @@ import httpx
from fastapi import APIRouter
from fastapi.responses import Response
from src.constants import EMOJI_CACHE_DIR
logger = logging.getLogger(__name__)
_CACHE_DIR = Path(__file__).resolve().parent.parent / "data" / "emoji_cache"
_CACHE_DIR = Path(EMOJI_CACHE_DIR)
# OpenMoji "black" set = monochrome line-art SVGs. Filenames are the codepoints
# in UPPERCASE (FE0F dropped, same as we compute), '-' joined.
_OPENMOJI_BASE = "https://cdn.jsdelivr.net/npm/openmoji@15.0.0/black/svg"
+4 -3
View File
@@ -14,6 +14,7 @@ from core.database import SessionLocal, GalleryImage, GalleryAlbum, ModelEndpoin
from core.database import Session as DbSession
from src.auth_helpers import get_current_user, require_privilege
from src.upload_limits import read_upload_limited
from src.constants import GENERATED_IMAGES_DIR
from routes.gallery_helpers import (
GalleryPatch, _extract_exif, _image_to_dict, _owner_filter, _human_size,
@@ -33,7 +34,7 @@ def _sanitize_gallery_filename(filename: str) -> str:
return safe_name
GALLERY_IMAGE_DIR = Path("data/generated_images")
GALLERY_IMAGE_DIR = Path(GENERATED_IMAGES_DIR)
def _gallery_image_path(filename: str) -> Path:
@@ -133,7 +134,7 @@ def setup_gallery_routes() -> APIRouter:
return {"ok": False, "duplicate": True, "filename": existing.filename,
"id": existing.id, "message": "Duplicate photo skipped"}
img_dir = Path("data/generated_images")
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
ext = file.filename.rsplit(".", 1)[-1].lower() if "." in file.filename else "png"
@@ -199,7 +200,7 @@ def setup_gallery_routes() -> APIRouter:
raise HTTPException(400, "No image provided")
content = await read_upload_limited(file, GALLERY_UPLOAD_MAX_BYTES, "Gallery replacement")
img_dir = Path("data/generated_images")
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
img_path = img_dir / _sanitize_gallery_filename(img.filename)
img_path.write_bytes(content)
+2 -2
View File
@@ -13,7 +13,7 @@ import httpx
from core.database import McpServer, SessionLocal
from core.middleware import require_admin
from src.constants import DATA_DIR
from src.constants import DATA_DIR, MCP_OAUTH_DIR
from src.mcp_manager import McpManager
logger = logging.getLogger(__name__)
@@ -23,7 +23,7 @@ router = APIRouter(prefix="/api/mcp", tags=["mcp"])
def _mcp_oauth_base_dir() -> Path:
"""Directory that may contain OAuth files managed by Odysseus."""
return (Path(DATA_DIR) / "mcp_oauth").resolve(strict=False)
return Path(MCP_OAUTH_DIR).resolve(strict=False)
def _resolve_mcp_oauth_path(raw_path, field_name: str) -> str:
+3 -2
View File
@@ -11,6 +11,7 @@ from pydantic import BaseModel
from core.database import SessionLocal, Note
from src.auth_helpers import get_current_user
from src.constants import DATA_DIR
from sqlalchemy.orm.attributes import flag_modified
logger = logging.getLogger(__name__)
@@ -170,7 +171,7 @@ async def dispatch_reminder(
from datetime import datetime as _dt, timezone as _tz, timedelta as _td
from pathlib import Path as _P
_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
cache_path = _P(f"data/note_pings_{_slug}.json")
cache_path = _P(DATA_DIR) / f"note_pings_{_slug}.json"
if cache_path.exists():
cache = _json.loads(cache_path.read_text(encoding="utf-8"))
last = cache.get(cache_key)
@@ -523,7 +524,7 @@ async def dispatch_reminder(
_STATE = cache_path
if _STATE is None:
_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
_STATE = _P(f"data/note_pings_{_slug}.json")
_STATE = _P(DATA_DIR) / f"note_pings_{_slug}.json"
_STATE.parent.mkdir(parents=True, exist_ok=True)
try:
_cache = cache or (_json.loads(_STATE.read_text(encoding="utf-8")) if _STATE.exists() else {})
+2 -2
View File
@@ -6,13 +6,13 @@ import uuid
from typing import List, Tuple
from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends
from src.request_models import DirectoryRequest
from core.constants import BASE_DIR, PERSONAL_DIR
from core.constants import BASE_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR
from src.rag_singleton import get_rag_manager
from src.auth_helpers import require_privilege, require_user
from core.middleware import require_admin
from src.upload_handler import secure_filename
UPLOADS_DIR = os.path.join(BASE_DIR, "data", "personal_uploads")
UPLOADS_DIR = PERSONAL_UPLOADS_DIR
MAX_PERSONAL_UPLOAD_BYTES = int(
os.getenv("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024))
)
+2 -1
View File
@@ -4,8 +4,9 @@ import os
from typing import Optional
from fastapi import APIRouter, Request
from src.auth_helpers import get_current_user
from src.constants import USER_PREFS_FILE
PREFS_FILE = os.path.join("data", "user_prefs.json")
PREFS_FILE = USER_PREFS_FILE
def _load():
+9 -8
View File
@@ -14,6 +14,7 @@ from fastapi.responses import HTMLResponse, StreamingResponse
from pydantic import BaseModel, Field
from src.endpoint_resolver import resolve_endpoint
from src.auth_helpers import _auth_disabled, get_current_user
from src.constants import DEEP_RESEARCH_DIR
_SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$")
@@ -100,7 +101,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
if entry is not None:
return entry.get("owner", "") == user
# Task no longer in memory — check the persisted JSON.
path = Path("data/deep_research") / f"{session_id}.json"
path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if not path.exists():
return False
try:
@@ -164,7 +165,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
def _assert_owns_research(session_id: str, user: str) -> None:
"""404-not-403 ownership gate for a research session's on-disk JSON.
Use BEFORE returning any data or mutating the file."""
path = Path("data/deep_research") / f"{session_id}.json"
path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if not path.exists():
raise HTTPException(404, "Research not found")
try:
@@ -227,7 +228,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
):
user = _require_user(request)
"""List all completed research for the Library panel."""
data_dir = Path("data/deep_research")
data_dir = Path(DEEP_RESEARCH_DIR)
items = []
for p in data_dir.glob("*.json"):
try:
@@ -277,7 +278,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
summary, stats used by the Library preview panel."""
user = _require_user(request)
_validate_session_id(session_id)
path = Path("data/deep_research") / f"{session_id}.json"
path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if not path.exists():
raise HTTPException(404, "Research not found")
try:
@@ -294,7 +295,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
"""Soft-archive / restore a research report (sets `archived` in its JSON)."""
user = _require_user(request)
_validate_session_id(session_id)
path = Path("data/deep_research") / f"{session_id}.json"
path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if not path.exists():
raise HTTPException(404, "Research not found")
try:
@@ -314,7 +315,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
"""Delete a research result from disk."""
user = _require_user(request)
_validate_session_id(session_id)
data_dir = Path("data/deep_research")
data_dir = Path(DEEP_RESEARCH_DIR)
json_path = data_dir / f"{session_id}.json"
deleted = False
if json_path.exists():
@@ -496,7 +497,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
raise HTTPException(404, "No research found for this session")
result = research_handler.get_result(session_id)
if result is None:
p = Path("data/deep_research") / f"{session_id}.json"
p = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if p.exists():
d = json.loads(p.read_text(encoding="utf-8"))
return {
@@ -536,7 +537,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
sources = research_handler.get_sources(session_id) or []
query = ""
path = Path("data/deep_research") / f"{session_id}.json"
path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
if path.exists():
try:
disk = json.loads(path.read_text(encoding="utf-8"))
+3 -2
View File
@@ -13,6 +13,7 @@ from pydantic import BaseModel
from core.database import SessionLocal, ScheduledTask, TaskRun
from core.constants import internal_api_base
from src.auth_helpers import get_current_user
from src.constants import DATA_DIR, EMAIL_URGENCY_CACHE_DIR
from src.task_scheduler import compute_next_run, HOUSEKEEPING_DEFAULTS
from routes.prefs_routes import _load_for_user, _save_for_user
@@ -621,7 +622,7 @@ def setup_task_routes(task_scheduler) -> APIRouter:
removed_files = 0
if action == "check_email_urgency":
cache_dir = Path("data/email_urgency_cache")
cache_dir = Path(EMAIL_URGENCY_CACHE_DIR)
if cache_dir.exists():
for child in cache_dir.glob("*.json"):
try:
@@ -630,7 +631,7 @@ def setup_task_routes(task_scheduler) -> APIRouter:
except Exception:
pass
owner_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (user or "default"))
for state_path in [Path(f"data/email_urgency_state_{owner_slug}.json")]:
for state_path in [Path(DATA_DIR) / f"email_urgency_state_{owner_slug}.json"]:
try:
if state_path.exists():
state_path.unlink()
+2 -1
View File
@@ -17,10 +17,11 @@ from pydantic import BaseModel
from core.middleware import require_admin
from core.platform_compat import IS_WINDOWS, safe_chmod, which_tool
from src.constants import VAULT_FILE as _VAULT_FILE
logger = logging.getLogger(__name__)
VAULT_FILE = Path("data/vault.json")
VAULT_FILE = Path(_VAULT_FILE)
def _find_bw() -> str:
+4 -2
View File
@@ -13,6 +13,8 @@ import json
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.constants import MEMORY_FILE, SKILLS_FILE
def claim_json_entries(entries, owner):
count = 0
@@ -35,8 +37,8 @@ def main():
# 1. Memories (JSON files)
for label, path in [
("memory.json", "data/memory.json"),
("skills.json", "data/skills.json"),
("memory.json", MEMORY_FILE),
("skills.json", SKILLS_FILE),
]:
if not os.path.exists(path):
print(f" {label}: not found, skipping")
+4 -1
View File
@@ -19,6 +19,9 @@ import sys
from pathlib import Path
from typing import List, Tuple
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.constants import PERSONAL_DIR
# Configure logging for the script
logging.basicConfig(
level=logging.INFO,
@@ -45,7 +48,7 @@ def main():
rag_manager = RAGManager()
# Directory to scan
docs_directory = "data/personal_docs"
docs_directory = PERSONAL_DIR
directory_path = Path(docs_directory)
# Check if directory exists
+3 -3
View File
@@ -63,10 +63,10 @@ def migrate_memories():
"""Migrate memory vectors from FAISS to ChromaDB."""
from src.chroma_client import get_chroma_client
from src.embeddings import get_embedding_client
from src.constants import DATA_DIR
from src.constants import MEMORY_VECTORS_DIR, MEMORY_FILE
ids_path = os.path.join(DATA_DIR, "memory_vectors", "ids.json")
memory_path = os.path.join(DATA_DIR, "memory.json")
ids_path = os.path.join(MEMORY_VECTORS_DIR, "ids.json")
memory_path = MEMORY_FILE
if not os.path.exists(ids_path):
logger.info("No memory FAISS index found, skipping memory migration")
+2 -1
View File
@@ -5,6 +5,7 @@ from dataclasses import dataclass
from typing import List, Dict, Any
from src.rag_manager import RAGManager
from src.constants import CHROMA_DIR
@dataclass
@@ -34,7 +35,7 @@ class DocsService:
results = await service.query("what is async await?")
"""
def __init__(self, persist_dir: str = "data/chroma"):
def __init__(self, persist_dir: str = CHROMA_DIR):
self.rag = RAGManager(persist_directory=persist_dir)
async def query(self, query: str, top_k: int = 5) -> List[DocChunk]:
+2 -1
View File
@@ -8,6 +8,7 @@ import os
from .memory import MemoryManager
from .memory_vector import MemoryVectorStore
from src.memory_provider import MemoryRecord, NativeMemoryProvider
from src.constants import DATA_DIR
@dataclass
@@ -38,7 +39,7 @@ class MemoryService:
results = await service.recall("preferences")
"""
def __init__(self, data_dir: str = "data"):
def __init__(self, data_dir: str = DATA_DIR):
self.manager = MemoryManager(data_dir)
self.vector_store = MemoryVectorStore(data_dir) if os.path.exists(
os.path.join(data_dir, "memory_vectors")
+2 -1
View File
@@ -15,10 +15,11 @@ from pathlib import Path
from typing import Optional, Dict
from src.research_utils import is_low_quality
from src.constants import DEEP_RESEARCH_DIR
logger = logging.getLogger(__name__)
RESEARCH_DATA_DIR = Path("data/deep_research")
RESEARCH_DATA_DIR = Path(DEEP_RESEARCH_DIR)
class ResearchHandler:
+3 -1
View File
@@ -9,6 +9,8 @@ import httpx
from pathlib import Path
from typing import Optional, Dict, Any
from src.constants import TTS_CACHE_DIR
logger = logging.getLogger(__name__)
@@ -35,7 +37,7 @@ class TTSService:
"endpoint:<id>" OpenAI-compatible /audio/speech via ModelEndpoint
"""
def __init__(self, cache_dir: str = "data/tts_cache"):
def __init__(self, cache_dir: str = TTS_CACHE_DIR):
self.cache_dir = Path(cache_dir)
self.cache_dir.mkdir(parents=True, exist_ok=True)
self._kokoro = None # lazy-init
+16 -11
View File
@@ -12,19 +12,24 @@ import subprocess
import sys
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
sys.path.insert(0, BASE_DIR)
from src.constants import (
DATA_DIR, AUTH_FILE, UPLOAD_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR,
TTS_CACHE_DIR, GENERATED_IMAGES_DIR, DEEP_RESEARCH_DIR, CHROMA_DIR,
RAG_DIR, MEMORY_VECTORS_DIR,
)
DIRS = [
DATA_DIR,
os.path.join(DATA_DIR, "uploads"),
os.path.join(DATA_DIR, "personal_docs"),
os.path.join(DATA_DIR, "personal_uploads"),
os.path.join(DATA_DIR, "tts_cache"),
os.path.join(DATA_DIR, "generated_images"),
os.path.join(DATA_DIR, "deep_research"),
os.path.join(DATA_DIR, "chroma"),
os.path.join(DATA_DIR, "rag"),
os.path.join(DATA_DIR, "memory_vectors"),
UPLOAD_DIR,
PERSONAL_DIR,
PERSONAL_UPLOADS_DIR,
TTS_CACHE_DIR,
GENERATED_IMAGES_DIR,
DEEP_RESEARCH_DIR,
CHROMA_DIR,
RAG_DIR,
MEMORY_VECTORS_DIR,
os.path.join(BASE_DIR, "logs"),
]
@@ -74,7 +79,7 @@ def _prompt_admin_credentials():
def create_default_admin():
"""Create an initial admin user if none exists."""
auth_path = os.path.join(DATA_DIR, "auth.json")
auth_path = AUTH_FILE
if os.path.exists(auth_path):
print(" [skip] auth.json already exists")
return "exists"
+4 -2
View File
@@ -14,6 +14,8 @@ import uuid
import time
from typing import Dict, Optional, Tuple
from src.constants import GENERATED_IMAGES_DIR
logger = logging.getLogger(__name__)
AI_CHAT_TIMEOUT = 120 # seconds for a single LLM call
@@ -1715,7 +1717,7 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
# GPT image models always return b64_json; DALL-E may return url
if img.get("b64_json"):
img_dir = Path("data/generated_images")
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
filename = f"{uuid.uuid4().hex[:12]}.png"
img_path = img_dir / filename
@@ -1728,7 +1730,7 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
try:
dl_resp = httpx.get(img["url"], timeout=60)
if dl_resp.status_code == 200:
img_dir = Path("data/generated_images")
img_dir = Path(GENERATED_IMAGES_DIR)
img_dir.mkdir(parents=True, exist_ok=True)
filename = f"{uuid.uuid4().hex[:12]}.png"
img_path = img_dir / filename
+4 -3
View File
@@ -38,9 +38,10 @@ from core.platform_compat import (
pid_alive,
)
_DATA_DIR = Path(os.environ.get("DATA_DIR", "data"))
_JOBS_DIR = _DATA_DIR / "bg_jobs"
_STORE = _DATA_DIR / "bg_jobs.json"
from src.constants import BG_JOBS_DIR, BG_JOBS_FILE
_JOBS_DIR = Path(BG_JOBS_DIR)
_STORE = Path(BG_JOBS_FILE)
# A job that runs longer than this is presumed stuck and reaped (the agent
# still gets a "timed out" follow-up so nothing hangs forever).
+9 -8
View File
@@ -12,7 +12,8 @@ from typing import Tuple
from src.auth_helpers import owner_filter
from core.platform_compat import IS_WINDOWS, find_bash
from core.constants import DATA_DIR, internal_api_base
from core.constants import internal_api_base
from src.constants import DATA_DIR, DEEP_RESEARCH_DIR, TIDY_CALENDAR_STATE_FILE, EMAIL_URGENCY_CACHE_DIR, COOKBOOK_STATE_FILE
logger = logging.getLogger(__name__)
@@ -349,7 +350,7 @@ async def action_tidy_research(owner: str, **kwargs) -> Tuple[str, bool]:
try:
from pathlib import Path
import json as _json
research_dir = Path("data/deep_research")
research_dir = Path(DEEP_RESEARCH_DIR)
if not research_dir.exists():
raise TaskNoop("no research directory")
files = list(research_dir.glob("*.json"))
@@ -387,7 +388,7 @@ async def action_tidy_calendar(owner: str, **kwargs) -> Tuple[str, bool]:
from core.database import SessionLocal, CalendarEvent
from sqlalchemy import func
STATE_FILE = Path("data/tidy_calendar_state.json")
STATE_FILE = Path(TIDY_CALENDAR_STATE_FILE)
last_watermark = None
try:
if STATE_FILE.exists():
@@ -1304,12 +1305,12 @@ async def action_ping_notes(owner: str, **kwargs) -> Tuple[str, bool]:
# users' entries (review C4). Legacy path kept as fallback so a
# single-user install (empty owner) doesn't lose its history.
_owner_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
STATE = _P(f"data/note_pings_{_owner_slug}.json")
STATE = _P(DATA_DIR) / f"note_pings_{_owner_slug}.json"
STATE.parent.mkdir(parents=True, exist_ok=True)
# One-time migration: if legacy global file exists and per-owner file
# doesn't, seed from global (entries for OTHER owners still get pruned
# on their first run — acceptable, prevents silent loss).
_legacy = _P("data/note_pings.json")
_legacy = _P(DATA_DIR) / "note_pings.json"
if _legacy.exists() and not STATE.exists():
try:
STATE.write_text(_legacy.read_text(encoding="utf-8"), encoding="utf-8")
@@ -1466,8 +1467,8 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
# notified_uids / urgency counts. Empty owner falls back to a generic
# filename for single-user installs (matches prior behaviour).
_owner_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
STATE_PATH = _P(f"data/email_urgency_state_{_owner_slug}.json")
CACHE_DIR = _P("data/email_urgency_cache")
STATE_PATH = _P(DATA_DIR) / f"email_urgency_state_{_owner_slug}.json"
CACHE_DIR = _P(EMAIL_URGENCY_CACHE_DIR)
CACHE_DIR.mkdir(parents=True, exist_ok=True)
STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
AGE_CUTOFF = _dt.utcnow() - _td(days=7)
@@ -2043,7 +2044,7 @@ async def action_cookbook_serve(
except Exception:
end_after_min = 0
state_path = Path(DATA_DIR) / "cookbook_state.json"
state_path = Path(COOKBOOK_STATE_FILE)
try:
state = json.loads(state_path.read_text(encoding="utf-8")) if state_path.exists() else {}
except Exception:
+10 -8
View File
@@ -4,6 +4,8 @@ from typing import List, Optional
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field, field_validator
from src.constants import DATA_DIR as _DATA_DIR_CONST
# Cross-platform OS flag, exposed here so callers can `from src.config import
# IS_WINDOWS`. Defined locally (a trivial `os.name == "nt"`) rather than imported
# from core.platform_compat, to keep this dependency-light config module from
@@ -20,13 +22,13 @@ class DataConfig(BaseSettings):
base_dir: Path = Field(default=Path(__file__).parent.parent, description="Base directory for the application")
# Data paths
data_dir: Path = Field(default=Path("data"), description="Main data directory")
uploads_dir: Path = Field(default=Path("data/uploads"), description="Directory for uploaded files")
sessions_file: Path = Field(default=Path("data/sessions.json"), description="Sessions storage file")
memory_file: Path = Field(default=Path("data/memory.json"), description="Memory storage file")
memory_doc: Path = Field(default=Path("data/memory_doc.md"), description="Memory document file")
personal_dir: Path = Field(default=Path("data/personal_docs"), description="Personal documents directory")
runbook_dir: Path = Field(default=Path("data/personal_docs/runbook"), description="Runbook directory")
data_dir: Path = Field(default=Path(_DATA_DIR_CONST), description="Main data directory")
uploads_dir: Path = Field(default=Path(_DATA_DIR_CONST) / "uploads", description="Directory for uploaded files")
sessions_file: Path = Field(default=Path(_DATA_DIR_CONST) / "sessions.json", description="Sessions storage file")
memory_file: Path = Field(default=Path(_DATA_DIR_CONST) / "memory.json", description="Memory storage file")
memory_doc: Path = Field(default=Path(_DATA_DIR_CONST) / "memory_doc.md", description="Memory document file")
personal_dir: Path = Field(default=Path(_DATA_DIR_CONST) / "personal_docs", description="Personal documents directory")
runbook_dir: Path = Field(default=Path(_DATA_DIR_CONST) / "personal_docs" / "runbook", description="Runbook directory")
# Upload settings
max_upload_size: int = Field(default=10 * 1024 * 1024, description="Maximum upload size in bytes (10MB)")
@@ -139,7 +141,7 @@ class AppConfig(BaseSettings):
base_dir = Path(__file__).parent.parent
# Convert string paths to Path objects relative to base_dir
data_dir = base_dir / "data"
data_dir = Path(_DATA_DIR_CONST)
# Get values from the input dict or use defaults
max_upload_size = v.get("max_upload_size", 10 * 1024 * 1024) if isinstance(v, dict) else 10 * 1024 * 1024
+58 -1
View File
@@ -7,9 +7,12 @@ APP_VERSION = "1.0.0"
# Base paths
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/"
STATIC_DIR = os.path.join(BASE_DIR, "static")
DATA_DIR = os.path.join(BASE_DIR, "data")
# ``or`` rather than a getenv default: a variable that is set but blank (for
# example forwarded unset through a compose/env file) must count as unset.
# Otherwise DATA_DIR would be "" and every path derived from it below would
# silently become relative to the current working directory.
DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR") or os.path.join(BASE_DIR, "data")
# Data file paths
# Single source of truth: every persisted file/dir lives under DATA_DIR, which
# is the ONLY place ODYSSEUS_DATA_DIR is read. Import these constants instead of
# re-deriving paths from __file__ or a relative "data" literal.
SESSIONS_FILE = os.path.join(DATA_DIR, "sessions.json")
MEMORY_FILE = os.path.join(DATA_DIR, "memory.json")
MEMORY_DOC = os.path.join(DATA_DIR, "memory_doc.md")
@@ -18,6 +21,41 @@ RUNBOOK_DIR = os.path.join(PERSONAL_DIR, "runbook")
UPLOAD_DIR = os.path.join(DATA_DIR, "uploads")
FEATURES_FILE = os.path.join(DATA_DIR, "features.json")
SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json")
AUTH_FILE = os.path.join(DATA_DIR, "auth.json")
USER_PREFS_FILE = os.path.join(DATA_DIR, "user_prefs.json")
PRESETS_FILE = os.path.join(DATA_DIR, "presets.json")
INTEGRATIONS_FILE = os.path.join(DATA_DIR, "integrations.json")
CONTACTS_FILE = os.path.join(DATA_DIR, "contacts.json")
APP_KEY_FILE = os.path.join(DATA_DIR, ".app_key")
EMBEDDING_ENDPOINT_FILE = os.path.join(DATA_DIR, "embedding_endpoint.json")
COOKBOOK_STATE_FILE = os.path.join(DATA_DIR, "cookbook_state.json")
BG_JOBS_FILE = os.path.join(DATA_DIR, "bg_jobs.json")
VAULT_FILE = os.path.join(DATA_DIR, "vault.json")
TIDY_CALENDAR_STATE_FILE = os.path.join(DATA_DIR, "tidy_calendar_state.json")
SKILLS_FILE = os.path.join(DATA_DIR, "skills.json")
APP_DB = os.path.join(DATA_DIR, "app.db")
SCHEDULED_EMAILS_DB = os.path.join(DATA_DIR, "scheduled_emails.db")
EMAIL_CACHE_DB = os.path.join(DATA_DIR, "email_cache.db")
# Data subdirectories
PERSONAL_UPLOADS_DIR = os.path.join(DATA_DIR, "personal_uploads")
EMOJI_CACHE_DIR = os.path.join(DATA_DIR, "emoji_cache")
RAG_DIR = os.path.join(DATA_DIR, "rag")
CHROMA_DIR = os.path.join(DATA_DIR, "chroma")
BG_JOBS_DIR = os.path.join(DATA_DIR, "bg_jobs")
DEEP_RESEARCH_DIR = os.path.join(DATA_DIR, "deep_research")
MCP_OAUTH_DIR = os.path.join(DATA_DIR, "mcp_oauth")
GENERATED_IMAGES_DIR = os.path.join(DATA_DIR, "generated_images")
TTS_CACHE_DIR = os.path.join(DATA_DIR, "tts_cache")
EMAIL_URGENCY_CACHE_DIR = os.path.join(DATA_DIR, "email_urgency_cache")
SKILLS_DIR = os.path.join(DATA_DIR, "skills")
GALLERY_DIR = os.path.join(DATA_DIR, "gallery")
GALLERY_UPLOADS_DIR = os.path.join(DATA_DIR, "gallery_uploads")
MEMORY_VECTORS_DIR = os.path.join(DATA_DIR, "memory_vectors")
# Paths with an intentional dedicated env override, defaulting under DATA_DIR.
# ``or`` rather than a getenv default: a variable that is set but blank falls
# back to the default instead of yielding an empty path (os.makedirs("") raises
# FileNotFoundError).
MAIL_ATTACHMENTS_DIR = os.getenv("ODYSSEUS_MAIL_ATTACHMENTS_DIR") or os.path.join(DATA_DIR, "mail-attachments")
FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join(DATA_DIR, "fastembed_cache")
# Agent tool output limits (single source of truth — imported by tool_execution.py,
# tool_implementations.py, agent_tools.py, and any other module that needs them)
@@ -44,3 +82,22 @@ CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24"))
# Default parameters
DEFAULT_TEMPERATURE = 1.0
DEFAULT_MAX_TOKENS = 0
def internal_api_base() -> str:
    """Return the base URL for loopback calls to Odysseus's own API.

    Agent tools and background jobs reach admin-gated routes by calling the
    running server over HTTP. Resolution order:

    1. ODYSSEUS_INTERNAL_BASE - explicit override (e.g. behind a TLS proxy).
    2. APP_PORT - http://127.0.0.1:$APP_PORT (docker-compose).
    3. Fallback http://127.0.0.1:7000 - legacy default.

    127.0.0.1 (not "localhost") avoids IPv6/DNS ambiguity for a strictly-local
    call. Without this, loopback tools fail with "All connection attempts
    failed" whenever the server is not on port 7000.

    A variable that is set but blank is treated as unset, so an empty APP_PORT
    falls through to the legacy port instead of producing the unusable URL
    "http://127.0.0.1:".

    Returns:
        The base URL with no trailing slash.
    """
    override = (os.environ.get("ODYSSEUS_INTERNAL_BASE") or "").strip()
    if override:
        return override.rstrip("/")
    # Blank or whitespace-only APP_PORT must not end up in the URL.
    port = (os.environ.get("APP_PORT") or "").strip() or "7000"
    return f"http://127.0.0.1:{port}"
+3 -2
View File
@@ -19,7 +19,8 @@ import time
from pathlib import Path
import httpx
from core.constants import DATA_DIR, internal_api_base
from core.constants import internal_api_base
from src.constants import COOKBOOK_STATE_FILE
logger = logging.getLogger(__name__)
@@ -130,7 +131,7 @@ async def _stop_serve(session_id: str, remote_host: str = "", ssh_port: str = ""
async def _tick() -> None:
state_path = Path(DATA_DIR) / "cookbook_state.json"
state_path = Path(COOKBOOK_STATE_FILE)
if not state_path.exists():
return
try:
+4 -8
View File
@@ -14,6 +14,8 @@ Set EMBEDDING_URL in .env, e.g.:
import os
from src.constants import FASTEMBED_CACHE_DIR, EMBEDDING_ENDPOINT_FILE
# Windows: force HuggingFace/fastembed to COPY model files rather than symlink
# them. On a network-share/UNC cache dir Windows can't follow HF's symlinks
# ([WinError 1463] "symbolic link cannot be followed"), so ONNX fails to load the
@@ -117,10 +119,7 @@ class FastEmbedClient:
# Persistent cache under data/ so the model survives reboots and so
# the download lands exactly where the admin panel's _is_downloaded()
# check looks (both default to this same path).
cache_dir = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"data", "fastembed_cache",
)
cache_dir = FASTEMBED_CACHE_DIR
os.makedirs(cache_dir, exist_ok=True)
# Windows self-heal: the HuggingFace-hub cache stores model files as
# symlinks (snapshots/<rev>/model.onnx -> ../../blobs/<hash>). On a
@@ -188,10 +187,7 @@ class FastEmbedClient:
def _load_persisted_endpoint() -> dict:
"""Load the custom embedding endpoint saved from the admin panel."""
try:
endpoint_file = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"data", "embedding_endpoint.json",
)
endpoint_file = EMBEDDING_ENDPOINT_FILE
if os.path.exists(endpoint_file):
import json
data = json.loads(open(endpoint_file, encoding="utf-8").read())
+3 -3
View File
@@ -12,6 +12,8 @@ import os
from datetime import datetime
from typing import Optional
from src.constants import AUTH_FILE
logger = logging.getLogger(__name__)
_task_scheduler = None
@@ -54,9 +56,7 @@ def _resolve_event_owner(owner: Optional[str]) -> Optional[str]:
return owner
try:
from src.constants import DATA_DIR
auth_path = os.path.join(DATA_DIR, "auth.json")
auth_path = AUTH_FILE
with open(auth_path, "r", encoding="utf-8") as f:
users = (json.load(f).get("users") or {})
for username, data in users.items():
+3 -1
View File
@@ -4,8 +4,10 @@ from pathlib import Path
from fastapi import HTTPException
from src.constants import GENERATED_IMAGES_DIR
GENERATED_IMAGE_DIR = Path("data/generated_images")
GENERATED_IMAGE_DIR = Path(GENERATED_IMAGES_DIR)
GENERATED_IMAGE_RE = re.compile(
r"^[a-f0-9]{8,64}\.(png|jpg|jpeg|webp|gif|mp4|mov|webm|mkv|m4v)$"
)
+3 -2
View File
@@ -10,10 +10,11 @@ import httpx
from core.atomic_io import atomic_write_json
from core.platform_compat import safe_chmod
from src.secret_storage import decrypt, encrypt, is_encrypted
from src.constants import DATA_DIR, INTEGRATIONS_FILE, SETTINGS_FILE
log = logging.getLogger(__name__)
DATA_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "integrations.json")
DATA_FILE = INTEGRATIONS_FILE
# ---------------------------------------------------------------------------
# Presets
@@ -471,7 +472,7 @@ def get_integrations_prompt() -> str:
def migrate_from_settings() -> None:
"""If data/settings.json has miniflux_url and miniflux_api_key, create a
Miniflux integration and clear those keys from settings."""
settings_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "settings.json")
settings_path = SETTINGS_FILE
if not os.path.exists(settings_path):
return
+3 -1
View File
@@ -7,6 +7,8 @@ A thin wrapper around VectorRAG for backward compatibility and additional featur
import logging
from typing import List, Dict, Any, Optional
from src.constants import CHROMA_DIR
# Try to import from different possible locations
try:
from rag_vector import VectorRAG
@@ -24,7 +26,7 @@ class RAGManager:
Most methods delegate directly to VectorRAG.
"""
def __init__(self, persist_directory: str = "data/chroma"):
def __init__(self, persist_directory: str = CHROMA_DIR):
"""Initialize the RAGManager with VectorRAG."""
self.vector_rag = VectorRAG(persist_directory=persist_directory)
logger.info("RAGManager initialized as wrapper for VectorRAG")
+3 -2
View File
@@ -6,6 +6,8 @@ import logging
import time
from pathlib import Path
from src.constants import RAG_DIR
logger = logging.getLogger(__name__)
rag_instance = None
@@ -41,8 +43,7 @@ def get_rag_manager():
try:
from src.rag_vector import VectorRAG
base_dir = Path(__file__).parent.parent
persist_dir = os.path.join(base_dir, "data", "rag")
persist_dir = RAG_DIR
rag_instance = VectorRAG(persist_directory=persist_dir)
if not rag_instance.healthy:
+3 -1
View File
@@ -12,6 +12,8 @@ import re
import logging
import numpy as np
from typing import List, Dict, Any, Optional, Set
from src.constants import CHROMA_DIR
from pathlib import Path
from src.embedding_lanes import (
@@ -51,7 +53,7 @@ def _generate_doc_id(text: str, owner: str = "") -> str:
class VectorRAG:
"""RAG system using ChromaDB vector storage with hybrid search."""
def __init__(self, persist_directory: str = "data/chroma"):
def __init__(self, persist_directory: str = CHROMA_DIR):
self.persist_directory = persist_directory
self._collection = None
self._model = None
+2 -1
View File
@@ -16,10 +16,11 @@ from pathlib import Path
from typing import Optional, Dict
from src.research_utils import strip_thinking, is_low_quality
from src.constants import DEEP_RESEARCH_DIR
logger = logging.getLogger(__name__)
RESEARCH_DATA_DIR = Path("data/deep_research")
RESEARCH_DATA_DIR = Path(DEEP_RESEARCH_DIR)
_RESEARCH_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9-]{1,128}$")
+2 -1
View File
@@ -25,10 +25,11 @@ from pathlib import Path
from cryptography.fernet import Fernet, InvalidToken
from core.platform_compat import safe_chmod
from src.constants import APP_KEY_FILE
logger = logging.getLogger(__name__)
_KEY_PATH = Path(__file__).resolve().parent.parent / "data" / ".app_key"
_KEY_PATH = Path(APP_KEY_FILE)
_PREFIX = "enc:"
_fernet: Fernet | None = None
+2 -2
View File
@@ -20,14 +20,14 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Tuple
from src.tool_security import is_public_blocked_tool, owner_is_admin_or_single_user
from src.tool_policy import ToolPolicy
from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS, MAX_DIFF_LINES
from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS, MAX_DIFF_LINES, DATA_DIR
# Persistent working directory for agent subprocesses.
# Resolves to DATA_DIR: <repo_root>/data by default (the bind-mounted volume
# /app/data in Docker, the local data directory for manual installs), or the
# directory named by ODYSSEUS_DATA_DIR when that override is set.
# Using this as cwd and HOME prevents the agent from silently creating files
# in ephemeral container layers that are lost on the next rebuild.
_AGENT_WORKDIR = str(pathlib.Path(__file__).parent.parent / "data")
_AGENT_WORKDIR = DATA_DIR
def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]:
+4 -4
View File
@@ -12,7 +12,7 @@ import os
import re
from typing import Any, Dict, List, Optional
from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS
from src.constants import MAX_OUTPUT_CHARS, MAX_READ_CHARS, DEEP_RESEARCH_DIR, VAULT_FILE
from core.constants import internal_api_base
@@ -4057,7 +4057,7 @@ async def do_manage_research(content: str, owner: Optional[str] = None) -> Dict:
args = {}
action = (args.get("action") or "list").lower()
rid = (args.get("id") or args.get("session_id") or args.get("research_id") or "").strip()
data_dir = _Path("data/deep_research")
data_dir = _Path(DEEP_RESEARCH_DIR)
# SECURITY: the research id is interpolated straight into a filesystem
# path (data/deep_research/<rid>.json) for read AND delete. Without this
@@ -4302,7 +4302,7 @@ async def do_manage_contact(content: str, owner: Optional[str] = None) -> Dict:
def _load_vault_config() -> Dict:
"""Load Vaultwarden config from data/vault.json."""
from pathlib import Path
p = Path("data/vault.json")
p = Path(VAULT_FILE)
if p.exists():
try:
return json.loads(p.read_text(encoding="utf-8"))
@@ -4456,7 +4456,7 @@ async def do_vault_unlock(content: str, owner: Optional[str] = None) -> Dict:
# Save session to vault.json
from pathlib import Path
p = Path("data/vault.json")
p = Path(VAULT_FILE)
cfg = {}
if p.exists():
try:
+5 -4
View File
@@ -118,10 +118,11 @@ def test_pairing_payload_shape():
@pytest.mark.parametrize("payload", ["[]", '{"users": []}'])
def test_find_admin_user_ignores_invalid_auth_shape(tmp_path, monkeypatch, payload):
data_dir = tmp_path / "data"
data_dir.mkdir()
(data_dir / "auth.json").write_text(payload)
monkeypatch.chdir(tmp_path)
auth_file = tmp_path / "auth.json"
auth_file.write_text(payload)
# find_admin_user reads the import-time AUTH_FILE constant, so redirect that
# rather than relying on cwd.
monkeypatch.setattr(P, "AUTH_FILE", str(auth_file))
assert P.find_admin_user() is None
-29
View File
@@ -1,29 +0,0 @@
"""Guard: cookbook_state.json must be located via DATA_DIR, not hardcoded /app/data
(which breaks native runs) or a relative os.environ fallback."""
import pathlib
ROOT = pathlib.Path(__file__).resolve().parent.parent
FILES = [
"src/cookbook_serve_lifecycle.py",
"src/builtin_actions.py",
"routes/codex_routes.py",
"routes/cookbook_routes.py",
]
def test_no_hardcoded_app_data_cookbook_state():
for rel in FILES:
text = (ROOT / rel).read_text(encoding="utf-8")
for ln in text.splitlines():
if ln.strip().startswith("#"):
continue
assert "/app/data/cookbook_state" not in ln, f"{rel}: hardcoded /app/data: {ln.strip()}"
assert 'os.environ.get("DATA_DIR"' not in ln, f"{rel}: relative DATA_DIR env fallback: {ln.strip()}"
def test_cookbook_state_uses_datadir_constant():
# Each file that references cookbook_state.json should import the DATA_DIR constant.
for rel in FILES:
text = (ROOT / rel).read_text(encoding="utf-8")
if "cookbook_state.json" in text:
assert "from core.constants import DATA_DIR" in text, f"{rel}: missing DATA_DIR import"
+10
View File
@@ -11,6 +11,16 @@ from fastapi import HTTPException
from routes.research_routes import setup_research_routes
@pytest.fixture(autouse=True)
def _redirect_research_dir(tmp_path, monkeypatch):
    """Redirect the routes' deep-research directory to a per-test temp path.

    The deep-research location is an import-time constant, so changing the
    working directory no longer moves it; the attribute the routes module
    reads is patched instead. monkeypatch restores it after each test.
    """
    redirected_dir = tmp_path / "data" / "deep_research"
    monkeypatch.setattr(
        "routes.research_routes.DEEP_RESEARCH_DIR",
        str(redirected_dir),
    )
def _request(user: str):
return SimpleNamespace(state=SimpleNamespace(current_user=user))
+4 -4
View File
@@ -946,7 +946,7 @@ def _import_mcp_routes():
def test_mcp_oauth_paths_resolve_under_data_dir(tmp_path, monkeypatch):
mcp_routes = _import_mcp_routes()
monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data"))
monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth"))
resolved = Path(mcp_routes._resolve_mcp_oauth_path("gmail/credentials.json", "token_file"))
@@ -963,7 +963,7 @@ def test_mcp_oauth_paths_reject_escapes(tmp_path, monkeypatch, raw_path):
from fastapi import HTTPException
mcp_routes = _import_mcp_routes()
monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data"))
monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth"))
with pytest.raises(HTTPException) as exc:
mcp_routes._resolve_mcp_oauth_path(raw_path, "token_file")
@@ -974,7 +974,7 @@ def test_mcp_oauth_filename_join_cannot_escape_base(tmp_path, monkeypatch):
from fastapi import HTTPException
mcp_routes = _import_mcp_routes()
monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data"))
monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth"))
safe_dir = mcp_routes._resolve_mcp_oauth_path("gmail", "dir")
with pytest.raises(HTTPException):
@@ -983,7 +983,7 @@ def test_mcp_oauth_filename_join_cannot_escape_base(tmp_path, monkeypatch):
def test_mcp_oauth_config_sanitizes_paths_and_env(tmp_path, monkeypatch):
mcp_routes = _import_mcp_routes()
monkeypatch.setattr(mcp_routes, "DATA_DIR", str(tmp_path / "data"))
monkeypatch.setattr(mcp_routes, "MCP_OAUTH_DIR", str(tmp_path / "data" / "mcp_oauth"))
cfg = mcp_routes._sanitize_mcp_oauth_config({
"provider": "google",
+1 -1
View File
@@ -13,7 +13,7 @@ def _load_setup_module():
def test_create_default_admin_normalizes_env_username(tmp_path, monkeypatch):
setup_module = _load_setup_module()
monkeypatch.setattr(setup_module, "DATA_DIR", str(tmp_path))
monkeypatch.setattr(setup_module, "AUTH_FILE", str(tmp_path / "auth.json"))
monkeypatch.setenv("ODYSSEUS_ADMIN_USER", " AdminUser ")
monkeypatch.setenv("ODYSSEUS_ADMIN_PASSWORD", "temporary-password")