From 94d2754f412666d379b8c0aaed4d6b2cd1e17a99 Mon Sep 17 00:00:00 2001 From: Kenny Van de Maele Date: Mon, 8 Jun 2026 23:42:37 +0200 Subject: [PATCH] refactor(uploads): centralize upload byte-limits in upload_limits.py (#3364) Move every per-route upload byte-limit into src/upload_limits.py as a validated, env-overridable constant via read_byte_limit_env: - Add GALLERY_UPLOAD_MAX_BYTES, GALLERY_TRANSFORM_UPLOAD_MAX_BYTES, MEMORY_IMPORT_MAX_BYTES, PERSONAL_UPLOAD_MAX_BYTES, EMAIL_COMPOSE_UPLOAD_MAX_BYTES, STT_MAX_AUDIO_BYTES, ICS_MAX_BYTES. - Routes import their constant instead of defining it locally: replaces 4 raw int(os.getenv(...)) and removes 3 hardcoded literals. - The 3 previously-hardcoded limits (email compose, STT audio, calendar ICS) are now env-overridable with the same ODYSSEUS_*_MAX_BYTES naming. - Defaults unchanged, so behavior is unchanged unless an env var is set; an invalid value now fails fast with a clear message instead of a bare int() ValueError. - Document all env vars in .env.example and the README. Fixes #3364 --- .env.example | 10 +++ README.md | 9 ++ routes/calendar_routes.py | 10 +-- routes/email_routes.py | 3 +- routes/gallery_routes.py | 9 +- routes/memory_routes.py | 3 +- routes/personal_routes.py | 8 +- routes/stt_routes.py | 4 +- src/upload_limits.py | 28 ++++++ tests/test_direct_upload_limits.py | 2 +- tests/test_upload_limits_centralized.py | 110 ++++++++++++++++++++++++ 11 files changed, 174 insertions(+), 22 deletions(-) create mode 100644 tests/test_upload_limits_centralized.py diff --git a/.env.example b/.env.example index 63708ea31..5382c23c7 100644 --- a/.env.example +++ b/.env.example @@ -159,6 +159,16 @@ SEARXNG_INSTANCE=http://localhost:8080 # Example: 52428800 = 50 MB. # ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=10485760 +# Other per-feature upload size caps in bytes. All are validated and optional; +# defaults shown. An invalid value (non-integer or < 1) fails fast at startup. +# ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=104857600 # gallery image upload (100 MB) +# ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=26214400 # gallery transform input (25 MB) +# ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=10485760 # memory import file (10 MB) +# ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=26214400 # personal document upload (25 MB) +# ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=26214400 # email compose attachment (25 MB) +# ODYSSEUS_STT_MAX_AUDIO_BYTES=26214400 # speech-to-text audio (25 MB) +# ODYSSEUS_ICS_MAX_BYTES=10485760 # calendar .ics import (10 MB) + # ============================================================ # GPU support (Docker Compose) # ============================================================ diff --git a/README.md b/README.md index 534c0c9ad..4fae1d76b 100644 --- a/README.md +++ b/README.md @@ -403,6 +403,15 @@ Key settings: | `CHROMADB_PORT` | `8100` | ChromaDB port for manual host runs. Docker overrides this to `8000`. | | `EMBEDDING_URL` | -- | OpenAI-compatible embeddings endpoint | | `ODYSSEUS_CHAT_UPLOAD_MAX_BYTES` | `10485760` | Chat/agent attachment cap in bytes. Raise for larger local PDFs or text documents. | +| `ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES` | `104857600` | Gallery image upload cap in bytes (100 MB). | +| `ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES` | `26214400` | Gallery transform input cap in bytes (25 MB). | +| `ODYSSEUS_MEMORY_IMPORT_MAX_BYTES` | `10485760` | Memory import file cap in bytes (10 MB). | +| `ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES` | `26214400` | Personal document upload cap in bytes (25 MB). | +| `ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES` | `26214400` | Email compose attachment cap in bytes (25 MB). | +| `ODYSSEUS_STT_MAX_AUDIO_BYTES` | `26214400` | Speech-to-text audio cap in bytes (25 MB). | +| `ODYSSEUS_ICS_MAX_BYTES` | `10485760` | Calendar `.ics` import cap in bytes (10 MB). | + +All upload-limit vars are validated (must be a positive integer) and optional; an invalid value fails fast at startup. ### Built-in MCP servers (optional setup) diff --git a/routes/calendar_routes.py b/routes/calendar_routes.py index 0a30d9205..345280528 100644 --- a/routes/calendar_routes.py +++ b/routes/calendar_routes.py @@ -13,7 +13,7 @@ from dateutil.rrule import rrulestr from core.database import SessionLocal, CalendarCal, CalendarEvent from src.auth_helpers import require_user -from src.upload_limits import read_upload_limited +from src.upload_limits import read_upload_limited, ICS_MAX_BYTES logger = logging.getLogger(__name__) @@ -1170,9 +1170,9 @@ def setup_calendar_routes() -> APIRouter: finally: db.close() - # 10 MB hard cap on ICS upload. Loading the whole file into memory is - # unavoidable with python-icalendar, so an unbounded upload would OOM. - _ICS_MAX_BYTES = 10 * 1024 * 1024 + # Hard cap on ICS upload (ICS_MAX_BYTES, default 10 MB). Loading the whole + # file into memory is unavoidable with python-icalendar, so an unbounded + # upload would OOM. @router.post("/import") async def import_ics(request: Request, file: UploadFile = File(...), calendar_name: str = ""): @@ -1182,7 +1182,7 @@ def setup_calendar_routes() -> APIRouter: owner = _require_user(request) db = SessionLocal() try: - content = await read_upload_limited(file, _ICS_MAX_BYTES, "ICS file") + content = await read_upload_limited(file, ICS_MAX_BYTES, "ICS file") try: cal_data = iCal.from_ical(content) except Exception as e: diff --git a/routes/email_routes.py b/routes/email_routes.py index 8441605ea..797a142f2 100644 --- a/routes/email_routes.py +++ b/routes/email_routes.py @@ -35,7 +35,7 @@ from fastapi.responses import FileResponse from src.constants import DATA_DIR from src.llm_core import llm_call_async -from src.upload_limits import read_upload_limited +from src.upload_limits import read_upload_limited, EMAIL_COMPOSE_UPLOAD_MAX_BYTES from routes.email_helpers import ( _strip_think, _extract_reply, _apply_email_style_mechanics, require_owner, require_user, _assert_owns_account, @@ -58,7 +58,6 @@ from routes.email_pollers import _start_poller logger = logging.getLogger(__name__) ODYSSEUS_MAIL_ORIGIN = "odysseus-ui" -EMAIL_COMPOSE_UPLOAD_MAX_BYTES = 25 * 1024 * 1024 def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[str]: diff --git a/routes/gallery_routes.py b/routes/gallery_routes.py index ed598f031..43999344e 100644 --- a/routes/gallery_routes.py +++ b/routes/gallery_routes.py @@ -13,7 +13,11 @@ from fastapi import APIRouter, HTTPException, Query, Request from core.database import SessionLocal, GalleryImage, GalleryAlbum, ModelEndpoint from core.database import Session as DbSession from src.auth_helpers import get_current_user, owner_filter, require_privilege -from src.upload_limits import read_upload_limited +from src.upload_limits import ( + read_upload_limited, + GALLERY_UPLOAD_MAX_BYTES, + GALLERY_TRANSFORM_UPLOAD_MAX_BYTES, +) from src.constants import GENERATED_IMAGES_DIR from routes.gallery_helpers import ( @@ -22,9 +26,6 @@ from routes.gallery_helpers import ( logger = logging.getLogger(__name__) -GALLERY_UPLOAD_MAX_BYTES = int(os.getenv("ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES", str(100 * 1024 * 1024))) -GALLERY_TRANSFORM_UPLOAD_MAX_BYTES = int(os.getenv("ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024))) - def _current_user_is_admin(request: Request, user: str | None) -> bool: if not user: diff --git a/routes/memory_routes.py b/routes/memory_routes.py index 9da566fa7..7be3c6d32 100644 --- a/routes/memory_routes.py +++ b/routes/memory_routes.py @@ -29,11 +29,10 @@ from src.llm_core import llm_call_async from services.memory.memory_extractor import audit_memories from src.auth_helpers import get_current_user, require_user from src.endpoint_resolver import resolve_endpoint -from src.upload_limits import read_upload_limited +from src.upload_limits import read_upload_limited, MEMORY_IMPORT_MAX_BYTES logger = logging.getLogger(__name__) -MEMORY_IMPORT_MAX_BYTES = int(os.getenv("ODYSSEUS_MEMORY_IMPORT_MAX_BYTES", str(10 * 1024 * 1024))) def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionManager, memory_vector=None): """Set up memory-related routes.""" diff --git a/routes/personal_routes.py b/routes/personal_routes.py index 4ef3219fc..c32f5ffe1 100644 --- a/routes/personal_routes.py +++ b/routes/personal_routes.py @@ -11,11 +11,9 @@ from src.rag_singleton import get_rag_manager from src.auth_helpers import require_privilege, require_user from core.middleware import require_admin from src.upload_handler import secure_filename +from src.upload_limits import PERSONAL_UPLOAD_MAX_BYTES UPLOADS_DIR = PERSONAL_UPLOADS_DIR -MAX_PERSONAL_UPLOAD_BYTES = int( - os.getenv("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024)) -) logger = logging.getLogger(__name__) @@ -208,8 +206,8 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available): for upload in files: try: file_path, stored_name, safe_name = _unique_personal_upload_path(upload_dir, upload.filename) - content_bytes = await upload.read(MAX_PERSONAL_UPLOAD_BYTES + 1) - if len(content_bytes) > MAX_PERSONAL_UPLOAD_BYTES: + content_bytes = await upload.read(PERSONAL_UPLOAD_MAX_BYTES + 1) + if len(content_bytes) > PERSONAL_UPLOAD_MAX_BYTES: logger.warning(f"Rejected oversized personal upload: {upload.filename!r}") total_failed += 1 continue diff --git a/routes/stt_routes.py b/routes/stt_routes.py index fdb3c4a82..fb95b69cb 100644 --- a/routes/stt_routes.py +++ b/routes/stt_routes.py @@ -4,12 +4,10 @@ from fastapi import APIRouter, HTTPException, UploadFile, File import logging -from src.upload_limits import read_upload_limited +from src.upload_limits import read_upload_limited, STT_MAX_AUDIO_BYTES logger = logging.getLogger(__name__) -STT_MAX_AUDIO_BYTES = 25 * 1024 * 1024 - def setup_stt_routes(stt_service): """Setup STT routes with the provided STT service""" diff --git a/src/upload_limits.py b/src/upload_limits.py index d16835d21..2be42077b 100644 --- a/src/upload_limits.py +++ b/src/upload_limits.py @@ -33,6 +33,34 @@ def get_chat_upload_max_bytes() -> int: return read_byte_limit_env(CHAT_UPLOAD_MAX_BYTES_ENV, DEFAULT_CHAT_UPLOAD_MAX_BYTES) +# Per-route upload byte-limits, single-sourced here (issue #3364). Each is +# validated + env-overridable via read_byte_limit_env: set the matching +# ODYSSEUS_*_MAX_BYTES env var to an integer byte count to tune it; an invalid +# value fails fast at import rather than crashing mid-request. Defaults match +# the prior per-route values, so behavior is unchanged unless an env var is set. +GALLERY_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES", 100 * 1024 * 1024 +) +GALLERY_TRANSFORM_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES", 25 * 1024 * 1024 +) +MEMORY_IMPORT_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_MEMORY_IMPORT_MAX_BYTES", 10 * 1024 * 1024 +) +PERSONAL_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", 25 * 1024 * 1024 +) +EMAIL_COMPOSE_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES", 25 * 1024 * 1024 +) +STT_MAX_AUDIO_BYTES = read_byte_limit_env( + "ODYSSEUS_STT_MAX_AUDIO_BYTES", 25 * 1024 * 1024 +) +ICS_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_ICS_MAX_BYTES", 10 * 1024 * 1024 +) + + async def read_upload_limited(upload: UploadFile, limit: int, label: str = "Upload") -> bytes: """Read an UploadFile with a hard byte cap.""" data = await upload.read(limit + 1) diff --git a/tests/test_direct_upload_limits.py b/tests/test_direct_upload_limits.py index d150d7e97..59eef9861 100644 --- a/tests/test_direct_upload_limits.py +++ b/tests/test_direct_upload_limits.py @@ -48,7 +48,7 @@ def test_direct_upload_routes_use_bounded_reads(): "read_upload_limited(file, MEMORY_IMPORT_MAX_BYTES", ], "routes/calendar_routes.py": [ - "read_upload_limited(file, _ICS_MAX_BYTES", + "read_upload_limited(file, ICS_MAX_BYTES", ], "routes/email_routes.py": [ "read_upload_limited(file, EMAIL_COMPOSE_UPLOAD_MAX_BYTES", diff --git a/tests/test_upload_limits_centralized.py b/tests/test_upload_limits_centralized.py new file mode 100644 index 000000000..a870228fa --- /dev/null +++ b/tests/test_upload_limits_centralized.py @@ -0,0 +1,110 @@ +"""Centralized upload byte-limits (issue #3364). + +Every per-route upload limit lives in ``src.upload_limits`` as a module-level +constant read through the validated ``read_byte_limit_env``. These tests pin: +- the default values (unchanged from the prior per-route literals), +- env-overridability for each one, +- that an invalid env value fails fast (validation), and +- that the routes import the constant from upload_limits rather than redefining + it locally (no scattered raw getenv / hardcoded literal). +""" + +import importlib +from pathlib import Path + +import pytest + +import src.upload_limits as upload_limits + +REPO = Path(__file__).resolve().parent.parent + +# const name -> (env var, default bytes) +_LIMITS = { + "GALLERY_UPLOAD_MAX_BYTES": ("ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES", 100 * 1024 * 1024), + "GALLERY_TRANSFORM_UPLOAD_MAX_BYTES": ("ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES", 25 * 1024 * 1024), + "MEMORY_IMPORT_MAX_BYTES": ("ODYSSEUS_MEMORY_IMPORT_MAX_BYTES", 10 * 1024 * 1024), + "PERSONAL_UPLOAD_MAX_BYTES": ("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", 25 * 1024 * 1024), + "EMAIL_COMPOSE_UPLOAD_MAX_BYTES": ("ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES", 25 * 1024 * 1024), + "STT_MAX_AUDIO_BYTES": ("ODYSSEUS_STT_MAX_AUDIO_BYTES", 25 * 1024 * 1024), + "ICS_MAX_BYTES": ("ODYSSEUS_ICS_MAX_BYTES", 10 * 1024 * 1024), +} + + +def _reload_clean(monkeypatch): + """Reload upload_limits with all the limit env vars unset.""" + for env, _ in _LIMITS.values(): + monkeypatch.delenv(env, raising=False) + return importlib.reload(upload_limits) + + +@pytest.fixture(autouse=True) +def _restore_module(): + # Ensure later tests see the env-default module, not a test-mutated reload. + yield + importlib.reload(upload_limits) + + +@pytest.mark.parametrize("name,env,default", [(n, e, d) for n, (e, d) in _LIMITS.items()]) +def test_default_value(monkeypatch, name, env, default): + mod = _reload_clean(monkeypatch) + assert getattr(mod, name) == default + + +@pytest.mark.parametrize("name,env,default", [(n, e, d) for n, (e, d) in _LIMITS.items()]) +def test_env_override(monkeypatch, name, env, default): + for e, _ in _LIMITS.values(): + monkeypatch.delenv(e, raising=False) + monkeypatch.setenv(env, "4242") + mod = importlib.reload(upload_limits) + assert getattr(mod, name) == 4242 + + +@pytest.mark.parametrize("env", [e for e, _ in _LIMITS.values()]) +def test_invalid_env_fails_fast(monkeypatch, env): + for e, _ in _LIMITS.values(): + monkeypatch.delenv(e, raising=False) + monkeypatch.setenv(env, "not-an-int") + with pytest.raises(ValueError, match=env): + importlib.reload(upload_limits) + + +@pytest.mark.parametrize("env", [e for e, _ in _LIMITS.values()]) +def test_non_positive_env_rejected(monkeypatch, env): + for e, _ in _LIMITS.values(): + monkeypatch.delenv(e, raising=False) + monkeypatch.setenv(env, "0") + with pytest.raises(ValueError, match="greater than 0"): + importlib.reload(upload_limits) + + +def test_routes_import_from_upload_limits_not_local_defs(): + """Routes must import the constant, not redefine it via raw getenv / literal.""" + forbidden = { + "routes/gallery_routes.py": [ + 'int(os.getenv("ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES"', + 'int(os.getenv("ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES"', + ], + "routes/memory_routes.py": ['int(os.getenv("ODYSSEUS_MEMORY_IMPORT_MAX_BYTES"'], + "routes/personal_routes.py": ['os.getenv("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES"'], + "routes/email_routes.py": ["EMAIL_COMPOSE_UPLOAD_MAX_BYTES = 25 * 1024 * 1024"], + "routes/stt_routes.py": ["STT_MAX_AUDIO_BYTES = 25 * 1024 * 1024"], + "routes/calendar_routes.py": ["_ICS_MAX_BYTES = 10 * 1024 * 1024"], + } + for path, needles in forbidden.items(): + text = (REPO / path).read_text(encoding="utf-8") + for needle in needles: + assert needle not in text, f"{path} still defines limit locally: {needle}" + + # And each imports from upload_limits. + imports = { + "routes/gallery_routes.py": "GALLERY_UPLOAD_MAX_BYTES", + "routes/memory_routes.py": "MEMORY_IMPORT_MAX_BYTES", + "routes/personal_routes.py": "PERSONAL_UPLOAD_MAX_BYTES", + "routes/email_routes.py": "EMAIL_COMPOSE_UPLOAD_MAX_BYTES", + "routes/stt_routes.py": "STT_MAX_AUDIO_BYTES", + "routes/calendar_routes.py": "ICS_MAX_BYTES", + } + for path, const in imports.items(): + text = (REPO / path).read_text(encoding="utf-8") + assert "from src.upload_limits import" in text + assert const in text