Enforce owner checks for upload attachments

This commit is contained in:
Duarte Antunes
2026-06-01 08:47:48 +01:00
committed by GitHub
parent 8874a11baf
commit e77d87fa80
6 changed files with 352 additions and 59 deletions
+66 -17
View File
@@ -3,6 +3,8 @@
"""Document routes — CRUD for living documents with version history."""
import logging
import os
import re
from typing import Dict, Any, Optional
from fastapi import HTTPException
@@ -12,6 +14,7 @@ from core.database import Document, DocumentVersion
from core.database import Session as DbSession
logger = logging.getLogger(__name__)
_UPLOAD_ID_RE = re.compile(r"^[0-9a-fA-F]{32}\.[A-Za-z0-9]+$")
# ---- Request schemas ----
@@ -126,40 +129,86 @@ def _slug(name: str) -> str:
_PDF_RENDER_SCALE = 2.0
def _locate_upload(upload_dir: str, file_id: str):
def _upload_path_inside(upload_dir: str, path: str) -> bool:
base = os.path.realpath(upload_dir)
p = os.path.realpath(path)
try:
return os.path.commonpath([base, p]) == base
except Exception:
return False
def _upload_owner_allowed(
meta: Optional[dict],
user: Optional[str],
auth_manager=None,
allow_admin: bool = True,
) -> bool:
if not user:
return (
not bool(auth_manager and getattr(auth_manager, "is_configured", False))
and not (meta and meta.get("owner") is not None)
)
if allow_admin and auth_manager and hasattr(auth_manager, "is_admin"):
try:
if auth_manager.is_admin(user):
return True
except Exception:
pass
return bool(meta and meta.get("owner") == user)
def _locate_upload(upload_dir: str, file_id: str, owner: Optional[str] = None, auth_manager=None):
"""Find an upload by its filename ID.
Lookup order:
1. Direct hit at `upload_dir/file_id` (very small deployments).
2. The `uploads.json` index that `UploadHandler.save_upload` maintains —
maps file_hash → metadata containing the full path. O(1) once loaded.
1. The `uploads.json` index that `UploadHandler.save_upload` maintains,
so owner can be verified before a document reads the source file.
2. Direct hit at `upload_dir/file_id` (very small deployments).
3. Fallback: `os.walk` the date-bucketed tree. Slow on large stores;
only triggers for legacy uploads recorded before the index existed.
only allowed after the index owner check passes, or in single-user /
admin-style contexts where no owner is enforced.
`followlinks=False` keeps a stray symlink loop in `data/uploads/` from
spinning the walker into infinite recursion.
"""
import os
import json as _json
direct = os.path.join(upload_dir, file_id)
if os.path.exists(direct):
return direct
# O(1) via uploads.json
if not _UPLOAD_ID_RE.fullmatch(file_id or ""):
logger.warning("Rejected invalid upload id in document lookup: %r", file_id)
return None
meta = None
try:
idx_path = os.path.join(upload_dir, "uploads.json")
if os.path.exists(idx_path):
with open(idx_path, "r", encoding="utf-8") as f:
idx = _json.load(f)
for meta in (idx.values() if isinstance(idx, dict) else []):
if meta.get("id") == file_id:
p = meta.get("path")
if p and os.path.exists(p):
return p
for item in (idx.values() if isinstance(idx, dict) else []):
if isinstance(item, dict) and item.get("id") == file_id:
meta = item
break
except Exception:
pass
meta = None
if not _upload_owner_allowed(meta, owner, auth_manager):
logger.warning("Upload %s denied for document owner %s", file_id, owner)
return None
if meta:
p = meta.get("path")
if p and os.path.exists(p) and _upload_path_inside(upload_dir, p):
return p
direct = os.path.join(upload_dir, file_id)
if os.path.exists(direct) and _upload_path_inside(upload_dir, direct):
return direct
for root, _dirs, files in os.walk(upload_dir, followlinks=False):
if file_id in files:
return os.path.join(root, file_id)
p = os.path.join(root, file_id)
if _upload_path_inside(upload_dir, p):
return p
return None
+15 -9
View File
@@ -24,6 +24,12 @@ from routes.document_helpers import (
_PDF_RENDER_SCALE,
)
def _locate_current_user_upload(request: Request, upload_dir: str, upload_id: str, user: Optional[str]):
auth_manager = getattr(getattr(request.app, "state", None), "auth_manager", None)
return _locate_upload(upload_dir, upload_id, owner=user, auth_manager=auth_manager)
def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
router = APIRouter(tags=["documents"])
@@ -160,7 +166,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
raise HTTPException(500, f"Upload failed: {e}")
upload_id = meta["id"]
pdf_path = _locate_upload(UPLOAD_DIR, upload_id)
pdf_path = _locate_current_user_upload(request, UPLOAD_DIR, upload_id, user)
if not pdf_path:
raise HTTPException(500, "Saved PDF could not be located")
@@ -401,7 +407,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
raise HTTPException(400, "Document is not a PDF — no pdf_source marker found")
upload_id = m.group(1)
pdf_path = _locate_upload(UPLOAD_DIR, upload_id)
pdf_path = _locate_current_user_upload(request, UPLOAD_DIR, upload_id, user)
if not pdf_path:
raise HTTPException(404, "Source PDF could not be located")
@@ -914,7 +920,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
if not upload_id:
raise HTTPException(400, "Document is not linked to a source PDF")
pdf_path = _locate_upload(UPLOAD_DIR, upload_id)
pdf_path = _locate_current_user_upload(request, UPLOAD_DIR, upload_id, user)
if not pdf_path:
raise HTTPException(404, f"Source PDF {upload_id} not found in uploads")
@@ -978,7 +984,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
upload_id = find_source_upload_id(doc.current_content or "")
if not upload_id:
raise HTTPException(400, "Document is not linked to a source PDF")
pdf_path = _locate_upload(UPLOAD_DIR, upload_id)
pdf_path = _locate_current_user_upload(request, UPLOAD_DIR, upload_id, user)
if not pdf_path:
raise HTTPException(404, f"Source PDF {upload_id} not found")
@@ -1046,7 +1052,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
upload_id = find_source_upload_id(doc.current_content or "")
if not upload_id:
raise HTTPException(400, "Document is not linked to a source PDF")
pdf_path = _locate_upload(UPLOAD_DIR, upload_id)
pdf_path = _locate_current_user_upload(request, UPLOAD_DIR, upload_id, user)
if not pdf_path:
raise HTTPException(404, "Source PDF not found")
finally:
@@ -1101,7 +1107,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
upload_id = find_source_upload_id(doc.current_content or "")
if not upload_id:
raise HTTPException(400, "Document is not linked to a source PDF")
pdf_path = _locate_upload(UPLOAD_DIR, upload_id)
pdf_path = _locate_current_user_upload(request, UPLOAD_DIR, upload_id, user)
if not pdf_path:
raise HTTPException(404, "Source PDF not found")
finally:
@@ -1250,7 +1256,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
upload_id = find_source_upload_id(doc.current_content or "")
if not upload_id:
raise HTTPException(400, "Document is not linked to a source PDF")
pdf_path = _locate_upload(UPLOAD_DIR, upload_id)
pdf_path = _locate_current_user_upload(request, UPLOAD_DIR, upload_id, user)
if not pdf_path:
raise HTTPException(404, f"Source PDF {upload_id} not found")
@@ -1345,7 +1351,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
if not upload_id:
raise HTTPException(400, "Document is not linked to a source PDF")
pdf_path = _locate_upload(UPLOAD_DIR, upload_id)
pdf_path = _locate_current_user_upload(request, UPLOAD_DIR, upload_id, user)
if not pdf_path:
raise HTTPException(404, f"Source PDF {upload_id} not found in uploads")
@@ -1489,7 +1495,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
upload_id = find_source_upload_id(doc.current_content or "")
if not upload_id:
raise HTTPException(400, "Document is not linked to a source PDF")
pdf_path = _locate_upload(UPLOAD_DIR, upload_id)
pdf_path = _locate_current_user_upload(request, UPLOAD_DIR, upload_id, user)
if not pdf_path:
raise HTTPException(404, f"Source PDF {upload_id} not found")