mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-18 18:55:28 -04:00
fix(document): allow render-pdf to be framed and 503 cleanly on missing PyMuPDF (#2103)
* fix(document): allow render-pdf to be framed and 503 cleanly on missing PyMuPDF Fixes #2101. Two related bugs in the PDF-form library preview flow: 1. SecurityHeadersMiddleware was sending X-Frame-Options: DENY and frame-ancestors 'none' on /api/document/{doc_id}/render-pdf, but static/js/documentLibrary.js embeds the response in an <iframe> for the library card preview. The browser blocked the load with ERR_BLOCKED_BY_RESPONSE, leaving the user with a blank panel. Extend the existing is_tool_render exemption to also cover /api/document/.../render-pdf. Per-document owner checks still run in the route handler, so the exemption is scoped the same way as the tool-render exemption it mirrors. /api/document/.../export-pdf is left untouched — it's a download (Content-Disposition: attachment), not an iframe embed. 2. routes/document_routes.py:render_pdf called fill_fields, which raises RuntimeError via _require_fitz() when the optional PyMuPDF dependency isn't installed. That RuntimeError bubbled out as a generic 500 with a cryptic 'PDF render failed' detail. Reuse the existing _load_pdf_viewer_fitz() helper to fail fast with a 503 and a user-actionable install hint (mentions requirements-optional.txt and AGPL-3.0), matching the convention used by the other PDF endpoints. Tests cover both fixes: - middleware headers on /api/document/.../render-pdf (iframeable, but X-Content-Type-Options and Referrer-Policy are still set) - middleware headers on /api/document/.../export-pdf (must stay strict) - middleware path matching precision (similar-but-different paths stay strict) - middleware headers on /api/tools/.../render (no regression) - middleware headers on /api/chat (no regression) - render-pdf returns 503 with install hint when PyMuPDF is missing - 503 is raised before any file I/O (fail-fast ordering) * chore: address maintainer feedback on PDF previews same-origin framing and comment trimming * chore: make render-pdf regression tests order-independent
This commit is contained in:
+3
-6
@@ -67,10 +67,9 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
|
||||
response = await call_next(request)
|
||||
path = request.url.path
|
||||
|
||||
# Tool render endpoints are served inside iframes — allow framing by self
|
||||
# Tool render endpoints
|
||||
is_tool_render = path.startswith("/api/tools/") and path.endswith("/render")
|
||||
# PDF previews are embedded by the in-app document library. Keep the
|
||||
# exception route-scoped so normal app pages remain unframeable.
|
||||
# Document library PDF preview endpoint
|
||||
is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf")
|
||||
# Visual report pages are self-contained HTML — need inline scripts + external images
|
||||
is_report = path.startswith("/api/research/report/")
|
||||
@@ -97,9 +96,7 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
|
||||
"frame-ancestors 'none'"
|
||||
)
|
||||
elif is_tool_render:
|
||||
# Tool iframe content: skip all framing headers — the iframe's
|
||||
# sandbox="allow-scripts" attribute provides isolation.
|
||||
# Don't overwrite the route's own restrictive CSP either.
|
||||
# Skip framing headers for tools.
|
||||
pass
|
||||
elif is_document_pdf_preview:
|
||||
response.headers["X-Frame-Options"] = "SAMEORIGIN"
|
||||
|
||||
@@ -1332,6 +1332,12 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
|
||||
if not pdf_path:
|
||||
raise HTTPException(404, f"Source PDF {upload_id} not found")
|
||||
|
||||
# Fail fast with a clear 503 if the optional PyMuPDF dependency
|
||||
# is missing — fill_fields/stamp_annotations will otherwise
|
||||
# raise RuntimeError deep inside and bubble out as a 500.
|
||||
# Mirrors the convention in _load_pdf_viewer_fitz above.
|
||||
_load_pdf_viewer_fitz()
|
||||
|
||||
values = parse_markdown_to_values(doc.current_content or "")
|
||||
out_path = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name
|
||||
_to_unlink.append(out_path)
|
||||
|
||||
@@ -0,0 +1,238 @@
|
||||
"""Regression tests for the document PDF preview framing headers and PyMuPDF dependency handling."""
|
||||
|
||||
import builtins
|
||||
import tempfile
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.pool import NullPool
|
||||
|
||||
import core.database as cdb
|
||||
import routes.document_routes as droutes
|
||||
from core.database import Document
|
||||
from core.middleware import SecurityHeadersMiddleware
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _FakeURL:
|
||||
def __init__(self, path: str):
|
||||
self.path = path
|
||||
self.scheme = "http"
|
||||
|
||||
|
||||
class _FakeRequest:
|
||||
def __init__(self, path: str):
|
||||
self.url = _FakeURL(path)
|
||||
self.headers = {}
|
||||
self.state = SimpleNamespace()
|
||||
|
||||
|
||||
class _FakeResponse:
|
||||
def __init__(self):
|
||||
self.headers: dict[str, str] = {}
|
||||
|
||||
|
||||
async def _dispatch(path: str) -> _FakeResponse:
|
||||
mw = SecurityHeadersMiddleware(MagicMock())
|
||||
resp = _FakeResponse()
|
||||
call_next = AsyncMock(return_value=resp)
|
||||
await mw.dispatch(_FakeRequest(path), call_next)
|
||||
return resp
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: middleware framing policy on /api/document/.../render-pdf
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_doc_render_pdf_same_origin_framing():
|
||||
"""Assert that /api/document/{id}/render-pdf allows same-origin framing."""
|
||||
resp = await _dispatch("/api/document/abc-123/render-pdf")
|
||||
|
||||
assert resp.headers.get("X-Frame-Options") == "SAMEORIGIN"
|
||||
csp = resp.headers.get("Content-Security-Policy", "")
|
||||
assert "frame-ancestors 'self'" in csp
|
||||
|
||||
|
||||
async def test_doc_render_pdf_keeps_baseline_security_headers():
|
||||
"""Assert that baseline security headers are preserved on the render-pdf path."""
|
||||
resp = await _dispatch("/api/document/abc-123/render-pdf")
|
||||
|
||||
assert resp.headers.get("X-Content-Type-Options") == "nosniff"
|
||||
assert resp.headers.get("Referrer-Policy") == "no-referrer"
|
||||
|
||||
|
||||
async def test_doc_export_pdf_still_frame_blocked():
|
||||
"""Assert that the export-pdf path remains frame-blocked."""
|
||||
resp = await _dispatch("/api/document/abc-123/export-pdf")
|
||||
|
||||
assert resp.headers.get("X-Frame-Options") == "DENY"
|
||||
assert "frame-ancestors 'none'" in resp.headers.get("Content-Security-Policy", "")
|
||||
|
||||
|
||||
async def test_doc_path_matching_is_precise():
|
||||
"""Assert that similar paths are not exempted from framing restrictions."""
|
||||
for path in [
|
||||
"/api/document/abc-123/render-pdfx",
|
||||
"/api/document/abc-123/render-pdf/foo",
|
||||
"/api/documents/abc-123/render-pdf",
|
||||
]:
|
||||
resp = await _dispatch(path)
|
||||
assert resp.headers.get("X-Frame-Options") == "DENY"
|
||||
|
||||
|
||||
async def test_tool_render_exemption_preserved():
|
||||
"""Assert that the tool-render path remains exempt from framing headers."""
|
||||
resp = await _dispatch("/api/tools/foo/bar/render")
|
||||
|
||||
assert "X-Frame-Options" not in resp.headers
|
||||
csp = resp.headers.get("Content-Security-Policy", "")
|
||||
assert "frame-ancestors" not in csp
|
||||
|
||||
|
||||
async def test_unrelated_paths_keep_strict_policy():
|
||||
"""Assert that other paths keep the strict framing policy."""
|
||||
resp = await _dispatch("/api/chat")
|
||||
|
||||
assert resp.headers.get("X-Frame-Options") == "DENY"
|
||||
csp = resp.headers.get("Content-Security-Policy", "")
|
||||
assert "frame-ancestors 'none'" in csp
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: render-pdf route must return 503 (not 500) when PyMuPDF is missing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_db(monkeypatch):
|
||||
"""Create a temporary SQLite database and patch routes.document_routes.SessionLocal."""
|
||||
import os
|
||||
tmpdb = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
|
||||
tmpdb.close()
|
||||
engine = create_engine(
|
||||
f"sqlite:///{tmpdb.name}",
|
||||
connect_args={"check_same_thread": False},
|
||||
poolclass=NullPool,
|
||||
)
|
||||
cdb.Base.metadata.create_all(engine)
|
||||
ts = sessionmaker(bind=engine, autoflush=False, autocommit=False)
|
||||
monkeypatch.setattr(droutes, "SessionLocal", ts)
|
||||
try:
|
||||
yield ts
|
||||
finally:
|
||||
engine.dispose()
|
||||
try:
|
||||
os.unlink(tmpdb.name)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _req():
|
||||
"""Minimal request stub."""
|
||||
return SimpleNamespace(
|
||||
state=SimpleNamespace(current_user="tester"),
|
||||
app=SimpleNamespace(state=SimpleNamespace(auth_manager=None)),
|
||||
)
|
||||
|
||||
|
||||
def _endpoint(method: str, path: str, upload_handler=None):
|
||||
router = droutes.setup_document_routes(MagicMock(), upload_handler)
|
||||
for r in router.routes:
|
||||
if getattr(r, "path", None) == path and method in getattr(r, "methods", set()):
|
||||
return r.endpoint
|
||||
raise RuntimeError(f"{method} {path} not found")
|
||||
|
||||
|
||||
def _make_pdf_doc(db_session) -> str:
|
||||
"""Create a test Document with a pdf_form_source front-matter pointer."""
|
||||
content = (
|
||||
'<!-- pdf_form_source upload_id="'
|
||||
+ "a" * 32
|
||||
+ '" fields="3" -->\n'
|
||||
"- Field 1: value1\n- Field 2: value2\n- Field 3: value3\n"
|
||||
)
|
||||
db = db_session()
|
||||
try:
|
||||
doc = Document(
|
||||
id=str(uuid.uuid4()),
|
||||
session_id=None,
|
||||
title="t",
|
||||
language="markdown",
|
||||
current_content=content,
|
||||
version_count=1,
|
||||
is_active=True,
|
||||
owner="tester",
|
||||
)
|
||||
db.add(doc)
|
||||
db.commit()
|
||||
return doc.id
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
async def test_render_pdf_returns_503_when_pymupdf_missing(monkeypatch, test_db):
|
||||
"""Assert that the render-pdf path returns 503 when PyMuPDF is not installed."""
|
||||
real_import = builtins.__import__
|
||||
|
||||
def fake_import(name, *args, **kwargs):
|
||||
if name == "fitz":
|
||||
raise ImportError("No module named 'fitz'")
|
||||
return real_import(name, *args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(builtins, "__import__", fake_import)
|
||||
|
||||
# Stub route dependencies to isolate the PyMuPDF check
|
||||
import src.pdf_form_doc as pdf_form_doc
|
||||
monkeypatch.setattr(pdf_form_doc, "find_source_upload_id", lambda _content: "a" * 32)
|
||||
monkeypatch.setattr(droutes, "_resolve_user_upload_path", lambda *a, **kw: "/tmp/fake.pdf")
|
||||
|
||||
render_pdf = _endpoint("GET", "/api/document/{doc_id}/render-pdf", upload_handler=MagicMock())
|
||||
doc_id = _make_pdf_doc(test_db)
|
||||
|
||||
from fastapi import HTTPException
|
||||
with pytest.raises(HTTPException) as excinfo:
|
||||
await render_pdf(doc_id, _req())
|
||||
|
||||
assert excinfo.value.status_code == 503
|
||||
detail = str(excinfo.value.detail)
|
||||
assert "requirements-optional.txt" in detail
|
||||
assert "PyMuPDF" in detail
|
||||
|
||||
|
||||
async def test_render_pdf_503_runs_before_file_io(monkeypatch, test_db, tmp_path):
|
||||
"""Assert that the PyMuPDF check runs before resolving or checking the source file path."""
|
||||
real_import = builtins.__import__
|
||||
|
||||
def fake_import(name, *args, **kwargs):
|
||||
if name == "fitz":
|
||||
raise ImportError("No module named 'fitz'")
|
||||
return real_import(name, *args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(builtins, "__import__", fake_import)
|
||||
|
||||
# Use a non-existent path to verify the check fails before checking path existence
|
||||
sentinel_dir = tmp_path / "should-never-be-touched"
|
||||
sentinel_dir.mkdir()
|
||||
sentinel_path = str(sentinel_dir / "source.pdf")
|
||||
|
||||
import src.pdf_form_doc as pdf_form_doc
|
||||
monkeypatch.setattr(pdf_form_doc, "find_source_upload_id", lambda _content: "a" * 32)
|
||||
monkeypatch.setattr(droutes, "_resolve_user_upload_path", lambda *a, **kw: sentinel_path)
|
||||
|
||||
render_pdf = _endpoint("GET", "/api/document/{doc_id}/render-pdf", upload_handler=MagicMock())
|
||||
doc_id = _make_pdf_doc(test_db)
|
||||
|
||||
from fastapi import HTTPException
|
||||
with pytest.raises(HTTPException) as excinfo:
|
||||
await render_pdf(doc_id, _req())
|
||||
|
||||
assert excinfo.value.status_code == 503
|
||||
Reference in New Issue
Block a user