mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-29 16:12:06 -04:00
fix(document): allow render-pdf to be framed and 503 cleanly on missing PyMuPDF (#2103)
* fix(document): allow render-pdf to be framed and 503 cleanly on missing PyMuPDF Fixes #2101. Two related bugs in the PDF-form library preview flow: 1. SecurityHeadersMiddleware was sending X-Frame-Options: DENY and frame-ancestors 'none' on /api/document/{doc_id}/render-pdf, but static/js/documentLibrary.js embeds the response in an <iframe> for the library card preview. The browser blocked the load with ERR_BLOCKED_BY_RESPONSE, leaving the user with a blank panel. Extend the existing is_tool_render exemption to also cover /api/document/.../render-pdf. Per-document owner checks still run in the route handler, so the exemption is scoped the same way as the tool-render exemption it mirrors. /api/document/.../export-pdf is left untouched — it's a download (Content-Disposition: attachment), not an iframe embed. 2. routes/document_routes.py:render_pdf called fill_fields, which raises RuntimeError via _require_fitz() when the optional PyMuPDF dependency isn't installed. That RuntimeError bubbled out as a generic 500 with a cryptic 'PDF render failed' detail. Reuse the existing _load_pdf_viewer_fitz() helper to fail fast with a 503 and a user-actionable install hint (mentions requirements-optional.txt and AGPL-3.0), matching the convention used by the other PDF endpoints. Tests cover both fixes: - middleware headers on /api/document/.../render-pdf (iframeable, but X-Content-Type-Options and Referrer-Policy are still set) - middleware headers on /api/document/.../export-pdf (must stay strict) - middleware path matching precision (similar-but-different paths stay strict) - middleware headers on /api/tools/.../render (no regression) - middleware headers on /api/chat (no regression) - render-pdf returns 503 with install hint when PyMuPDF is missing - 503 is raised before any file I/O (fail-fast ordering) * chore: address maintainer feedback on PDF previews same-origin framing and comment trimming * chore: make render-pdf regression tests order-independent
This commit is contained in:
+3
-6
@@ -67,10 +67,9 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
|
|||||||
response = await call_next(request)
|
response = await call_next(request)
|
||||||
path = request.url.path
|
path = request.url.path
|
||||||
|
|
||||||
# Tool render endpoints are served inside iframes — allow framing by self
|
# Tool render endpoints
|
||||||
is_tool_render = path.startswith("/api/tools/") and path.endswith("/render")
|
is_tool_render = path.startswith("/api/tools/") and path.endswith("/render")
|
||||||
# PDF previews are embedded by the in-app document library. Keep the
|
# Document library PDF preview endpoint
|
||||||
# exception route-scoped so normal app pages remain unframeable.
|
|
||||||
is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf")
|
is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf")
|
||||||
# Visual report pages are self-contained HTML — need inline scripts + external images
|
# Visual report pages are self-contained HTML — need inline scripts + external images
|
||||||
is_report = path.startswith("/api/research/report/")
|
is_report = path.startswith("/api/research/report/")
|
||||||
@@ -97,9 +96,7 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
|
|||||||
"frame-ancestors 'none'"
|
"frame-ancestors 'none'"
|
||||||
)
|
)
|
||||||
elif is_tool_render:
|
elif is_tool_render:
|
||||||
# Tool iframe content: skip all framing headers — the iframe's
|
# Skip framing headers for tools.
|
||||||
# sandbox="allow-scripts" attribute provides isolation.
|
|
||||||
# Don't overwrite the route's own restrictive CSP either.
|
|
||||||
pass
|
pass
|
||||||
elif is_document_pdf_preview:
|
elif is_document_pdf_preview:
|
||||||
response.headers["X-Frame-Options"] = "SAMEORIGIN"
|
response.headers["X-Frame-Options"] = "SAMEORIGIN"
|
||||||
|
|||||||
@@ -1332,6 +1332,12 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
|
|||||||
if not pdf_path:
|
if not pdf_path:
|
||||||
raise HTTPException(404, f"Source PDF {upload_id} not found")
|
raise HTTPException(404, f"Source PDF {upload_id} not found")
|
||||||
|
|
||||||
|
# Fail fast with a clear 503 if the optional PyMuPDF dependency
|
||||||
|
# is missing — fill_fields/stamp_annotations will otherwise
|
||||||
|
# raise RuntimeError deep inside and bubble out as a 500.
|
||||||
|
# Mirrors the convention in _load_pdf_viewer_fitz above.
|
||||||
|
_load_pdf_viewer_fitz()
|
||||||
|
|
||||||
values = parse_markdown_to_values(doc.current_content or "")
|
values = parse_markdown_to_values(doc.current_content or "")
|
||||||
out_path = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name
|
out_path = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name
|
||||||
_to_unlink.append(out_path)
|
_to_unlink.append(out_path)
|
||||||
|
|||||||
@@ -0,0 +1,238 @@
|
|||||||
|
"""Regression tests for the document PDF preview framing headers and PyMuPDF dependency handling."""
|
||||||
|
|
||||||
|
import builtins
|
||||||
|
import tempfile
|
||||||
|
import uuid
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from sqlalchemy import create_engine
|
||||||
|
from sqlalchemy.orm import sessionmaker
|
||||||
|
from sqlalchemy.pool import NullPool
|
||||||
|
|
||||||
|
import core.database as cdb
|
||||||
|
import routes.document_routes as droutes
|
||||||
|
from core.database import Document
|
||||||
|
from core.middleware import SecurityHeadersMiddleware
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeURL:
|
||||||
|
def __init__(self, path: str):
|
||||||
|
self.path = path
|
||||||
|
self.scheme = "http"
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeRequest:
|
||||||
|
def __init__(self, path: str):
|
||||||
|
self.url = _FakeURL(path)
|
||||||
|
self.headers = {}
|
||||||
|
self.state = SimpleNamespace()
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeResponse:
|
||||||
|
def __init__(self):
|
||||||
|
self.headers: dict[str, str] = {}
|
||||||
|
|
||||||
|
|
||||||
|
async def _dispatch(path: str) -> _FakeResponse:
|
||||||
|
mw = SecurityHeadersMiddleware(MagicMock())
|
||||||
|
resp = _FakeResponse()
|
||||||
|
call_next = AsyncMock(return_value=resp)
|
||||||
|
await mw.dispatch(_FakeRequest(path), call_next)
|
||||||
|
return resp
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 1: middleware framing policy on /api/document/.../render-pdf
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
async def test_doc_render_pdf_same_origin_framing():
|
||||||
|
"""Assert that /api/document/{id}/render-pdf allows same-origin framing."""
|
||||||
|
resp = await _dispatch("/api/document/abc-123/render-pdf")
|
||||||
|
|
||||||
|
assert resp.headers.get("X-Frame-Options") == "SAMEORIGIN"
|
||||||
|
csp = resp.headers.get("Content-Security-Policy", "")
|
||||||
|
assert "frame-ancestors 'self'" in csp
|
||||||
|
|
||||||
|
|
||||||
|
async def test_doc_render_pdf_keeps_baseline_security_headers():
|
||||||
|
"""Assert that baseline security headers are preserved on the render-pdf path."""
|
||||||
|
resp = await _dispatch("/api/document/abc-123/render-pdf")
|
||||||
|
|
||||||
|
assert resp.headers.get("X-Content-Type-Options") == "nosniff"
|
||||||
|
assert resp.headers.get("Referrer-Policy") == "no-referrer"
|
||||||
|
|
||||||
|
|
||||||
|
async def test_doc_export_pdf_still_frame_blocked():
|
||||||
|
"""Assert that the export-pdf path remains frame-blocked."""
|
||||||
|
resp = await _dispatch("/api/document/abc-123/export-pdf")
|
||||||
|
|
||||||
|
assert resp.headers.get("X-Frame-Options") == "DENY"
|
||||||
|
assert "frame-ancestors 'none'" in resp.headers.get("Content-Security-Policy", "")
|
||||||
|
|
||||||
|
|
||||||
|
async def test_doc_path_matching_is_precise():
|
||||||
|
"""Assert that similar paths are not exempted from framing restrictions."""
|
||||||
|
for path in [
|
||||||
|
"/api/document/abc-123/render-pdfx",
|
||||||
|
"/api/document/abc-123/render-pdf/foo",
|
||||||
|
"/api/documents/abc-123/render-pdf",
|
||||||
|
]:
|
||||||
|
resp = await _dispatch(path)
|
||||||
|
assert resp.headers.get("X-Frame-Options") == "DENY"
|
||||||
|
|
||||||
|
|
||||||
|
async def test_tool_render_exemption_preserved():
|
||||||
|
"""Assert that the tool-render path remains exempt from framing headers."""
|
||||||
|
resp = await _dispatch("/api/tools/foo/bar/render")
|
||||||
|
|
||||||
|
assert "X-Frame-Options" not in resp.headers
|
||||||
|
csp = resp.headers.get("Content-Security-Policy", "")
|
||||||
|
assert "frame-ancestors" not in csp
|
||||||
|
|
||||||
|
|
||||||
|
async def test_unrelated_paths_keep_strict_policy():
|
||||||
|
"""Assert that other paths keep the strict framing policy."""
|
||||||
|
resp = await _dispatch("/api/chat")
|
||||||
|
|
||||||
|
assert resp.headers.get("X-Frame-Options") == "DENY"
|
||||||
|
csp = resp.headers.get("Content-Security-Policy", "")
|
||||||
|
assert "frame-ancestors 'none'" in csp
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 2: render-pdf route must return 503 (not 500) when PyMuPDF is missing
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def test_db(monkeypatch):
|
||||||
|
"""Create a temporary SQLite database and patch routes.document_routes.SessionLocal."""
|
||||||
|
import os
|
||||||
|
tmpdb = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
|
||||||
|
tmpdb.close()
|
||||||
|
engine = create_engine(
|
||||||
|
f"sqlite:///{tmpdb.name}",
|
||||||
|
connect_args={"check_same_thread": False},
|
||||||
|
poolclass=NullPool,
|
||||||
|
)
|
||||||
|
cdb.Base.metadata.create_all(engine)
|
||||||
|
ts = sessionmaker(bind=engine, autoflush=False, autocommit=False)
|
||||||
|
monkeypatch.setattr(droutes, "SessionLocal", ts)
|
||||||
|
try:
|
||||||
|
yield ts
|
||||||
|
finally:
|
||||||
|
engine.dispose()
|
||||||
|
try:
|
||||||
|
os.unlink(tmpdb.name)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _req():
|
||||||
|
"""Minimal request stub."""
|
||||||
|
return SimpleNamespace(
|
||||||
|
state=SimpleNamespace(current_user="tester"),
|
||||||
|
app=SimpleNamespace(state=SimpleNamespace(auth_manager=None)),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _endpoint(method: str, path: str, upload_handler=None):
|
||||||
|
router = droutes.setup_document_routes(MagicMock(), upload_handler)
|
||||||
|
for r in router.routes:
|
||||||
|
if getattr(r, "path", None) == path and method in getattr(r, "methods", set()):
|
||||||
|
return r.endpoint
|
||||||
|
raise RuntimeError(f"{method} {path} not found")
|
||||||
|
|
||||||
|
|
||||||
|
def _make_pdf_doc(db_session) -> str:
|
||||||
|
"""Create a test Document with a pdf_form_source front-matter pointer."""
|
||||||
|
content = (
|
||||||
|
'<!-- pdf_form_source upload_id="'
|
||||||
|
+ "a" * 32
|
||||||
|
+ '" fields="3" -->\n'
|
||||||
|
"- Field 1: value1\n- Field 2: value2\n- Field 3: value3\n"
|
||||||
|
)
|
||||||
|
db = db_session()
|
||||||
|
try:
|
||||||
|
doc = Document(
|
||||||
|
id=str(uuid.uuid4()),
|
||||||
|
session_id=None,
|
||||||
|
title="t",
|
||||||
|
language="markdown",
|
||||||
|
current_content=content,
|
||||||
|
version_count=1,
|
||||||
|
is_active=True,
|
||||||
|
owner="tester",
|
||||||
|
)
|
||||||
|
db.add(doc)
|
||||||
|
db.commit()
|
||||||
|
return doc.id
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_render_pdf_returns_503_when_pymupdf_missing(monkeypatch, test_db):
|
||||||
|
"""Assert that the render-pdf path returns 503 when PyMuPDF is not installed."""
|
||||||
|
real_import = builtins.__import__
|
||||||
|
|
||||||
|
def fake_import(name, *args, **kwargs):
|
||||||
|
if name == "fitz":
|
||||||
|
raise ImportError("No module named 'fitz'")
|
||||||
|
return real_import(name, *args, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(builtins, "__import__", fake_import)
|
||||||
|
|
||||||
|
# Stub route dependencies to isolate the PyMuPDF check
|
||||||
|
import src.pdf_form_doc as pdf_form_doc
|
||||||
|
monkeypatch.setattr(pdf_form_doc, "find_source_upload_id", lambda _content: "a" * 32)
|
||||||
|
monkeypatch.setattr(droutes, "_resolve_user_upload_path", lambda *a, **kw: "/tmp/fake.pdf")
|
||||||
|
|
||||||
|
render_pdf = _endpoint("GET", "/api/document/{doc_id}/render-pdf", upload_handler=MagicMock())
|
||||||
|
doc_id = _make_pdf_doc(test_db)
|
||||||
|
|
||||||
|
from fastapi import HTTPException
|
||||||
|
with pytest.raises(HTTPException) as excinfo:
|
||||||
|
await render_pdf(doc_id, _req())
|
||||||
|
|
||||||
|
assert excinfo.value.status_code == 503
|
||||||
|
detail = str(excinfo.value.detail)
|
||||||
|
assert "requirements-optional.txt" in detail
|
||||||
|
assert "PyMuPDF" in detail
|
||||||
|
|
||||||
|
|
||||||
|
async def test_render_pdf_503_runs_before_file_io(monkeypatch, test_db, tmp_path):
|
||||||
|
"""Assert that the PyMuPDF check runs before resolving or checking the source file path."""
|
||||||
|
real_import = builtins.__import__
|
||||||
|
|
||||||
|
def fake_import(name, *args, **kwargs):
|
||||||
|
if name == "fitz":
|
||||||
|
raise ImportError("No module named 'fitz'")
|
||||||
|
return real_import(name, *args, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(builtins, "__import__", fake_import)
|
||||||
|
|
||||||
|
# Use a non-existent path to verify the check fails before checking path existence
|
||||||
|
sentinel_dir = tmp_path / "should-never-be-touched"
|
||||||
|
sentinel_dir.mkdir()
|
||||||
|
sentinel_path = str(sentinel_dir / "source.pdf")
|
||||||
|
|
||||||
|
import src.pdf_form_doc as pdf_form_doc
|
||||||
|
monkeypatch.setattr(pdf_form_doc, "find_source_upload_id", lambda _content: "a" * 32)
|
||||||
|
monkeypatch.setattr(droutes, "_resolve_user_upload_path", lambda *a, **kw: sentinel_path)
|
||||||
|
|
||||||
|
render_pdf = _endpoint("GET", "/api/document/{doc_id}/render-pdf", upload_handler=MagicMock())
|
||||||
|
doc_id = _make_pdf_doc(test_db)
|
||||||
|
|
||||||
|
from fastapi import HTTPException
|
||||||
|
with pytest.raises(HTTPException) as excinfo:
|
||||||
|
await render_pdf(doc_id, _req())
|
||||||
|
|
||||||
|
assert excinfo.value.status_code == 503
|
||||||
Reference in New Issue
Block a user