fix(document): allow render-pdf to be framed and 503 cleanly on missing PyMuPDF (#2103)

* fix(document): allow render-pdf to be framed and 503 cleanly on missing PyMuPDF

Fixes #2101.

Two related bugs in the PDF-form library preview flow:

1. SecurityHeadersMiddleware was sending X-Frame-Options: DENY and
   frame-ancestors 'none' on /api/document/{doc_id}/render-pdf, but
   static/js/documentLibrary.js embeds the response in an <iframe> for
   the library card preview. The browser blocked the load with
   ERR_BLOCKED_BY_RESPONSE, leaving the user with a blank panel.

   Extend the existing is_tool_render exemption to also cover
   /api/document/.../render-pdf. Per-document owner checks still run in
   the route handler, so the exemption is scoped the same way as the
   tool-render exemption it mirrors. /api/document/.../export-pdf is
   left untouched — it's a download (Content-Disposition: attachment),
   not an iframe embed.

2. routes/document_routes.py:render_pdf called fill_fields, which
   raises RuntimeError via _require_fitz() when the optional PyMuPDF
   dependency isn't installed. That RuntimeError bubbled out as a
   generic 500 with a cryptic 'PDF render failed' detail.

   Reuse the existing _load_pdf_viewer_fitz() helper to fail fast with
   a 503 and a user-actionable install hint (mentions
   requirements-optional.txt and AGPL-3.0), matching the convention
   used by the other PDF endpoints.

Tests cover both fixes:
- middleware headers on /api/document/.../render-pdf (iframeable, but
  X-Content-Type-Options and Referrer-Policy are still set)
- middleware headers on /api/document/.../export-pdf (must stay strict)
- middleware path matching precision (similar-but-different paths stay
  strict)
- middleware headers on /api/tools/.../render (no regression)
- middleware headers on /api/chat (no regression)
- render-pdf returns 503 with install hint when PyMuPDF is missing
- 503 is raised before any file I/O (fail-fast ordering)

* chore: address maintainer feedback on PDF previews same-origin framing and comment trimming

* chore: make render-pdf regression tests order-independent
This commit is contained in:
Shreyas S Joshi
2026-06-18 11:55:26 +05:30
committed by GitHub
parent 56ba144875
commit f70db19cc6
3 changed files with 247 additions and 6 deletions
+3 -6
View File
@@ -67,10 +67,9 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
response = await call_next(request)
path = request.url.path
# Tool render endpoints are served inside iframes — allow framing by self
# Tool render endpoints
is_tool_render = path.startswith("/api/tools/") and path.endswith("/render")
# PDF previews are embedded by the in-app document library. Keep the
# exception route-scoped so normal app pages remain unframeable.
# Document library PDF preview endpoint
is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf")
# Visual report pages are self-contained HTML — need inline scripts + external images
is_report = path.startswith("/api/research/report/")
@@ -97,9 +96,7 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
"frame-ancestors 'none'"
)
elif is_tool_render:
# Tool iframe content: skip all framing headers — the iframe's
# sandbox="allow-scripts" attribute provides isolation.
# Don't overwrite the route's own restrictive CSP either.
# Skip framing headers for tools.
pass
elif is_document_pdf_preview:
response.headers["X-Frame-Options"] = "SAMEORIGIN"
+6
View File
@@ -1332,6 +1332,12 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
if not pdf_path:
raise HTTPException(404, f"Source PDF {upload_id} not found")
# Fail fast with a clear 503 if the optional PyMuPDF dependency
# is missing — fill_fields/stamp_annotations will otherwise
# raise RuntimeError deep inside and bubble out as a 500.
# Mirrors the convention in _load_pdf_viewer_fitz above.
_load_pdf_viewer_fitz()
values = parse_markdown_to_values(doc.current_content or "")
out_path = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name
_to_unlink.append(out_path)
+238
View File
@@ -0,0 +1,238 @@
"""Regression tests for the document PDF preview framing headers and PyMuPDF dependency handling."""
import builtins
import tempfile
import uuid
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import NullPool
import core.database as cdb
import routes.document_routes as droutes
from core.database import Document
from core.middleware import SecurityHeadersMiddleware
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
class _FakeURL:
def __init__(self, path: str):
self.path = path
self.scheme = "http"
class _FakeRequest:
def __init__(self, path: str):
self.url = _FakeURL(path)
self.headers = {}
self.state = SimpleNamespace()
class _FakeResponse:
def __init__(self):
self.headers: dict[str, str] = {}
async def _dispatch(path: str) -> _FakeResponse:
mw = SecurityHeadersMiddleware(MagicMock())
resp = _FakeResponse()
call_next = AsyncMock(return_value=resp)
await mw.dispatch(_FakeRequest(path), call_next)
return resp
# ---------------------------------------------------------------------------
# Test 1: middleware framing policy on /api/document/.../render-pdf
# ---------------------------------------------------------------------------
async def test_doc_render_pdf_same_origin_framing():
"""Assert that /api/document/{id}/render-pdf allows same-origin framing."""
resp = await _dispatch("/api/document/abc-123/render-pdf")
assert resp.headers.get("X-Frame-Options") == "SAMEORIGIN"
csp = resp.headers.get("Content-Security-Policy", "")
assert "frame-ancestors 'self'" in csp
async def test_doc_render_pdf_keeps_baseline_security_headers():
"""Assert that baseline security headers are preserved on the render-pdf path."""
resp = await _dispatch("/api/document/abc-123/render-pdf")
assert resp.headers.get("X-Content-Type-Options") == "nosniff"
assert resp.headers.get("Referrer-Policy") == "no-referrer"
async def test_doc_export_pdf_still_frame_blocked():
"""Assert that the export-pdf path remains frame-blocked."""
resp = await _dispatch("/api/document/abc-123/export-pdf")
assert resp.headers.get("X-Frame-Options") == "DENY"
assert "frame-ancestors 'none'" in resp.headers.get("Content-Security-Policy", "")
async def test_doc_path_matching_is_precise():
"""Assert that similar paths are not exempted from framing restrictions."""
for path in [
"/api/document/abc-123/render-pdfx",
"/api/document/abc-123/render-pdf/foo",
"/api/documents/abc-123/render-pdf",
]:
resp = await _dispatch(path)
assert resp.headers.get("X-Frame-Options") == "DENY"
async def test_tool_render_exemption_preserved():
"""Assert that the tool-render path remains exempt from framing headers."""
resp = await _dispatch("/api/tools/foo/bar/render")
assert "X-Frame-Options" not in resp.headers
csp = resp.headers.get("Content-Security-Policy", "")
assert "frame-ancestors" not in csp
async def test_unrelated_paths_keep_strict_policy():
"""Assert that other paths keep the strict framing policy."""
resp = await _dispatch("/api/chat")
assert resp.headers.get("X-Frame-Options") == "DENY"
csp = resp.headers.get("Content-Security-Policy", "")
assert "frame-ancestors 'none'" in csp
# ---------------------------------------------------------------------------
# Test 2: render-pdf route must return 503 (not 500) when PyMuPDF is missing
# ---------------------------------------------------------------------------
@pytest.fixture
def test_db(monkeypatch):
"""Create a temporary SQLite database and patch routes.document_routes.SessionLocal."""
import os
tmpdb = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
tmpdb.close()
engine = create_engine(
f"sqlite:///{tmpdb.name}",
connect_args={"check_same_thread": False},
poolclass=NullPool,
)
cdb.Base.metadata.create_all(engine)
ts = sessionmaker(bind=engine, autoflush=False, autocommit=False)
monkeypatch.setattr(droutes, "SessionLocal", ts)
try:
yield ts
finally:
engine.dispose()
try:
os.unlink(tmpdb.name)
except OSError:
pass
def _req():
"""Minimal request stub."""
return SimpleNamespace(
state=SimpleNamespace(current_user="tester"),
app=SimpleNamespace(state=SimpleNamespace(auth_manager=None)),
)
def _endpoint(method: str, path: str, upload_handler=None):
router = droutes.setup_document_routes(MagicMock(), upload_handler)
for r in router.routes:
if getattr(r, "path", None) == path and method in getattr(r, "methods", set()):
return r.endpoint
raise RuntimeError(f"{method} {path} not found")
def _make_pdf_doc(db_session) -> str:
"""Create a test Document with a pdf_form_source front-matter pointer."""
content = (
'<!-- pdf_form_source upload_id="'
+ "a" * 32
+ '" fields="3" -->\n'
"- Field 1: value1\n- Field 2: value2\n- Field 3: value3\n"
)
db = db_session()
try:
doc = Document(
id=str(uuid.uuid4()),
session_id=None,
title="t",
language="markdown",
current_content=content,
version_count=1,
is_active=True,
owner="tester",
)
db.add(doc)
db.commit()
return doc.id
finally:
db.close()
async def test_render_pdf_returns_503_when_pymupdf_missing(monkeypatch, test_db):
"""Assert that the render-pdf path returns 503 when PyMuPDF is not installed."""
real_import = builtins.__import__
def fake_import(name, *args, **kwargs):
if name == "fitz":
raise ImportError("No module named 'fitz'")
return real_import(name, *args, **kwargs)
monkeypatch.setattr(builtins, "__import__", fake_import)
# Stub route dependencies to isolate the PyMuPDF check
import src.pdf_form_doc as pdf_form_doc
monkeypatch.setattr(pdf_form_doc, "find_source_upload_id", lambda _content: "a" * 32)
monkeypatch.setattr(droutes, "_resolve_user_upload_path", lambda *a, **kw: "/tmp/fake.pdf")
render_pdf = _endpoint("GET", "/api/document/{doc_id}/render-pdf", upload_handler=MagicMock())
doc_id = _make_pdf_doc(test_db)
from fastapi import HTTPException
with pytest.raises(HTTPException) as excinfo:
await render_pdf(doc_id, _req())
assert excinfo.value.status_code == 503
detail = str(excinfo.value.detail)
assert "requirements-optional.txt" in detail
assert "PyMuPDF" in detail
async def test_render_pdf_503_runs_before_file_io(monkeypatch, test_db, tmp_path):
"""Assert that the PyMuPDF check runs before resolving or checking the source file path."""
real_import = builtins.__import__
def fake_import(name, *args, **kwargs):
if name == "fitz":
raise ImportError("No module named 'fitz'")
return real_import(name, *args, **kwargs)
monkeypatch.setattr(builtins, "__import__", fake_import)
# Use a non-existent path to verify the check fails before checking path existence
sentinel_dir = tmp_path / "should-never-be-touched"
sentinel_dir.mkdir()
sentinel_path = str(sentinel_dir / "source.pdf")
import src.pdf_form_doc as pdf_form_doc
monkeypatch.setattr(pdf_form_doc, "find_source_upload_id", lambda _content: "a" * 32)
monkeypatch.setattr(droutes, "_resolve_user_upload_path", lambda *a, **kw: sentinel_path)
render_pdf = _endpoint("GET", "/api/document/{doc_id}/render-pdf", upload_handler=MagicMock())
doc_id = _make_pdf_doc(test_db)
from fastapi import HTTPException
with pytest.raises(HTTPException) as excinfo:
await render_pdf(doc_id, _req())
assert excinfo.value.status_code == 503