Files
odysseus/tests/test_document_render_pdf_iframe.py
T
Shreyas S Joshi f70db19cc6 fix(document): allow render-pdf to be framed and 503 cleanly on missing PyMuPDF (#2103)
* fix(document): allow render-pdf to be framed and 503 cleanly on missing PyMuPDF

Fixes #2101.

Two related bugs in the PDF-form library preview flow:

1. SecurityHeadersMiddleware was sending X-Frame-Options: DENY and
   frame-ancestors 'none' on /api/document/{doc_id}/render-pdf, but
   static/js/documentLibrary.js embeds the response in an <iframe> for
   the library card preview. The browser blocked the load with
   ERR_BLOCKED_BY_RESPONSE, leaving the user with a blank panel.

   Extend the existing is_tool_render exemption to also cover
   /api/document/.../render-pdf. Per-document owner checks still run in
   the route handler, so the exemption is scoped the same way as the
   tool-render exemption it mirrors. /api/document/.../export-pdf is
   left untouched — it's a download (Content-Disposition: attachment),
   not an iframe embed.

2. routes/document_routes.py:render_pdf called fill_fields, which
   raises RuntimeError via _require_fitz() when the optional PyMuPDF
   dependency isn't installed. That RuntimeError bubbled out as a
   generic 500 with a cryptic 'PDF render failed' detail.

   Reuse the existing _load_pdf_viewer_fitz() helper to fail fast with
   a 503 and a user-actionable install hint (mentions
   requirements-optional.txt and AGPL-3.0), matching the convention
   used by the other PDF endpoints.

Tests cover both fixes:
- middleware headers on /api/document/.../render-pdf (iframeable, but
  X-Content-Type-Options and Referrer-Policy are still set)
- middleware headers on /api/document/.../export-pdf (must stay strict)
- middleware path matching precision (similar-but-different paths stay
  strict)
- middleware headers on /api/tools/.../render (no regression)
- middleware headers on /api/chat (no regression)
- render-pdf returns 503 with install hint when PyMuPDF is missing
- 503 is raised before any file I/O (fail-fast ordering)

* chore: address maintainer feedback on PDF previews same-origin framing and comment trimming

* chore: make render-pdf regression tests order-independent
2026-06-18 06:25:26 +00:00

239 lines
8.0 KiB
Python

"""Regression tests for the document PDF preview framing headers and PyMuPDF dependency handling."""
import builtins
import tempfile
import uuid
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import NullPool
import core.database as cdb
import routes.document_routes as droutes
from core.database import Document
from core.middleware import SecurityHeadersMiddleware
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
class _FakeURL:
def __init__(self, path: str):
self.path = path
self.scheme = "http"
class _FakeRequest:
    """Request stub carrying a fake URL, empty headers, and a blank ``state``."""

    def __init__(self, path: str):
        self.state = SimpleNamespace()
        self.headers = {}
        self.url = _FakeURL(path)
class _FakeResponse:
def __init__(self):
self.headers: dict[str, str] = {}
async def _dispatch(path: str) -> _FakeResponse:
    """Run ``SecurityHeadersMiddleware.dispatch`` for *path* and return the response stub.

    The downstream handler is an ``AsyncMock`` that hands back a fresh
    ``_FakeResponse``; that same object is returned so the caller can inspect
    the headers present on it after the middleware has run.
    """
    response = _FakeResponse()
    downstream = AsyncMock(return_value=response)
    middleware = SecurityHeadersMiddleware(MagicMock())
    await middleware.dispatch(_FakeRequest(path), downstream)
    return response
# ---------------------------------------------------------------------------
# Test 1: middleware framing policy on /api/document/.../render-pdf
# ---------------------------------------------------------------------------
async def test_doc_render_pdf_same_origin_framing():
    """The document render-pdf path must advertise same-origin framing."""
    headers = (await _dispatch("/api/document/abc-123/render-pdf")).headers
    assert headers.get("X-Frame-Options") == "SAMEORIGIN"
    assert "frame-ancestors 'self'" in headers.get("Content-Security-Policy", "")
async def test_doc_render_pdf_keeps_baseline_security_headers():
    """Relaxed framing on render-pdf must not drop the non-framing security headers."""
    headers = (await _dispatch("/api/document/abc-123/render-pdf")).headers
    assert headers.get("X-Content-Type-Options") == "nosniff"
    assert headers.get("Referrer-Policy") == "no-referrer"
async def test_doc_export_pdf_still_frame_blocked():
    """The export-pdf path must keep the strict no-framing headers."""
    headers = (await _dispatch("/api/document/abc-123/export-pdf")).headers
    assert headers.get("X-Frame-Options") == "DENY"
    csp = headers.get("Content-Security-Policy", "")
    assert "frame-ancestors 'none'" in csp
async def test_doc_path_matching_is_precise():
    """Paths that merely resemble render-pdf must still be frame-blocked.

    Each near-miss path is dispatched through the middleware and must come
    back with ``X-Frame-Options: DENY``. The offending path is attached to
    the assertion so a failure identifies which variant regressed.
    """
    near_misses = (
        "/api/document/abc-123/render-pdfx",  # extra characters after the suffix
        "/api/document/abc-123/render-pdf/foo",  # extra trailing segment
        "/api/documents/abc-123/render-pdf",  # plural "documents" prefix
    )
    for path in near_misses:
        resp = await _dispatch(path)
        assert resp.headers.get("X-Frame-Options") == "DENY", path
async def test_tool_render_exemption_preserved():
    """The tool-render path must carry neither framing restriction."""
    headers = (await _dispatch("/api/tools/foo/bar/render")).headers
    assert "X-Frame-Options" not in headers
    assert "frame-ancestors" not in headers.get("Content-Security-Policy", "")
async def test_unrelated_paths_keep_strict_policy():
    """A path outside the exemptions (``/api/chat``) must stay frame-blocked."""
    headers = (await _dispatch("/api/chat")).headers
    assert headers.get("X-Frame-Options") == "DENY"
    assert "frame-ancestors 'none'" in headers.get("Content-Security-Policy", "")
# ---------------------------------------------------------------------------
# Test 2: render-pdf route must return 503 (not 500) when PyMuPDF is missing
# ---------------------------------------------------------------------------
@pytest.fixture
def test_db(monkeypatch):
    """Yield a session factory bound to a throwaway SQLite database file.

    The schema from ``cdb.Base.metadata`` is created in the temp database and
    ``routes.document_routes.SessionLocal`` is monkeypatched to the yielded
    factory for the duration of the test.

    All setup runs inside the ``try`` so the temp file is removed even when
    engine or schema creation fails, not only after a successful yield.

    Yields:
        The ``sessionmaker`` bound to the temporary engine.
    """
    import os

    # delete=False + close(): only the path is needed; SQLAlchemy opens the file itself.
    tmpdb = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    tmpdb.close()
    engine = None
    try:
        engine = create_engine(
            f"sqlite:///{tmpdb.name}",
            connect_args={"check_same_thread": False},
            poolclass=NullPool,
        )
        cdb.Base.metadata.create_all(engine)
        ts = sessionmaker(bind=engine, autoflush=False, autocommit=False)
        monkeypatch.setattr(droutes, "SessionLocal", ts)
        yield ts
    finally:
        if engine is not None:
            engine.dispose()
        try:
            os.unlink(tmpdb.name)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not fail the test.
            pass
def _req():
"""Minimal request stub."""
return SimpleNamespace(
state=SimpleNamespace(current_user="tester"),
app=SimpleNamespace(state=SimpleNamespace(auth_manager=None)),
)
def _endpoint(method: str, path: str, upload_handler=None):
    """Return the endpoint callable registered for *method* and *path*.

    A router is built with ``droutes.setup_document_routes`` (first argument
    mocked) and its routes are scanned in order; the first route whose
    ``path`` equals *path* and whose ``methods`` contains *method* wins.

    Raises:
        RuntimeError: no route matches both *path* and *method*.
    """
    router = droutes.setup_document_routes(MagicMock(), upload_handler)
    for route in router.routes:
        if getattr(route, "path", None) != path:
            continue
        if method not in getattr(route, "methods", set()):
            continue
        return route.endpoint
    raise RuntimeError(f"{method} {path} not found")
def _make_pdf_doc(db_session) -> str:
    """Insert a Document whose content starts with a ``pdf_form_source`` marker.

    A session is opened from *db_session*, the row is committed, and the
    session is closed whether or not the commit succeeds.

    Returns:
        The ``id`` of the stored document.
    """
    upload_id = "a" * 32
    marker = '<!-- pdf_form_source upload_id="' + upload_id + '" fields="3" -->\n'
    field_lines = "- Field 1: value1\n- Field 2: value2\n- Field 3: value3\n"

    session = db_session()
    try:
        record = Document(
            id=str(uuid.uuid4()),
            session_id=None,
            title="t",
            language="markdown",
            current_content=marker + field_lines,
            version_count=1,
            is_active=True,
            owner="tester",
        )
        session.add(record)
        session.commit()
        return record.id
    finally:
        session.close()
async def test_render_pdf_returns_503_when_pymupdf_missing(monkeypatch, test_db):
    """render-pdf must raise a 503 with an install hint when ``fitz`` cannot be imported."""
    original_import = builtins.__import__

    def import_without_fitz(name, *args, **kwargs):
        # Simulate a missing PyMuPDF install; every other import passes through.
        if name != "fitz":
            return original_import(name, *args, **kwargs)
        raise ImportError("No module named 'fitz'")

    monkeypatch.setattr(builtins, "__import__", import_without_fitz)

    # Stub the upload-id lookup and path resolution so only the import check is exercised.
    import src.pdf_form_doc as pdf_form_doc

    monkeypatch.setattr(pdf_form_doc, "find_source_upload_id", lambda _content: "a" * 32)
    monkeypatch.setattr(droutes, "_resolve_user_upload_path", lambda *a, **kw: "/tmp/fake.pdf")

    handler = _endpoint("GET", "/api/document/{doc_id}/render-pdf", upload_handler=MagicMock())
    stored_id = _make_pdf_doc(test_db)

    from fastapi import HTTPException

    with pytest.raises(HTTPException) as caught:
        await handler(stored_id, _req())

    error = caught.value
    assert error.status_code == 503
    message = str(error.detail)
    assert "requirements-optional.txt" in message
    assert "PyMuPDF" in message
async def test_render_pdf_503_runs_before_file_io(monkeypatch, test_db, tmp_path):
    """The missing-PyMuPDF 503 must win even when the source PDF does not exist.

    The stubbed upload path points at a file that was never created, and the
    test still expects a 503.
    """
    original_import = builtins.__import__

    def import_without_fitz(name, *args, **kwargs):
        # Simulate a missing PyMuPDF install; every other import passes through.
        if name != "fitz":
            return original_import(name, *args, **kwargs)
        raise ImportError("No module named 'fitz'")

    monkeypatch.setattr(builtins, "__import__", import_without_fitz)

    # The directory exists but the PDF inside it is deliberately never created.
    untouched_dir = tmp_path / "should-never-be-touched"
    untouched_dir.mkdir()
    missing_pdf = str(untouched_dir / "source.pdf")

    import src.pdf_form_doc as pdf_form_doc

    monkeypatch.setattr(pdf_form_doc, "find_source_upload_id", lambda _content: "a" * 32)
    monkeypatch.setattr(droutes, "_resolve_user_upload_path", lambda *a, **kw: missing_pdf)

    handler = _endpoint("GET", "/api/document/{doc_id}/render-pdf", upload_handler=MagicMock())
    stored_id = _make_pdf_doc(test_db)

    from fastapi import HTTPException

    with pytest.raises(HTTPException) as caught:
        await handler(stored_id, _req())

    assert caught.value.status_code == 503