Files
Shreyas S Joshi f70db19cc6 fix(document): allow render-pdf to be framed and 503 cleanly on missing PyMuPDF (#2103)
* fix(document): allow render-pdf to be framed and 503 cleanly on missing PyMuPDF

Fixes #2101.

Two related bugs in the PDF-form library preview flow:

1. SecurityHeadersMiddleware was sending X-Frame-Options: DENY and
   frame-ancestors 'none' on /api/document/{doc_id}/render-pdf, but
   static/js/documentLibrary.js embeds the response in an <iframe> for
   the library card preview. The browser blocked the load with
   ERR_BLOCKED_BY_RESPONSE, leaving the user with a blank panel.

   Extend the existing is_tool_render exemption to also cover
   /api/document/.../render-pdf. Per-document owner checks still run in
   the route handler, so the exemption is scoped the same way as the
   tool-render exemption it mirrors. /api/document/.../export-pdf is
   left untouched — it's a download (Content-Disposition: attachment),
   not an iframe embed.

2. routes/document_routes.py:render_pdf called fill_fields, which
   raises RuntimeError via _require_fitz() when the optional PyMuPDF
   dependency isn't installed. That RuntimeError bubbled out as a
   generic 500 with a cryptic 'PDF render failed' detail.

   Reuse the existing _load_pdf_viewer_fitz() helper to fail fast with
   a 503 and a user-actionable install hint (mentions
   requirements-optional.txt and AGPL-3.0), matching the convention
   used by the other PDF endpoints.

Tests cover both fixes:
- middleware headers on /api/document/.../render-pdf (iframeable, but
  X-Content-Type-Options and Referrer-Policy are still set)
- middleware headers on /api/document/.../export-pdf (must stay strict)
- middleware path matching precision (similar-but-different paths stay
  strict)
- middleware headers on /api/tools/.../render (no regression)
- middleware headers on /api/chat (no regression)
- render-pdf returns 503 with install hint when PyMuPDF is missing
- 503 is raised before any file I/O (fail-fast ordering)

* chore: address maintainer feedback on PDF previews same-origin framing and comment trimming

* chore: make render-pdf regression tests order-independent
2026-06-18 06:25:26 +00:00

127 lines
5.5 KiB
Python

# src/middleware.py
# Shared middleware, decorators, and request helpers
import os
import secrets
from fastapi import HTTPException, Request
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import Response
# Loopback credentials for the in-app tool layer. Agent tool calls reach
# admin-gated routes over HTTP without the admin user's session cookie, so
# they present this token instead. It is resolved once, at import time: a
# non-empty ODYSSEUS_INTERNAL_TOKEN environment value wins, otherwise a
# random 64-hex-character token is generated for this process. Tools read
# the value from this module; it is never persisted or exposed externally.
INTERNAL_TOOL_HEADER = "X-Odysseus-Internal-Token"
INTERNAL_TOOL_TOKEN = os.getenv("ODYSSEUS_INTERNAL_TOKEN") or secrets.token_hex(32)
# Reserved pseudo-username carried by in-process tool-loopback requests;
# require_admin trusts it.
INTERNAL_TOOL_USER = "internal-tool"
def is_cors_preflight(method: str, headers) -> bool:
    """Tell whether a request is a genuine CORS preflight.

    A preflight is an ``OPTIONS`` request that carries the
    ``Access-Control-Request-Method`` header. Preflights are sent without
    credentials by design and have to reach CORSMiddleware to be answered;
    putting them behind auth turns them into 401s and breaks every
    cross-origin browser/WebView client.

    The function is pure, so it can be unit-tested without standing up the
    app. The header lookup is a plain ``in`` test against ``headers``, so
    case-sensitivity is whatever that container implements.
    """
    if method != "OPTIONS":
        # Not OPTIONS: never a preflight, and ``headers`` is not consulted.
        return False
    return "access-control-request-method" in headers
def require_admin(request: Request):
    """Reject the request with 403 unless it comes from an admin.

    Access is granted without an admin lookup when either:

    * the request is an in-process tool-loopback call (see below), or
    * the ``AUTH_ENABLED`` environment variable equals ``"false"``
      (compared case-insensitively; it defaults to ``"true"``).

    Otherwise the app's ``auth_manager`` must exist, be configured, and
    report ``request.state.current_user`` as an admin.

    Raises:
        HTTPException: 403 "Admin only" when none of the above holds.
    """
    # Tool-loopback bypass. A request qualifies when it either
    #   (a) carries the internal token header with the matching value, or
    #   (b) was already stamped by the auth middleware with the reserved
    #       internal-tool username on request.state.current_user.
    # Any exception raised while probing is swallowed and the regular
    # checks below decide the outcome.
    try:
        presented = request.headers.get(INTERNAL_TOOL_HEADER)
        token_matches = bool(presented) and secrets.compare_digest(
            presented, INTERNAL_TOOL_TOKEN
        )
        if token_matches or getattr(request.state, "current_user", None) == INTERNAL_TOOL_USER:
            return
    except Exception:
        pass

    manager = getattr(request.app.state, "auth_manager", None)
    auth_disabled = os.getenv("AUTH_ENABLED", "true").lower() == "false"
    if auth_disabled:
        return

    # Only a configured manager that vouches for the current user lets the
    # request through; every other combination ends in the same 403.
    if manager and manager.is_configured:
        current = getattr(request.state, "current_user", None)
        if current and manager.is_admin(current):
            return
    raise HTTPException(403, "Admin only")
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Add standard security headers to all responses.

    Every response gets ``X-Content-Type-Options``, ``Referrer-Policy`` and
    ``Permissions-Policy``; requests seen as HTTPS additionally get HSTS.
    Framing and Content-Security-Policy headers depend on the request path:

    * ``/api/research/report/...`` -- relaxed CSP (inline scripts/styles,
      ``https:`` images); framing denied.
    * ``/api/tools/.../render`` -- no framing or CSP headers are added.
    * ``/api/document/.../render-pdf`` -- same-origin framing only, with a
      ``default-src 'none'`` CSP.
    * anything else -- framing denied and a nonce-based CSP.

    Headers are assigned by name on the outgoing response, so a same-named
    header set by the route handler is replaced.
    """

    async def dispatch(self, request: Request, call_next) -> Response:
        """Run the downstream handler, then stamp security headers on its response.

        A fresh nonce is stored on ``request.state.csp_nonce`` before the
        handler runs; the same value is advertised in the default CSP's
        ``script-src``.
        """
        # Generate a per-request nonce for inline scripts
        nonce = secrets.token_hex(16)
        request.state.csp_nonce = nonce
        response = await call_next(request)

        path = request.url.path
        # Tool render endpoints
        is_tool_render = path.startswith("/api/tools/") and path.endswith("/render")
        # Document library PDF preview endpoint
        is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf")
        # Visual report pages are self-contained HTML — need inline scripts + external images
        is_report = path.startswith("/api/research/report/")

        response.headers["X-Content-Type-Options"] = "nosniff"
        response.headers["Referrer-Policy"] = "no-referrer"
        response.headers["Permissions-Policy"] = "camera=(), microphone=(self), geolocation=()"

        # X-Forwarded-Proto is also consulted -- presumably for deployments
        # where TLS terminates at a reverse proxy in front of the app.
        is_https = (
            request.url.scheme == "https"
            or request.headers.get("X-Forwarded-Proto") == "https"
        )
        if is_https:
            response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"

        if is_report:
            # The CSP below already forbids framing via frame-ancestors
            # 'none'. Send the legacy header as well so browsers that do not
            # implement frame-ancestors get the same policy as the default
            # branch instead of no framing protection at all.
            response.headers["X-Frame-Options"] = "DENY"
            response.headers["Content-Security-Policy"] = (
                "default-src 'self'; "
                "script-src 'self' 'unsafe-inline'; "
                "style-src 'self' 'unsafe-inline'; "
                "font-src 'self'; "
                "img-src 'self' data: blob: https:; "
                "connect-src 'self'; "
                "frame-ancestors 'none'"
            )
        elif is_tool_render:
            # Skip framing headers for tools.
            pass
        elif is_document_pdf_preview:
            # The preview is embedded in an <iframe> by the app itself, so
            # framing is allowed for the same origin only.
            response.headers["X-Frame-Options"] = "SAMEORIGIN"
            response.headers["Content-Security-Policy"] = (
                "default-src 'none'; "
                "frame-ancestors 'self'"
            )
        else:
            response.headers["X-Frame-Options"] = "DENY"
            # NOTE: `style-src 'unsafe-inline'` is intentionally retained.
            # `static/index.html` and `static/login.html` ship inline <style>
            # blocks, and several JS modules build runtime `style=""` attrs.
            # Migrating to nonce-only requires templating the HTML files +
            # auditing every JS-set style attribute. Since inline styles
            # don't execute script, the residual risk is visual-only.
            response.headers["Content-Security-Policy"] = (
                "default-src 'self'; "
                f"script-src 'self' 'nonce-{nonce}' https://cdn.jsdelivr.net; "
                "style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; "
                "font-src 'self' https://cdn.jsdelivr.net; "
                "img-src 'self' data: blob:; "
                "media-src 'self' blob:; "
                "connect-src 'self'; "
                "frame-src 'self'; "
                "frame-ancestors 'none'"
            )
        return response