diff --git a/core/middleware.py b/core/middleware.py index b3775e812..550ee3bd7 100644 --- a/core/middleware.py +++ b/core/middleware.py @@ -67,6 +67,9 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): # Tool render endpoints are served inside iframes — allow framing by self is_tool_render = path.startswith("/api/tools/") and path.endswith("/render") + # PDF previews are embedded by the in-app document library. Keep the + # exception route-scoped so normal app pages remain unframeable. + is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf") # Visual report pages are self-contained HTML — need inline scripts + external images is_report = path.startswith("/api/research/report/") @@ -96,6 +99,12 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): # sandbox="allow-scripts" attribute provides isolation. # Don't overwrite the route's own restrictive CSP either. pass + elif is_document_pdf_preview: + response.headers["X-Frame-Options"] = "SAMEORIGIN" + response.headers["Content-Security-Policy"] = ( + "default-src 'none'; " + "frame-ancestors 'self'" + ) else: response.headers["X-Frame-Options"] = "DENY" # NOTE: `style-src 'unsafe-inline'` is intentionally retained. diff --git a/routes/document_routes.py b/routes/document_routes.py index 20df372a1..09b7d8b1f 100644 --- a/routes/document_routes.py +++ b/routes/document_routes.py @@ -7,7 +7,7 @@ from typing import Dict, Any, List, Optional from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Form -from sqlalchemy import func, or_ +from sqlalchemy import case, func, or_ from core.database import SessionLocal, Document, DocumentVersion from core.database import Session as DbSession from src.auth_helpers import get_current_user @@ -39,6 +39,19 @@ def _aggregate_language_facets(lang_rows): return out +def _library_language_for_document(doc: Document) -> str: + """Return the display language used by the document library. + + PDF documents are stored as markdown wrappers so the editor can preserve + extracted text, form fields, and annotations. The library should still + identify them as PDFs instead of exposing that internal wrapper format. + """ + from src.pdf_form_doc import find_source_upload_id + + if find_source_upload_id(doc.current_content or ""): + return "pdf" + return doc.language or "text" + from routes.document_helpers import ( DocumentCreate, DocumentUpdate, DocumentPatch, @@ -260,18 +273,29 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: db = SessionLocal() try: from sqlalchemy import or_ + pdf_marker_cond = or_( + Document.current_content.like('%\n\n# Packet\n', + ) + + assert _library_language_for_document(doc) == "pdf" + + +def test_pdf_backed_form_document_displays_as_pdf_in_library(): + doc = SimpleNamespace( + language="markdown", + current_content=( + '' + "\n\n# Intake Form\n" + ), + ) + + assert _library_language_for_document(doc) == "pdf" + + +def test_non_pdf_library_language_is_unchanged(): + assert _library_language_for_document( + SimpleNamespace(language="python", current_content="print('ok')\n") + ) == "python" + assert _library_language_for_document( + SimpleNamespace(language=None, current_content="plain text") + ) == "text" + + +def test_pdf_language_facet_counts_are_summed(): + rows = [("pdf", 1), ("markdown", 2), ("pdf", 1), (None, 1)] + + assert _aggregate_language_facets(rows) == { + "pdf": 2, + "markdown": 2, + "text": 1, + } diff --git a/tests/test_security_headers_pdf_preview.py b/tests/test_security_headers_pdf_preview.py new file mode 100644 index 000000000..53c8dd3d2 --- /dev/null +++ b/tests/test_security_headers_pdf_preview.py @@ -0,0 +1,36 @@ +from fastapi import FastAPI +from fastapi.responses import Response +from fastapi.testclient import TestClient + +from core.middleware import SecurityHeadersMiddleware + + +def _client(): + app = FastAPI() + app.add_middleware(SecurityHeadersMiddleware) + + @app.get("/plain") + async def plain(): + return {"ok": True} + + @app.get("/api/document/{doc_id}/render-pdf") + async def render_pdf(doc_id: str): + return Response(b"%PDF-1.4\n", media_type="application/pdf") + + return TestClient(app) + + +def test_default_routes_remain_unframeable(): + response = _client().get("/plain") + + assert response.headers["X-Frame-Options"] == "DENY" + assert "frame-ancestors 'none'" in response.headers["Content-Security-Policy"] + + +def test_document_pdf_preview_can_be_framed_by_same_origin(): + response = _client().get("/api/document/doc-123/render-pdf") + + assert response.headers["X-Frame-Options"] == "SAMEORIGIN" + assert response.headers["Content-Security-Policy"] == ( + "default-src 'none'; frame-ancestors 'self'" + )