mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
1a0e1c5d69
PDF uploads are stored as markdown wrappers with pdf_source or pdf_form_source markers so the editor can preserve extracted text, form fields, and annotations. The library exposed that internal wrapper: auto-created PDF documents used the hashed storage filename as the title, and row/facet language reported markdown instead of pdf.
Derive chat-upload PDF titles from the original upload name, derive document-library display language from the PDF source marker for rows, filters, and facets, and keep markdown wrappers excluded from the markdown facet when they represent PDFs.
The expanded library card already renders PDF-backed documents through /api/document/{id}/render-pdf. Allow only that inline PDF preview endpoint to be framed by same-origin app pages while leaving normal routes on X-Frame-Options: DENY and frame-ancestors none.
Also tighten the existing PDF marker regression assertion so it matches the actual historical corruption signature instead of contradicting the preserved [Page 1 text]: marker.
Fixes #2468
128 lines
5.7 KiB
Python
128 lines
5.7 KiB
Python
# src/middleware.py
|
|
# Shared middleware, decorators, and request helpers
|
|
|
|
import os
|
|
import secrets
|
|
|
|
from fastapi import HTTPException, Request
|
|
from starlette.middleware.base import BaseHTTPMiddleware
|
|
from starlette.responses import Response
|
|
|
|
|
|
# Shared secret that lets the in-app tool layer reach admin-gated routes
# over HTTP loopback: the agent's tool calls do not carry the admin user's
# session cookie, so they present this token in the header named below.
# The value is resolved exactly once, at import time -- a non-empty
# ODYSSEUS_INTERNAL_TOKEN environment variable is used as-is, otherwise a
# random 64-hex-character token is generated for this process. Tools read
# the same value from this module. Never persisted or exposed externally.
INTERNAL_TOOL_HEADER = "X-Odysseus-Internal-Token"
INTERNAL_TOOL_TOKEN = os.getenv("ODYSSEUS_INTERNAL_TOKEN") or secrets.token_hex(32)
|
|
|
|
|
|
def is_cors_preflight(method: str, headers) -> bool:
    """Return True for a genuine CORS preflight request.

    A preflight is an OPTIONS request carrying the
    Access-Control-Request-Method header. Such requests are credential-less
    by design and must reach CORSMiddleware to be answered -- gating them on
    auth 401s the preflight and breaks every cross-origin browser/WebView
    client.

    Pure, so it can be unit-tested without standing up the app.

    Args:
        method: HTTP method of the request; compared exactly against
            ``"OPTIONS"``.
        headers: Container of request header names. Any object supporting
            ``in`` and iteration over its names works, including a plain
            ``dict``.

    Returns:
        True only when the method is OPTIONS and the
        Access-Control-Request-Method header is present under any casing.
    """
    if method != "OPTIONS":
        return False

    wanted = "access-control-request-method"
    # Fast path: lower-cased keys, or a header object whose own membership
    # test already ignores case.
    if wanted in headers:
        return True
    # HTTP header names are case-insensitive, so a plain mapping keyed
    # "Access-Control-Request-Method" must match as well.
    return any(
        isinstance(name, str) and name.lower() == wanted for name in headers
    )
|
|
|
|
|
|
def _is_internal_tool_call(request) -> bool:
    """Return True when *request* is a loopback call from the tool layer.

    Two independent signals are accepted:
      (a) header-direct: the caller set X-Odysseus-Internal-Token to the
          process token, or
      (b) the auth middleware already validated the token and stamped
          request.state.current_user = "internal-tool".
    """
    headers = getattr(request, "headers", None)
    presented = headers.get(INTERNAL_TOOL_HEADER) if headers is not None else None
    if isinstance(presented, str) and presented:
        try:
            # Compare as bytes: compare_digest() raises TypeError when given
            # a str containing non-ASCII characters, and a client can send
            # such a header value.
            if secrets.compare_digest(
                presented.encode("utf-8"),
                INTERNAL_TOOL_TOKEN.encode("utf-8"),
            ):
                return True
        except UnicodeError:
            # A value that cannot be encoded cannot match; keep going so the
            # middleware stamp below is still honoured.
            pass

    state = getattr(request, "state", None)
    return getattr(state, "current_user", None) == "internal-tool"


def require_admin(request: Request):
    """Raise 403 if the current user isn't an admin.

    Allows access when auth is explicitly disabled, or when the request
    carries the in-process internal-tool token used by loopback agent tools.

    Args:
        request: The incoming request. Reads its headers,
            ``request.state.current_user`` and
            ``request.app.state.auth_manager``.

    Raises:
        HTTPException: 403 "Admin only" when no auth manager is available or
            configured, when no current user is set, or when the current
            user is not an admin.
    """
    # In-process bypass for tool-layer loopback calls.
    if _is_internal_tool_call(request):
        return

    # Auth switched off explicitly via AUTH_ENABLED=false: every caller is
    # allowed through.
    if os.getenv("AUTH_ENABLED", "true").lower() == "false":
        return

    auth_mgr = getattr(request.app.state, "auth_manager", None)
    if not auth_mgr or not auth_mgr.is_configured:
        # Fail closed: without a configured manager nobody can be verified.
        raise HTTPException(403, "Admin only")

    user = getattr(request.state, "current_user", None)
    if not user or not auth_mgr.is_admin(user):
        raise HTTPException(403, "Admin only")
|
|
|
|
|
|
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Add standard security headers to all responses.

    Every response gets X-Content-Type-Options, Referrer-Policy and
    Permissions-Policy, plus Strict-Transport-Security when the request
    arrived over HTTPS. Framing and Content-Security-Policy headers depend on
    the request path:

    * ``/api/research/report/...``: self-contained report pages; inline
      scripts/styles and external https images allowed, framing denied.
    * ``/api/tools/.../render``: no framing or CSP headers are added here.
    * ``/api/document/.../render-pdf``: framable by same-origin pages only.
    * everything else: nonce-based CSP, framing denied.
    """

    @staticmethod
    def _is_https(request: Request) -> bool:
        """Return True when the request was made over HTTPS.

        Checks the URL scheme first, then X-Forwarded-Proto. The forwarded
        value is matched case-insensitively and only its first
        comma-separated entry is used, so ``HTTPS`` and a proxy-chain value
        such as ``https, http`` are both recognised.
        """
        if request.url.scheme == "https":
            return True
        forwarded = request.headers.get("X-Forwarded-Proto") or ""
        return forwarded.split(",", 1)[0].strip().lower() == "https"

    async def dispatch(self, request: Request, call_next) -> Response:
        """Run the downstream handler, then stamp security headers on its response.

        A fresh CSP nonce is stored on ``request.state.csp_nonce`` before the
        handler runs.
        """
        # Generate a per-request nonce for inline scripts
        nonce = secrets.token_hex(16)
        request.state.csp_nonce = nonce

        response = await call_next(request)
        path = request.url.path

        # Tool render endpoints are served inside iframes — allow framing by self
        is_tool_render = path.startswith("/api/tools/") and path.endswith("/render")
        # PDF previews are embedded by the in-app document library. Keep the
        # exception route-scoped so normal app pages remain unframeable.
        is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf")
        # Visual report pages are self-contained HTML — need inline scripts + external images
        is_report = path.startswith("/api/research/report/")

        response.headers["X-Content-Type-Options"] = "nosniff"
        response.headers["Referrer-Policy"] = "no-referrer"
        response.headers["Permissions-Policy"] = "camera=(), microphone=(self), geolocation=()"

        if self._is_https(request):
            response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"

        if is_report:
            # Send the legacy header as well as `frame-ancestors 'none'` so
            # the two framing controls agree, as they do in the default
            # branch below.
            response.headers["X-Frame-Options"] = "DENY"
            response.headers["Content-Security-Policy"] = (
                "default-src 'self'; "
                "script-src 'self' 'unsafe-inline'; "
                "style-src 'self' 'unsafe-inline'; "
                "font-src 'self'; "
                "img-src 'self' data: blob: https:; "
                "connect-src 'self'; "
                "frame-ancestors 'none'"
            )
        elif is_tool_render:
            # Tool iframe content: skip all framing headers — the iframe's
            # sandbox="allow-scripts" attribute provides isolation.
            # Don't overwrite the route's own restrictive CSP either.
            pass
        elif is_document_pdf_preview:
            # Same-origin framing only; nothing else may load in the preview.
            response.headers["X-Frame-Options"] = "SAMEORIGIN"
            response.headers["Content-Security-Policy"] = (
                "default-src 'none'; "
                "frame-ancestors 'self'"
            )
        else:
            response.headers["X-Frame-Options"] = "DENY"
            # NOTE: `style-src 'unsafe-inline'` is intentionally retained.
            # `static/index.html` and `static/login.html` ship inline <style>
            # blocks, and several JS modules build runtime `style=""` attrs.
            # Migrating to nonce-only requires templating the HTML files +
            # auditing every JS-set style attribute. Since inline styles
            # don't execute script, the residual risk is visual-only.
            response.headers["Content-Security-Policy"] = (
                "default-src 'self'; "
                f"script-src 'self' 'nonce-{nonce}' https://cdn.jsdelivr.net; "
                "style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; "
                "font-src 'self' https://cdn.jsdelivr.net; "
                "img-src 'self' data: blob:; "
                "media-src 'self' blob:; "
                "connect-src 'self'; "
                "frame-src 'self'; "
                "frame-ancestors 'none'"
            )
        return response
|