Fix email-thread HTML injection, attachment path traversal, and missing authz (#475)

Hardens issues found in a security review of the current tree (separate from
the cookbook SSH PR):

- Email thread rendering (static/js/emailLibrary.js): the flat read path runs
  inbound HTML through the allowlist sanitizer, but the two threaded paths
  (_renderTurnsAsBubbles / _renderTurnsFromServer — the default view) injected
  server-parsed `body_html` raw into the DOM. A crafted inbound email could
  inject arbitrary markup (phishing/form/credential-capture/tracking; full XSS
  if a deployment relaxes the script CSP). Now sanitized on all paths.

- Attachment extraction (routes/email_routes.py, routes/email_helpers.py): the
  on-disk extraction dir was `ATTACHMENTS_DIR / f"{folder}_{uid}"` with
  user-controlled folder/uid and no containment, so a folder like `../../tmp`
  could escape ATTACHMENTS_DIR. New attachment_extract_dir() flattens both to a
  single safe segment and asserts containment.

- Diagnostics routes (routes/diagnostics_routes.py): /api/db/stats,
  /api/rag/stats, /api/test/youtube, /api/test-research relied only on the
  global session check (any logged-in user). Now require_admin-gated.

- Defense-in-depth HTML escaping: session HTML export escapes the session name
  (routes/session_routes.py); the MCP OAuth page escapes the reflected Host
  header / server_id (routes/mcp_routes.py).

- Internal-tool token now compared with secrets.compare_digest (constant time)
  in core/middleware.py and app.py.

Adds regression tests in tests/test_security_regressions.py.
This commit is contained in:
Jamieson O'Reilly
2026-06-01 23:20:17 +10:00
committed by GitHub
parent 9e8de43f25
commit 171c29dcf3
9 changed files with 113 additions and 16 deletions
+10 -5
View File
@@ -3,10 +3,11 @@
import logging
from typing import Dict, Any
from fastapi import APIRouter, HTTPException, Form
from fastapi import APIRouter, HTTPException, Form, Request
from services.youtube.youtube_handler import extract_youtube_id, extract_transcript_async
from core.constants import DEFAULT_HOST
from core.middleware import require_admin
logger = logging.getLogger(__name__)
@@ -19,7 +20,8 @@ def setup_diagnostics_routes(
router = APIRouter(tags=["diagnostics"])
@router.get("/api/db/stats")
async def get_database_stats() -> Dict[str, Any]:
async def get_database_stats(request: Request) -> Dict[str, Any]:
require_admin(request)
try:
from core.database import get_detailed_stats
return get_detailed_stats()
@@ -28,13 +30,15 @@ def setup_diagnostics_routes(
raise HTTPException(500, "Failed to retrieve database statistics")
@router.get("/api/rag/stats")
async def get_rag_stats() -> Dict[str, Any]:
async def get_rag_stats(request: Request) -> Dict[str, Any]:
require_admin(request)
if rag_available and rag_manager:
return rag_manager.get_stats()
return {"error": "RAG system not available"}
@router.get("/api/test/youtube")
async def test_youtube(url: str) -> Dict[str, Any]:
async def test_youtube(request: Request, url: str) -> Dict[str, Any]:
require_admin(request)
try:
video_id = extract_youtube_id(url)
if not video_id:
@@ -54,7 +58,8 @@ def setup_diagnostics_routes(
return {"error": str(e)}
@router.post("/api/test-research")
async def test_research(query: str = Form("What is machine learning?")) -> Dict[str, Any]:
async def test_research(request: Request, query: str = Form("What is machine learning?")) -> Dict[str, Any]:
require_admin(request)
try:
endpoint = f"http://{DEFAULT_HOST}:8000/v1/chat/completions"
model = "gpt-oss-120b"
+14
View File
@@ -269,6 +269,20 @@ COMPOSE_UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
SCHEDULED_DB = DATA_DIR / "scheduled_emails.db"
def attachment_extract_dir(folder: str, uid: str) -> Path:
"""Containment-safe extraction directory for an attachment.
`folder` and `uid` are user-controlled (query/path params). Flatten them to
a single safe path segment so a value like folder='../../tmp' can't escape
ATTACHMENTS_DIR, then assert containment as belt-and-suspenders."""
key = re.sub(r"[^A-Za-z0-9._-]", "_", f"{folder}_{uid}") or "_"
target = (ATTACHMENTS_DIR / key).resolve()
base = ATTACHMENTS_DIR.resolve()
if target != base and base not in target.parents:
raise HTTPException(400, "Invalid attachment location")
return target
def _init_scheduled_db():
import sqlite3
conn = sqlite3.connect(SCHEDULED_DB)
+4 -3
View File
@@ -48,6 +48,7 @@ from routes.email_helpers import (
_EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS,
SendEmailRequest, ExtractStyleRequest,
ATTACHMENTS_DIR, COMPOSE_UPLOADS_DIR, SCHEDULED_DB,
attachment_extract_dir,
)
from routes.email_pollers import _start_poller
@@ -1390,7 +1391,7 @@ def setup_email_routes():
msg = email_mod.message_from_bytes(raw)
# Extract to a per-email folder
target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}"
target_dir = attachment_extract_dir(folder, uid)
filepath = _extract_attachment_to_disk(msg, index, target_dir)
if not filepath:
return {"error": f"Attachment index {index} not found"}
@@ -1425,7 +1426,7 @@ def setup_email_routes():
raw = msg_data[0][1]
msg = email_mod.message_from_bytes(raw)
target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}"
target_dir = attachment_extract_dir(folder, uid)
filepath = _extract_attachment_to_disk(msg, index, target_dir)
if not filepath:
return {"error": f"Attachment index {index} not found"}
@@ -1633,7 +1634,7 @@ def setup_email_routes():
raw = msg_data[0][1]
msg = email_mod.message_from_bytes(raw)
target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}"
target_dir = attachment_extract_dir(folder, uid)
filepath = _extract_attachment_to_disk(msg, index, target_dir)
if not filepath:
return {"error": f"Attachment index {index} not found"}
+5
View File
@@ -499,6 +499,11 @@ def setup_mcp_routes(mcp_manager: McpManager):
def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
"""Page with Google sign-in link and URL paste-back form for remote access."""
# Escape values interpolated into the page: `host` comes from the request
# Host header and `server_id` from the OAuth state — neither is trusted.
auth_url = html.escape(auth_url, quote=True)
server_id = html.escape(server_id, quote=True)
host = html.escape(host, quote=True)
return f"""<!DOCTYPE html>
<html><head>
<meta charset="UTF-8"><title>Authorize Odysseus</title>
+4 -2
View File
@@ -1,5 +1,6 @@
# routes/session_routes.py
import re
import html
import json
import uuid
from datetime import datetime
@@ -587,15 +588,16 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
)
if fmt == "html":
safe_title = html.escape(session.name or "")
html_parts = [
"<!DOCTYPE html><html><head>",
f"<meta charset='utf-8'><title>{session.name}</title>",
f"<meta charset='utf-8'><title>{safe_title}</title>",
"<style>body{font-family:monospace;max-width:800px;margin:2rem auto;padding:0 1rem;background:#111;color:#ddd}",
".msg{margin:1rem 0;padding:0.8rem;border-radius:6px;border:1px solid #333}",
".user{background:#1a1a2e}.ai{background:#1a2e1a}",
".role{font-weight:bold;margin-bottom:0.4rem;opacity:0.7;text-transform:uppercase;font-size:0.85em}",
"pre{background:#000;padding:0.5rem;border-radius:4px;overflow-x:auto}</style></head><body>",
f"<h1>{session.name}</h1>",
f"<h1>{safe_title}</h1>",
]
for m in session.history:
cls = "user" if m.role == "user" else "ai"