Fix email-thread HTML injection, attachment path traversal, and missing authz (#475)

Fixes issues found in a security review of the current tree (separate from
the cookbook SSH PR):

- Email thread rendering (static/js/emailLibrary.js): the flat read path runs
  inbound HTML through the allowlist sanitizer, but the two threaded paths
  (_renderTurnsAsBubbles / _renderTurnsFromServer — the default view) injected
  server-parsed `body_html` raw into the DOM. A crafted inbound email could
  inject arbitrary markup (phishing/form/credential-capture/tracking; full XSS
  if a deployment relaxes the script CSP). Now sanitized on all paths.

- Attachment extraction (routes/email_routes.py, routes/email_helpers.py): the
  on-disk extraction dir was `ATTACHMENTS_DIR / f"{folder}_{uid}"` with
  user-controlled folder/uid and no containment, so a folder like `../../tmp`
  could escape ATTACHMENTS_DIR. New attachment_extract_dir() flattens both to a
  single safe segment and asserts containment.

- Diagnostics routes (routes/diagnostics_routes.py): /api/db/stats,
  /api/rag/stats, /api/test/youtube, /api/test-research relied only on the
  global session check (any logged-in user). Now require_admin-gated.

- Defense-in-depth HTML escaping: session HTML export escapes the session name
  (routes/session_routes.py); the MCP OAuth page escapes the reflected Host
  header / server_id (routes/mcp_routes.py).

- Internal-tool token now compared with secrets.compare_digest (constant time)
  in core/middleware.py and app.py.

Adds regression tests in tests/test_security_regressions.py.
This commit is contained in:
Jamieson O'Reilly
2026-06-01 23:20:17 +10:00
committed by GitHub
parent 9e8de43f25
commit 171c29dcf3
9 changed files with 113 additions and 16 deletions
+68
View File
@@ -622,3 +622,71 @@ def test_web_fetch_guard_blocks_redirect_into_private(monkeypatch):
with _pytest.raises(httpx.RequestError) as exc:
content._get_public_url("http://public.example/start", headers={}, timeout=5)
assert "non-public" in str(exc.value)
# ── audit fixes (2026-06-01): email XSS, attachment traversal, authz ──
def _import_attachment_extract_dir():
    """Import the attachment helpers from a freshly loaded ``routes.email_helpers``.

    Any cached ``routes.email_helpers`` entry is dropped from ``sys.modules``
    first, so the import below loads the module again rather than reusing a
    copy left behind by an earlier test.

    Returns:
        A ``(attachment_extract_dir, ATTACHMENTS_DIR)`` tuple.
    """
    sys.modules.pop("routes.email_helpers", None)
    from routes.email_helpers import attachment_extract_dir as extract_dir
    from routes.email_helpers import ATTACHMENTS_DIR as attachments_root

    return extract_dir, attachments_root
@pytest.mark.parametrize("folder,uid", [
    ("../../../../tmp/evil", "1"),
    ("INBOX", "../../etc/cron.d/x"),
    ("a/../../b", "x"),
    ("..", ".."),
    ("/abs/path", "2"),
])
def test_attachment_extract_dir_stays_contained(folder, uid):
    """User-controlled folder/uid must never escape ATTACHMENTS_DIR — pins the
    fix for the attachment-extraction path traversal.

    Each parametrized case feeds a hostile ``folder`` or ``uid`` (parent
    references, embedded separators, an absolute path) and checks that the
    returned directory is still located under the resolved ATTACHMENTS_DIR.
    """
    aed, base = _import_attachment_extract_dir()
    target = aed(folder, uid)
    base_r = base.resolve()
    # containment: the result is the base itself or something beneath it
    assert target == base_r or base_r in target.parents, target
    # relative_to() raises ValueError if the target is outside the base, so
    # this doubles as a second containment check
    rel = target.relative_to(base_r)
    # no `..` component survived the flattening
    assert ".." not in rel.parts, rel
    # hostile input is flattened into at most one extra path segment; it must
    # never be turned into nested directories below the base
    assert len(rel.parts) <= 1, rel
def test_attachment_extract_dir_normal_inputs_unchanged():
    """A benign folder/uid pair maps to ``<ATTACHMENTS_DIR>/<folder>_<uid>``."""
    extract_dir, attachments_root = _import_attachment_extract_dir()
    actual = extract_dir("INBOX", "123")
    expected = attachments_root.resolve() / "INBOX_123"
    assert actual == expected
def test_diagnostics_routes_are_admin_gated():
    """db/rag stats + test endpoints must require admin (they relied only on
    the global session check before).

    This is a source-level check: it reads routes/diagnostics_routes.py as
    text, confirms each handler takes a ``request: Request`` first parameter,
    and confirms ``require_admin(request)`` appears at least once per handler.
    """
    src = Path(__file__).resolve().parents[1] / "routes" / "diagnostics_routes.py"
    # explicit encoding: the default is locale-dependent and can fail on
    # non-UTF-8 platforms
    text = src.read_text(encoding="utf-8")
    for handler in ("get_database_stats", "get_rag_stats", "test_youtube", "test_research"):
        assert f"def {handler}(request: Request" in text, handler
    gate_calls = text.count("require_admin(request)")
    assert gate_calls >= 4, gate_calls
def test_email_thread_rendering_sanitizes_body_html():
    """Both threaded render paths must run server-parsed body_html through the
    allowlist sanitizer (the flat path already did).

    This is a source-level check on static/js/emailLibrary.js: every
    occurrence of ``t.body_html`` must be the argument of ``_sanitizeHtml(``.
    """
    src = Path(__file__).resolve().parents[1] / "static" / "js" / "emailLibrary.js"
    # explicit encoding: the default is locale-dependent and can fail on
    # non-UTF-8 platforms if the script contains non-ASCII text
    text = src.read_text(encoding="utf-8")
    raw_refs = text.count("t.body_html")
    sanitized_refs = text.count("_sanitizeHtml(t.body_html")
    # guard against the file being refactored away: with zero references the
    # equality below would pass vacuously
    assert raw_refs > 0
    # every `t.body_html` reference is wrapped by _sanitizeHtml(...)
    assert raw_refs == sanitized_refs, (raw_refs, sanitized_refs)
def test_session_html_export_escapes_name():
    """The session HTML export must escape the session name before embedding it.

    This is a source-level check on routes/session_routes.py: the escaped
    ``safe_title`` assignment must be present, and the raw ``{session.name}``
    interpolations in ``<title>`` and ``<h1>`` must be gone.
    """
    src = Path(__file__).resolve().parents[1] / "routes" / "session_routes.py"
    # explicit encoding: the default is locale-dependent and can fail on
    # non-UTF-8 platforms
    text = src.read_text(encoding="utf-8")
    assert "safe_title = html.escape(session.name" in text
    assert "<title>{session.name}" not in text
    assert "<h1>{session.name}</h1>" not in text
def test_mcp_oauth_page_escapes_reflected_values():
    """The MCP OAuth authorize page must HTML-escape every reflected value.

    This is a source-level check on routes/mcp_routes.py: within
    ``_oauth_authorize_page``, before its first ``return f`` template, each of
    ``auth_url``, ``server_id`` and ``host`` must be reassigned through
    ``html.escape``.
    """
    src = Path(__file__).resolve().parents[1] / "routes" / "mcp_routes.py"
    # explicit encoding: the default is locale-dependent and can fail on
    # non-UTF-8 platforms
    text = src.read_text(encoding="utf-8")
    marker = "def _oauth_authorize_page("
    # fail with a readable message instead of an IndexError from the split
    # below if the function is renamed or removed
    assert marker in text, "_oauth_authorize_page not found in mcp_routes.py"
    # only the code between the def line and the first f-string return counts:
    # escaping has to happen before the values reach the template
    body = text.split(marker, 1)[1].split("return f", 1)[0]
    for var in ("auth_url", "server_id", "host"):
        assert f"{var} = html.escape({var}" in body, var