mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-22 20:55:29 -04:00
fix(security): redact credential-bearing URLs and PII from logs (#4750)
* fix(security): redact credential-bearing URLs and PII from logs

  Several log statements emitted sensitive data in clear text:

  - model_routes / chat_routes / contacts_routes logged endpoint URLs raw. Admin-configured URLs can embed credentials in userinfo or query (e.g. https://user:pass@host, ?api_key=...). Route them through a shared core.log_safety.redact_url() that drops userinfo/query/fragment.
  - note_routes / task_scheduler logged operator email addresses (smtp_user, recipient). Replaced with presence booleans, which keeps the diagnostic ("why didn't this send") without writing PII to logs.

  model_routes already had a local redactor on its HTTPStatusError branch; the generic except branch was missed, so reuse the existing helper there.

  Clears CodeQL py/clear-text-logging-sensitive-data alerts 264, 317, 324, 325, 343, 344, 528.

* fix(security): re-bracket IPv6 hosts and single-source the URL redactor

  Address review on #4750:

  - redact_url now re-brackets IPv6 literals so host:port stays unambiguous (https://[2001:db8::1]:8443/v1, not the bracket-less ambiguous form).
  - point model_routes._redact_url_for_log at the shared helper so the two redactors are single-sourced (also picks up the IPv6 fix).
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
"""Helpers for keeping sensitive data out of logs.
|
||||
|
||||
Endpoint URLs configured by admins can embed credentials in the userinfo
|
||||
(``https://user:pass@host``) or query string (``?api_key=...``). Logging them
|
||||
raw leaks those secrets, so route/diagnostic logs run URLs through
|
||||
``redact_url`` first. Reconstructing the URL without userinfo/query/fragment
|
||||
also doubles as a sanitizer barrier for CodeQL's clear-text-logging query.
|
||||
"""
|
||||
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
|
||||
def redact_url(url: str) -> str:
    """Return a URL safe for logs by removing userinfo and query/fragment.

    Keeps scheme, host, port and path so logs stay useful for debugging.

    Scheme-less input that still carries userinfo (``user:pass@host/v1``) is
    handled as well: ``urlparse`` reads ``user`` as the scheme and leaves
    ``pass@host/v1`` in the path, which would otherwise be logged verbatim.
    Such input is re-parsed as a network location so the userinfo is dropped.

    Args:
        url: The URL to sanitize. ``None`` and ``""`` both yield ``""``.

    Returns:
        The URL rebuilt from scheme, host, port and path only, or the
        placeholder ``"<endpoint>"`` when the input cannot be parsed (for
        example an out-of-range port or a non-string value).
    """
    try:
        parsed = urlparse(url or "")
        scheme = parsed.scheme
        reparsed = False
        # No netloc but an "@" before the first "/" means the authority was
        # written without a leading "scheme://"; the credential is sitting in
        # what urlparse considers the scheme/path. Parse it as an authority.
        if not parsed.netloc and "@" in parsed.path.split("/", 1)[0]:
            parsed = urlparse(f"//{url}")
            scheme = ""
            reparsed = True
        host = parsed.hostname or ""
        if ":" in host:  # IPv6 literal — re-bracket so host:port stays unambiguous
            host = f"[{host}]"
        if parsed.port:
            host = f"{host}:{parsed.port}"
        redacted = urlunparse((scheme, host, parsed.path, "", "", ""))
        # The "//" was only added to steer the parser; the caller's input had
        # none, so do not echo it back.
        if reparsed and redacted.startswith("//"):
            redacted = redacted[2:]
        return redacted
    except Exception:
        # Deliberate best-effort: a log helper must never raise, and must not
        # fall back to echoing the raw (possibly secret-bearing) input.
        return "<endpoint>"
|
||||
@@ -29,6 +29,7 @@ from routes.document_helpers import _owner_session_filter
|
||||
from core.database import SessionLocal, get_session_mode, set_session_mode
|
||||
from core.database import Session as DBSession, ChatMessage as DBChatMessage
|
||||
from core.database import Document as DBDocument, ModelEndpoint
|
||||
from core.log_safety import redact_url
|
||||
from routes.research_routes import _resolve_research_endpoint
|
||||
from routes.model_routes import _visible_models
|
||||
from routes.chat_helpers import (
|
||||
@@ -930,7 +931,7 @@ def setup_chat_routes(
|
||||
if effective_do_research:
|
||||
_r_ep, _r_model, _r_headers = _resolve_research_endpoint(sess)
|
||||
_auth_keys = list(_r_headers.keys()) if _r_headers else []
|
||||
logger.info(f"Research endpoint resolved: model={_r_model}, endpoint={_r_ep}, auth_keys={_auth_keys}, sess_headers_keys={list(sess.headers.keys()) if isinstance(sess.headers, dict) else type(sess.headers)}")
|
||||
logger.info(f"Research endpoint resolved: model={_r_model}, endpoint={redact_url(_r_ep)}, auth_keys={_auth_keys}, sess_headers_keys={list(sess.headers.keys()) if isinstance(sess.headers, dict) else type(sess.headers)}")
|
||||
|
||||
# Clarification round: only for very short/vague queries on first research message.
|
||||
# Skip in compare mode — each pane is a fresh session, so every one would
|
||||
|
||||
@@ -18,6 +18,7 @@ from pathlib import Path
|
||||
from datetime import datetime
|
||||
from urllib.parse import urljoin, urlparse, urlunparse
|
||||
|
||||
from core.log_safety import redact_url
|
||||
from fastapi import APIRouter, Query, Depends, Response, HTTPException
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
@@ -702,7 +703,7 @@ def _delete_contact(uid: str) -> bool:
|
||||
logger.warning(
|
||||
f"CardDAV DELETE reported success for {uid} "
|
||||
f"but UID still present after re-fetch — "
|
||||
f"resource URL may differ from {url}"
|
||||
f"resource URL may differ from {redact_url(url)}"
|
||||
)
|
||||
return False
|
||||
if r.status_code == 404:
|
||||
|
||||
+3
-14
@@ -17,6 +17,7 @@ from fastapi import APIRouter, HTTPException, Form, Query, Body, Request, Respon
|
||||
from pydantic import BaseModel
|
||||
from fastapi.responses import StreamingResponse
|
||||
from core.database import SessionLocal, ModelEndpoint, Session as DbSession
|
||||
from core.log_safety import redact_url as _redact_url_for_log
|
||||
from core.middleware import require_admin
|
||||
from src.llm_core import _detect_provider, _host_match, ANTHROPIC_MODELS
|
||||
from src.tls_overrides import llm_verify
|
||||
@@ -582,18 +583,6 @@ def _safe_build_headers(api_key: Optional[str], base_url: str) -> dict:
|
||||
return {"Authorization": f"Bearer {api_key}"} if api_key else {}
|
||||
|
||||
|
||||
def _redact_url_for_log(url: str) -> str:
|
||||
"""Return a URL safe for logs by removing userinfo and query/fragment."""
|
||||
try:
|
||||
parsed = urlparse(url or "")
|
||||
host = parsed.hostname or ""
|
||||
if parsed.port:
|
||||
host = f"{host}:{parsed.port}"
|
||||
return urlunparse((parsed.scheme, host, parsed.path, "", "", ""))
|
||||
except Exception:
|
||||
return "<endpoint>"
|
||||
|
||||
|
||||
def _is_discovery_only_provider(provider: str) -> bool:
|
||||
return provider == "chatgpt-subscription"
|
||||
|
||||
@@ -810,9 +799,9 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
logger.warning("Failed to probe %s: %s", _redact_url_for_log(url), e)
|
||||
except Exception as e:
|
||||
if api_key:
|
||||
logger.warning(f"Failed to probe {url} with API key: {e}")
|
||||
logger.warning("Failed to probe %s with API key: %s", _redact_url_for_log(url), e)
|
||||
return []
|
||||
logger.warning(f"Failed to probe {url}: {e}")
|
||||
logger.warning("Failed to probe %s: %s", _redact_url_for_log(url), e)
|
||||
|
||||
# Older Ollama builds and some proxies expose native /api/tags even when
|
||||
# the OpenAI-compatible /v1/models path is unavailable.
|
||||
|
||||
@@ -335,10 +335,11 @@ async def dispatch_reminder(
|
||||
# Loud diagnostic so we can see WHY a reminder didn't send (the
|
||||
# previous "silently no-op when cfg has no smtp_host" was invisible).
|
||||
logger.info(
|
||||
f"dispatch_reminder[email] note_id={note_id} owner={owner!r} "
|
||||
f"smtp_host={cfg.get('smtp_host')!r} smtp_user={cfg.get('smtp_user')!r} "
|
||||
f"from={from_addr!r} recipient={recipient!r} "
|
||||
f"account_name={cfg.get('account_name')!r}"
|
||||
"dispatch_reminder[email] note_id=%s owner=%r "
|
||||
"has_smtp_host=%s has_smtp_user=%s has_from=%s has_recipient=%s",
|
||||
note_id, owner,
|
||||
bool(cfg.get("smtp_host")), bool(cfg.get("smtp_user")),
|
||||
bool(from_addr), bool(recipient),
|
||||
)
|
||||
missing = []
|
||||
if not cfg.get("smtp_host"):
|
||||
|
||||
@@ -1667,7 +1667,7 @@ class TaskScheduler:
|
||||
msg["X-Odysseus-Ref"] = str(task.id)
|
||||
msg.set_content(result or "")
|
||||
_send_smtp_message(cfg, from_addr, [to_addr], msg.as_string(), timeout=30)
|
||||
logger.info("Task %s emailed result to %s (%sb)", task.id, to_addr, len(result or ""))
|
||||
logger.info("Task %s emailed result (recipient_set=%s, %sb)", task.id, bool(to_addr), len(result or ""))
|
||||
except Exception as e:
|
||||
logger.error("Task %s email delivery failed: %s", task.id, e, exc_info=True)
|
||||
raise
|
||||
@@ -2029,7 +2029,7 @@ class TaskScheduler:
|
||||
# silent SMTP failure is easier to spot in the logs.
|
||||
logger.info(
|
||||
f"Task {task.id} delivered via MCP tool {tool_name} "
|
||||
f"(to={recipient or '<unset>'}, body={body_len}b, reply={stdout[:200]!r})"
|
||||
f"(recipient_set={bool(recipient)}, body={body_len}b, reply={stdout[:200]!r})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Task {task.id} MCP delivery failed: {e}")
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
from core.log_safety import redact_url
|
||||
|
||||
|
||||
def test_strips_userinfo():
    """``user:pass@`` is removed while scheme, host and path survive."""
    redacted = redact_url("https://user:pass@host.example/v1/models")
    assert redacted == "https://host.example/v1/models"
|
||||
|
||||
|
||||
def test_strips_query_and_fragment():
    """The query string and the fragment are both dropped from the output."""
    redacted = redact_url("https://host.example/v1?api_key=secret#frag")
    assert redacted == "https://host.example/v1"
|
||||
|
||||
|
||||
def test_keeps_port_and_path():
    """An explicit port and the path are preserved unchanged."""
    original = "http://host.example:8080/api/tags"
    assert redact_url(original) == original
|
||||
|
||||
|
||||
def test_ipv6_host_keeps_brackets():
    """IPv6 literals stay bracketed, with and without an explicit port."""
    with_port = redact_url("https://user:pass@[2001:db8::1]:8443/v1")
    assert with_port == "https://[2001:db8::1]:8443/v1"

    without_port = redact_url("https://[2001:db8::1]/v1")
    assert without_port == "https://[2001:db8::1]/v1"
|
||||
|
||||
|
||||
def test_no_credentials_passthrough():
    """A URL with nothing sensitive in it comes back unchanged."""
    clean = "https://host.example/v1/models"
    assert redact_url(clean) == clean
|
||||
|
||||
|
||||
def test_empty_and_none():
    """Both the empty string and ``None`` redact to the empty string."""
    for blank in ("", None):
        assert redact_url(blank) == ""
|
||||
|
||||
|
||||
def test_garbage_does_not_raise():
    """Malformed input is tolerated and never yields userinfo-looking output."""
    # urlparse is lenient; just assert no credential-looking userinfo survives.
    redacted = redact_url("::::not a url::::")
    assert "@" not in redacted
|
||||
Reference in New Issue
Block a user