From d2bad10781e435d58ed4326039b2823fc6b8a037 Mon Sep 17 00:00:00 2001 From: tanmayraut45 Date: Tue, 2 Jun 2026 02:47:30 +0530 Subject: [PATCH 001/496] Fix searxng container permission errors during setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A fresh `docker compose up -d` shows the searxng container failing its healthcheck with permission errors at setup (reported in #721 — the service comes up under names like `odysseus_searxng_1` and never goes ready, which then blocks the main odysseus container because of the `depends_on: searxng: condition: service_healthy` gate). Root cause: the official `searxng/searxng:latest` image runs as the non-root `searxng` user but its entrypoint still needs to 1. chown /etc/searxng on first boot so the persisted named volume is owned by the searxng user inside the container, 2. su-exec to drop / re-assert privileges before launching uwsgi, and 3. let our wrapper entrypoint (which seeds settings.yml into the named volume on first boot) write the file through the volume mount. Without explicit `cap_add`, the container has neither CHOWN nor DAC_OVERRIDE nor SETUID/SETGID, so the entrypoint aborts at the first chown / su-exec / redirection with EACCES. The upstream searxng-docker compose file solves this with the standard "drop everything, grant only what's needed" capability pattern. Fix: mirror the upstream cap_drop ALL / cap_add CHOWN, SETGID, SETUID, DAC_OVERRIDE on the searxng service. This grants only the four caps the entrypoint actually needs, matches what searxng-docker ships with, and leaves ports, volumes, env, healthcheck, and the wrapper entrypoint unchanged. Closes #721. 
--- docker-compose.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index f91017b86..ef3afda41 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -76,6 +76,20 @@ services: environment: - SEARXNG_BASE_URL=http://localhost:8080/ - SEARXNG_SECRET=${SEARXNG_SECRET:-} + # The official searxng image runs as the non-root `searxng` user, but its + # entrypoint still needs to chown /etc/searxng on first boot, drop privs via + # su-exec, and (with our wrapper above) write settings.yml into the named + # volume. Without these capabilities the wrapper aborts at the redirection + # with EACCES and the container fails its healthcheck with permission + # errors during setup. Mirrors the cap set recommended by the upstream + # searxng-docker compose file. See issue #721. + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE healthcheck: test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8080/', timeout=5).read(1)\""] interval: 5s From 2d7d7b2412d7d2df7358cdfcc79686b39e9e17f4 Mon Sep 17 00:00:00 2001 From: tanmayraut45 Date: Tue, 2 Jun 2026 03:02:30 +0530 Subject: [PATCH 002/496] Fix TOCTOU race in chat stream status endpoint The /api/chat/stream_status handler did a membership test against _active_streams followed by an indexed read of the same key. Between those two ops, a sibling stream's finally block (or a stop / cleanup path) can pop the entry, turning the indexed read into a KeyError that bubbles up as a 500. The race is the exact one _stream_set was already written to avoid; the comment on the helper at the top of the module spells out why a single .get() is the right pattern here too. Collapse the two-step into a single .get() call so the lookup either returns the live record or None, and report 'detached' / 404 based on that single read. 
No behavior change on the happy path; the failure mode under concurrent stream cleanup is now handled deterministically. Closes #658. --- routes/chat_routes.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/routes/chat_routes.py b/routes/chat_routes.py index 3cdcb8586..d0da48068 100644 --- a/routes/chat_routes.py +++ b/routes/chat_routes.py @@ -920,11 +920,15 @@ def setup_chat_routes( _verify_session_owner(request, session_id) # A detached run can still be going even if _active_streams was popped; # report it as active so the client knows to reconnect via /resume. - if session_id not in _active_streams: + # Read once via .get() to avoid a KeyError race between the membership + # check and the indexed read if a sibling stream's finally pops the + # entry in between (same pattern _stream_set already uses). + rec = _active_streams.get(session_id) + if rec is None: if agent_runs.is_active(session_id): return {"status": "streaming", "detached": True} raise HTTPException(404, "No active stream for this session") - return _active_streams[session_id] + return rec # ------------------------------------------------------------------ # # POST /api/inject_context From cb6f6b65ea574cad538771f89592c0ad39e83c2e Mon Sep 17 00:00:00 2001 From: Ernest Hysa Date: Mon, 1 Jun 2026 23:25:38 +0100 Subject: [PATCH 003/496] fix(research): validate session_id to block path traversal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every research endpoint interpolates session_id into filesystem paths (Path('data/deep_research') / f'{session_id}.json') without checking for traversal sequences. A crafted ID like '../../data/auth' reaches arbitrary JSON files — readable via research_detail (which also leaks file paths in error messages), writable via research_archive, and deletable via research_delete. Add _validate_session_id() which rejects anything outside [a-zA-Z0-9-]{1,128}. 
Called before filesystem access in all 12 endpoints that accept a session_id path parameter. --- routes/research_routes.py | 19 +++++++ tests/test_research_session_id_validation.py | 55 ++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 tests/test_research_session_id_validation.py diff --git a/routes/research_routes.py b/routes/research_routes.py index 4def1dd55..a04f76298 100644 --- a/routes/research_routes.py +++ b/routes/research_routes.py @@ -3,6 +3,7 @@ import asyncio import json import logging +import re import uuid from datetime import datetime from pathlib import Path @@ -14,6 +15,8 @@ from pydantic import BaseModel, Field from src.endpoint_resolver import resolve_endpoint from src.auth_helpers import get_current_user +_SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$") + logger = logging.getLogger(__name__) # Model-name substrings that are NOT chat/generation models — research must @@ -58,6 +61,10 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: raise HTTPException(401, "Not authenticated") return user + def _validate_session_id(session_id: str) -> None: + if not _SESSION_ID_RE.fullmatch(session_id): + raise HTTPException(400, "Invalid session ID format") + def _owns_in_memory(session_id: str, user: str) -> bool: """Ownership check for an in-flight (in-memory) research task. 
Falls back to the on-disk JSON if the task has already finished.""" @@ -95,6 +102,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: @router.get("/api/research/status/{session_id}") async def research_status(session_id: str, request: Request): user = _require_user(request) + _validate_session_id(session_id) if not _owns_in_memory(session_id, user): raise HTTPException(404, "No research found for this session") status = research_handler.get_status(session_id) @@ -105,6 +113,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: @router.post("/api/research/cancel/{session_id}") async def research_cancel(session_id: str, request: Request): user = _require_user(request) + _validate_session_id(session_id) if not _owns_in_memory(session_id, user): raise HTTPException(404, "No research found for this session") cancelled = research_handler.cancel_research(session_id) @@ -113,6 +122,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: @router.post("/api/research/result/{session_id}") async def research_result(session_id: str, request: Request): user = _require_user(request) + _validate_session_id(session_id) if not _owns_in_memory(session_id, user): raise HTTPException(404, "No research result available") result = research_handler.get_result(session_id) @@ -140,6 +150,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: async def research_report(session_id: str, request: Request): """Serve the visual HTML report for a completed research session.""" user = _require_user(request) + _validate_session_id(session_id) _assert_owns_research(session_id, user) logger.info(f"Visual report requested for session {session_id}") try: @@ -160,6 +171,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: """Mark an image URL as hidden for this research's visual report. 
Persisted to the research JSON so subsequent /report renders skip it.""" user = _require_user(request) + _validate_session_id(session_id) _assert_owns_research(session_id, user) ok = research_handler.hide_image(session_id, body.url) if not ok: @@ -170,6 +182,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: async def research_unhide_images(session_id: str, request: Request): """Clear the hidden-images list for a research session.""" user = _require_user(request) + _validate_session_id(session_id) _assert_owns_research(session_id, user) ok = research_handler.unhide_all_images(session_id) if not ok: @@ -235,6 +248,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: """Return the full JSON for a single research result — sources, summary, stats — used by the Library preview panel.""" user = _require_user(request) + _validate_session_id(session_id) path = Path("data/deep_research") / f"{session_id}.json" if not path.exists(): raise HTTPException(404, "Research not found") @@ -251,6 +265,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: async def research_archive(session_id: str, request: Request, archived: bool = Query(True)): """Soft-archive / restore a research report (sets `archived` in its JSON).""" user = _require_user(request) + _validate_session_id(session_id) path = Path("data/deep_research") / f"{session_id}.json" if not path.exists(): raise HTTPException(404, "Research not found") @@ -270,6 +285,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: async def research_delete(session_id: str, request: Request): """Delete a research result from disk.""" user = _require_user(request) + _validate_session_id(session_id) data_dir = Path("data/deep_research") json_path = data_dir / f"{session_id}.json" deleted = False @@ -413,6 +429,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: async def 
research_stream(session_id: str, request: Request): """SSE stream of research progress events.""" user = _require_user(request) + _validate_session_id(session_id) if not _owns_in_memory(session_id, user): raise HTTPException(404, "No research found for this session") async def _generate(): @@ -446,6 +463,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: async def research_result_peek(session_id: str, request: Request): """Get research result without clearing it (for panel use).""" user = _require_user(request) + _validate_session_id(session_id) if not _owns_in_memory(session_id, user): raise HTTPException(404, "No research found for this session") result = research_handler.get_result(session_id) @@ -475,6 +493,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: the user can ask follow-up questions in a clean conversation. """ _require_user(request) + _validate_session_id(session_id) if session_manager is None: raise HTTPException(500, "session_manager not configured") diff --git a/tests/test_research_session_id_validation.py b/tests/test_research_session_id_validation.py new file mode 100644 index 000000000..499b72a86 --- /dev/null +++ b/tests/test_research_session_id_validation.py @@ -0,0 +1,55 @@ +"""Regression tests: research session_id must reject path-traversal sequences.""" + +import re +import unittest + +_SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$") + + +class TestResearchSessionIdValidation(unittest.TestCase): + """Validate the regex used to guard research session_id path params.""" + + def test_accepts_rp_prefixed_id(self): + self.assertIsNotNone(_SESSION_ID_RE.fullmatch("rp-abc123def456")) + + def test_accepts_standard_uuid(self): + self.assertIsNotNone( + _SESSION_ID_RE.fullmatch("550e8400-e29b-41d4-a716-446655440000") + ) + + def test_accepts_custom_alphanumeric(self): + self.assertIsNotNone(_SESSION_ID_RE.fullmatch("custom-id-123")) + + def test_rejects_double_dot(self): + 
self.assertIsNone(_SESSION_ID_RE.fullmatch("..")) + + def test_rejects_single_dot(self): + self.assertIsNone(_SESSION_ID_RE.fullmatch(".")) + + def test_rejects_dot_slash_traversal(self): + self.assertIsNone(_SESSION_ID_RE.fullmatch("../../data/auth")) + + def test_rejects_deep_traversal(self): + self.assertIsNone(_SESSION_ID_RE.fullmatch("../../../etc/passwd")) + + def test_rejects_mixed_traversal(self): + self.assertIsNone(_SESSION_ID_RE.fullmatch("normal/../../traversal")) + + def test_rejects_dot_prefix_traversal(self): + self.assertIsNone(_SESSION_ID_RE.fullmatch("./../../secret")) + + def test_rejects_empty(self): + self.assertIsNone(_SESSION_ID_RE.fullmatch("")) + + def test_rejects_whitespace(self): + self.assertIsNone(_SESSION_ID_RE.fullmatch(" ")) + + def test_rejects_slash(self): + self.assertIsNone(_SESSION_ID_RE.fullmatch("a/b")) + + def test_rejects_null_byte(self): + self.assertIsNone(_SESSION_ID_RE.fullmatch("rp-test\x00")) + + +if __name__ == "__main__": + unittest.main() From c0466274ed8254ac0787eaa72c3c47c605ce977b Mon Sep 17 00:00:00 2001 From: BSG-Walter Date: Mon, 1 Jun 2026 19:42:01 -0300 Subject: [PATCH 004/496] fix: resolve DuckDuckGo redirect URLs in HTML fallback search The DuckDuckGo HTML fallback returns redirect URLs (//duckduckgo.com/l/?uddg=...) instead of actual page URLs. This caused fetch_webpage_content() to reject them instantly because _public_http_url() requires an http/https scheme, making search results unfetchable in deep research mode. 
Added _resolve_url() to: - Convert protocol-relative URLs to absolute (https:) - Convert path-relative URLs to absolute - Extract the real URL from DuckDuckGo's /l/?uddg= redirect parameters --- services/search/providers.py | 22 +++++++++++++++++++++- src/search/providers.py | 24 +++++++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/services/search/providers.py b/services/search/providers.py index c760b5aff..b7cdce665 100644 --- a/services/search/providers.py +++ b/services/search/providers.py @@ -4,6 +4,7 @@ import json import logging import os from typing import List, Optional +from urllib.parse import urljoin, urlparse, parse_qs import httpx from bs4 import BeautifulSoup @@ -299,6 +300,25 @@ def _brave_search_impl(query: str, count: int, time_filter: Optional[str] = None def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]: """Search using DuckDuckGo via the duckduckgo-search library. No API key needed.""" + def _resolve_url(raw: str) -> str: + """Resolve DuckDuckGo redirect URL to the actual destination URL.""" + if not raw: + return raw + resolved = raw + if resolved.startswith("//"): + resolved = "https:" + resolved + elif resolved.startswith("/"): + resolved = urljoin("https://html.duckduckgo.com", resolved) + try: + parsed = urlparse(resolved) + if "duckduckgo.com" in (parsed.hostname or "") and parsed.path.rstrip("/") == "/l": + qs = parse_qs(parsed.query) + if "uddg" in qs: + return qs["uddg"][0] + except Exception: + pass + return resolved + def _html_fallback() -> List[dict]: try: response = httpx.get( @@ -314,7 +334,7 @@ def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] = link = result.select_one(".result__a") if not link: continue - url = link.get("href", "") + url = _resolve_url(link.get("href", "")) if not url: continue snippet_el = result.select_one(".result__snippet") diff --git a/src/search/providers.py b/src/search/providers.py index 
f60a0248f..ee16a50b5 100644 --- a/src/search/providers.py +++ b/src/search/providers.py @@ -4,6 +4,7 @@ import json import logging import os from typing import List, Optional +from urllib.parse import urljoin, urlparse, parse_qs import httpx from bs4 import BeautifulSoup @@ -300,6 +301,27 @@ def _brave_search_impl(query: str, count: int, time_filter: Optional[str] = None def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]: """Search using DuckDuckGo via the duckduckgo-search library. No API key needed.""" + def _resolve_url(raw: str) -> str: + """Resolve DuckDuckGo redirect URL to the actual destination URL.""" + if not raw: + return raw + # Handle protocol-relative URLs + resolved = raw + if resolved.startswith("//"): + resolved = "https:" + resolved + elif resolved.startswith("/"): + resolved = urljoin("https://html.duckduckgo.com", resolved) + # Extract the actual URL from DuckDuckGo's /l/?uddg= redirect + try: + parsed = urlparse(resolved) + if "duckduckgo.com" in (parsed.hostname or "") and parsed.path.rstrip("/") == "/l": + qs = parse_qs(parsed.query) + if "uddg" in qs: + return qs["uddg"][0] + except Exception: + pass + return resolved + def _html_fallback() -> List[dict]: try: response = httpx.get( @@ -315,7 +337,7 @@ def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] = link = result.select_one(".result__a") if not link: continue - url = link.get("href", "") + url = _resolve_url(link.get("href", "")) if not url: continue snippet_el = result.select_one(".result__snippet") From 1494a0b7ee46794e51e1b17cfea101fe99097633 Mon Sep 17 00:00:00 2001 From: Kevin <120500656+oooindefatigable@users.noreply.github.com> Date: Tue, 2 Jun 2026 01:30:38 +0200 Subject: [PATCH 005/496] fix: normalize JS static MIME types on Windows Refs #802 --- app.py | 17 ++++++++++++++++ tests/test_app_static_mime.py | 37 +++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 
tests/test_app_static_mime.py diff --git a/app.py b/app.py index 1314d58bc..838a0de3c 100644 --- a/app.py +++ b/app.py @@ -1,6 +1,23 @@ # app.py — slim orchestrator +import mimetypes import os + +def register_static_mime_types() -> None: + """Force stable JS module MIME types across platforms. + + Some native Windows setups inherit stale/incorrect registry mappings for + ``.js``/``.mjs``, which can make Starlette serve ES modules with a non-JS + ``Content-Type`` and cause the UI to load but fail on click. Re-register the + standard MIME types at startup so static assets are served consistently. + """ + + mimetypes.add_type("text/javascript", ".js") + mimetypes.add_type("application/javascript", ".mjs") + + +register_static_mime_types() + # Windows: force HuggingFace/fastembed to COPY model files instead of symlinking. # On a network-share/UNC data dir Windows can't follow HF's symlinks ([WinError # 1463]), so the ONNX embedding model fails to load. huggingface_hub reads this diff --git a/tests/test_app_static_mime.py b/tests/test_app_static_mime.py new file mode 100644 index 000000000..a7ff4767c --- /dev/null +++ b/tests/test_app_static_mime.py @@ -0,0 +1,37 @@ +import ast +import mimetypes +from pathlib import Path + + +def _load_register_static_mime_types(): + app_path = Path(__file__).resolve().parents[1] / "app.py" + tree = ast.parse(app_path.read_text(encoding="utf-8"), filename=str(app_path)) + fn = next(node for node in tree.body if isinstance(node, ast.FunctionDef) and node.name == "register_static_mime_types") + module = ast.Module(body=[fn], type_ignores=[]) + ns = {"mimetypes": mimetypes} + exec(compile(module, str(app_path), "exec"), ns) + return ns["register_static_mime_types"] + + +def test_register_static_mime_types_restores_js_module_types(): + register_static_mime_types = _load_register_static_mime_types() + original_js = mimetypes.types_map.get(".js") + original_mjs = mimetypes.types_map.get(".mjs") + try: + mimetypes.types_map[".js"] = 
"text/plain" + mimetypes.types_map.pop(".mjs", None) + + register_static_mime_types() + + assert mimetypes.types_map[".js"] == "text/javascript" + assert mimetypes.types_map[".mjs"] == "application/javascript" + finally: + if original_js is None: + mimetypes.types_map.pop(".js", None) + else: + mimetypes.types_map[".js"] = original_js + + if original_mjs is None: + mimetypes.types_map.pop(".mjs", None) + else: + mimetypes.types_map[".mjs"] = original_mjs From cb13d090298a60be085fbd75ec45102f92ca1bae Mon Sep 17 00:00:00 2001 From: James Arslan Date: Tue, 2 Jun 2026 00:34:51 +0000 Subject: [PATCH 006/496] Fix tool-calling HTTP 400 on Gemini and Ollama: send null, not empty, assistant content When an agent turn uses native (OpenAI-style) function calling and the model returns only tool calls with no prose, _append_tool_results built the follow-up assistant message with content "" (empty string). Google Gemini's OpenAI-compatible endpoint and Ollama both reject an assistant message that carries tool_calls alongside an empty-string content with HTTP 400. Because that message feeds the tool results back to the model, every tool-using turn on these providers dies at the second round: the tool runs, but the agent never produces a result. Use None (JSON null) instead, which is the spec-correct form the OpenAI SDK itself emits and which OpenAI and Anthropic accept too. Adds tests covering the native tool-call content shaping. 
--- src/agent_loop.py | 9 +++++- tests/test_agent_loop.py | 68 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/src/agent_loop.py b/src/agent_loop.py index fd0f440ef..f77634526 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -1054,7 +1054,14 @@ def _append_tool_results( """ if used_native and native_tool_calls: assistant_msg = {"role": "assistant"} - assistant_msg["content"] = round_response if round_response.strip() else "" + # When the model emitted ONLY tool calls (no prose), content must be + # null, NOT an empty string. Google Gemini's OpenAI-compatible endpoint + # and Ollama both reject an assistant message that carries tool_calls + # alongside empty-string content with HTTP 400 ("contents is not + # specified" / a JSON parse error), which aborts every tool-using turn + # at the follow-up round. null (i.e. omitted text) is the spec-correct + # form the OpenAI SDK itself emits, and OpenAI/Anthropic accept it too. + assistant_msg["content"] = round_response if round_response.strip() else None if round_reasoning: assistant_msg["reasoning_content"] = round_reasoning assistant_msg["tool_calls"] = [ diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py index e2ba3509f..ca0a1c1f4 100644 --- a/tests/test_agent_loop.py +++ b/tests/test_agent_loop.py @@ -1,5 +1,5 @@ -"""Tests for agent_loop.py — _detect_admin_intent and _compute_final_metrics. -Uses mock imports to avoid loading the full app stack.""" +"""Tests for agent_loop.py — _detect_admin_intent, _compute_final_metrics, +and _append_tool_results. 
Uses mock imports to avoid loading the full app stack.""" import sys from unittest.mock import MagicMock @@ -15,7 +15,11 @@ for mod in [ if mod not in sys.modules: sys.modules[mod] = MagicMock() -from src.agent_loop import _detect_admin_intent, _compute_final_metrics +from src.agent_loop import ( + _detect_admin_intent, + _compute_final_metrics, + _append_tool_results, +) # --------------------------------------------------------------------------- @@ -239,3 +243,61 @@ class TestComputeFinalMetrics: m = _compute_final_metrics(**self._base_args(tool_events=[], round_texts=[])) assert "tool_events" not in m assert "round_texts" not in m + + +# --------------------------------------------------------------------------- +# _append_tool_results — native tool-call message shaping +# --------------------------------------------------------------------------- + +class TestAppendToolResultsNativeContent: + """After a native tool call with no prose, the assistant message's content + must be JSON null (None), not an empty string. 
Google Gemini's + OpenAI-compatible endpoint and Ollama both reject `tool_calls` + "" + content with HTTP 400, which breaks every tool-using turn.""" + + def _native(self): + return [{"id": "call_abc", "name": "web_fetch", "arguments": '{"url": "https://example.com"}'}] + + def test_empty_text_yields_null_content(self): + messages = [] + _append_tool_results( + messages, "", self._native(), [{}], ["page text"], + used_native=True, round_num=1, + ) + assistant = messages[0] + assert assistant["role"] == "assistant" + assert assistant["content"] is None # NOT "" + assert assistant["tool_calls"][0]["id"] == "call_abc" + assert assistant["tool_calls"][0]["type"] == "function" + # tool result follows as a role:tool message keyed by tool_call_id + assert messages[1]["role"] == "tool" + assert messages[1]["tool_call_id"] == "call_abc" + assert messages[1]["content"] == "page text" + + def test_whitespace_only_text_yields_null_content(self): + messages = [] + _append_tool_results( + messages, " \n\t ", self._native(), [{}], ["r"], + used_native=True, round_num=2, + ) + assert messages[0]["content"] is None + + def test_real_prose_is_preserved(self): + messages = [] + _append_tool_results( + messages, "Let me check that page.", self._native(), [{}], ["r"], + used_native=True, round_num=1, + ) + assert messages[0]["content"] == "Let me check that page." + + def test_non_native_path_unaffected(self): + # The text-block fallback path still wraps results in a user message. + messages = [] + _append_tool_results( + messages, "thinking...", [], ["tool output"], [], + used_native=False, round_num=1, + ) + assert messages[0]["role"] == "assistant" + assert messages[0]["content"] == "thinking..." 
+ assert messages[1]["role"] == "user" + assert "tool output" in messages[1]["content"] From 96618b01c04a5cd7517ae1984b57b721ac453856 Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Tue, 2 Jun 2026 09:36:03 +0900 Subject: [PATCH 007/496] Polish task UI slash commands and Ollama serving --- ROADMAP.md | 37 +++++++++++++++-- routes/cookbook_helpers.py | 2 + routes/cookbook_routes.py | 75 ++++++++++++++++++++++++---------- static/js/cookbookRunning.js | 37 +++++++++++++---- static/js/cookbookServe.js | 3 +- static/js/slashAutocomplete.js | 2 +- static/js/slashCommands.js | 14 ++++--- static/js/tasks.js | 17 +++++++- static/style.css | 13 ++++-- 9 files changed, 155 insertions(+), 45 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index aa79c3088..4893bdeef 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -8,25 +8,54 @@ the codebase, you are probably right to stay away. ## High Priority - SQUASH BUGS -- Fresh Docker install smoke tests on Linux, macOS, and Windows!! +- Fresh install smoke tests on Linux, macOS, and Windows. Docker, native Python, + and WSL all need coverage. - Integration audit: do integrations even work? Confirm what works, what needs setup docs, and what should be removed or hidden. - Self-host troubleshooting cookbook. Document the weird 30-second fixes that otherwise become 30-minute searches: Dovecot cleartext auth for local stacks, ntfy Android Instant Delivery for non-ntfy.sh servers, clipboard limits on plain-HTTP Tailscale URLs, Radicale collection URLs, and similar traps. - Cookbook reliability on other computers. This is probably the area most likely to need work across different machines, GPUs, drivers, shells, and Python environments. -- Tile/window management correctness. I had to brute force my way a bit here, I'm aware, popups, dropdowns, and fixed-position UI inside transformed modals can land in the wrong place. -- Esc button, it's small but a lot of windows that arent still close on esc and alot of them doesnt. 
-- Skill audit, how does your model respond to skill injection, does it follow? Does its parsing miss? +- Cookbook SGLang support across platforms. Make sure SGLang setup/serve works + predictably on Linux, Windows/WSL, macOS where possible, Docker, and common + NVIDIA/AMD hardware paths. +- Deep Research model presets by hardware. Recommend approved model/parameter + profiles for small, medium, and large local setups so people with different + hardware can use Deep Research without guessing. Surface this either in Deep + Research settings or as a Cookbook scan/dropdown suggestion. +- Cookbook model scan/download ranking. Prioritize newer architectures and + better hardware-fit models instead of scoring everything almost the same. + Ranking should account for architecture age, quant format, VRAM/RAM fit, + backend support, vision/mmproj requirements, and likely serve reliability. +- Cookbook error feedback and logging. Failed downloads, dependency installs, + preflights, and serve jobs should show the actual command/output/error in the + UI, with copyable logs and clear next steps instead of just "crashed". +- Agent prompt/context bloat. Agent mode is too heavy for smaller local models: + tool schemas, skills, memory, documents, and instructions can eat the context + before the user request really starts. We need slimmer prompts, better tool + selection, smaller default tool sets, and clearer guidance for models with + 4k/8k/16k context windows. +- Skill/tool prompt-injection audit. User-editable skills, notes, documents, + fetched pages, and memories should be treated as untrusted data. Keep testing + whether models follow malicious instructions from those surfaces. - Better degraded-state reporting for ChromaDB, SearXNG, email, ntfy, and provider probes. - Provider setup/probing audit for Anthropic, Gemini, Groq, xAI, OpenRouter, OpenAI, and DeepSeek. ## Refactor Targets - CSS cleanup. `static/style.css` basically Calypso's island atm. - Tour core helper. 
The onboarding tours have too much copy-pasted scaffolding; promote a shared `tour-core.js` helper before adding more tours. +- Modal/window positioning cleanup. Some window controls have improved, but the + underlying popup/dropdown/fixed-position behavior is still too fragile. - Mobile media override discoverability. A lot of "CSS did not move" bugs are mobile `@media` overrides of the same selector; comments or linting around desktop/mobile paired rules would help. - Dead code pass for old routes, stale feature flags, and unused UI states. ## Frontend +- Expand the Editor for quicker, more robust everyday use. Better file/document + handling, smoother window behavior, clearer save/export flows, stronger image + editing affordances, and fewer brittle edge cases. +- Better AI integration for Notes and Todos. Notes should be easier for the + agent to read, update, summarize, and turn into actions. Todos should be + assignable to an agent from the UI, possibly through a button, task action, + or dedicated skill/tool flow. - Mobile gallery/editor polish. Easier to launch/download inpaint model or any missing pieces. - Accessibility pass: keyboard navigation, focus states, contrast, reduced motion. - Improve empty states and error messages on fresh installs. diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py index ca954ab9e..c311b24e6 100644 --- a/routes/cookbook_helpers.py +++ b/routes/cookbook_helpers.py @@ -434,6 +434,8 @@ def _parse_serve_phase(snapshot: str, task_type: str = "serve") -> dict: } if "Application startup complete" in flat: return {"phase": "ready", "status": "ready"} + if re.search(r'Ollama API ready on port\s+\d+', flat, re.I): + return {"phase": "ready", "status": "ready"} # HTTP access logs (e.g. 
GET /v1/models 200 OK) mean the server is up and serving if re.search(r'(?:GET|POST)\s+/[^\s]*\s+HTTP/[\d.]+"\s*\d{3}', flat): return {"phase": "idle", "status": "ready"} diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index c622d38a5..4a019750f 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -905,6 +905,7 @@ def setup_cookbook_routes() -> APIRouter: # Show whether the HF token reached this server (masked) — a gated # model vLLM has to download will be denied without it. runner_lines.append(_HF_TOKEN_STATUS_SNIPPET) + handled_ollama_serve = False # Auto-install inference engine if missing if "llama_cpp" in req.cmd or "llama-server" in req.cmd: # Prefer the NATIVE llama-server binary — its minja templating @@ -978,17 +979,48 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(' fi') runner_lines.append('fi') elif "ollama" in req.cmd: - # Ollama manages its own model store and HTTP server. Just make - # sure the binary exists and the daemon is up before running the - # command (the natural serving engine on Apple Silicon / Metal). + handled_ollama_serve = True + _ollama_port = "11434" + _ollama_match = re.search(r"OLLAMA_HOST=[^\s:]+:(\d+)", req.cmd) + if _ollama_match: + _ollama_port = _ollama_match.group(1) + # Ollama can be a host binary, a system service, or a Docker + # container. If the HTTP API is already reachable, the model is + # already served and we should not require a host `ollama` CLI. 
+ runner_lines.append(f'ODYSSEUS_OLLAMA_PORT="{_ollama_port}"') + runner_lines.append('ODYSSEUS_OLLAMA_URL=""') + runner_lines.append('for _ody_ollama_port in "$ODYSSEUS_OLLAMA_PORT" 11434; do') + runner_lines.append(' [ -z "$_ody_ollama_port" ] && continue') + runner_lines.append(' for _ody_ollama_host in 127.0.0.1 localhost host.docker.internal; do') + runner_lines.append(' _ody_ollama_url="http://${_ody_ollama_host}:${_ody_ollama_port}"') + runner_lines.append(' if curl -sf "$_ody_ollama_url/api/tags" >/dev/null 2>&1; then') + runner_lines.append(' ODYSSEUS_OLLAMA_URL="$_ody_ollama_url"') + runner_lines.append(' ODYSSEUS_OLLAMA_PORT="$_ody_ollama_port"') + runner_lines.append(' break 2') + runner_lines.append(' fi') + runner_lines.append(' done') + runner_lines.append('done') + runner_lines.append('if [ -n "$ODYSSEUS_OLLAMA_URL" ]; then') + runner_lines.append(' if [ "$ODYSSEUS_OLLAMA_PORT" != "' + _ollama_port + '" ]; then') + runner_lines.append(' echo "[odysseus] Selected Ollama port ' + _ollama_port + ' was not reachable; using running Ollama on port ${ODYSSEUS_OLLAMA_PORT}."') + runner_lines.append(' fi') + runner_lines.append(' echo "[odysseus] Ollama API ready on port ${ODYSSEUS_OLLAMA_PORT}: ${ODYSSEUS_OLLAMA_URL}"') + runner_lines.append(' echo "[odysseus] This task is monitoring an existing Ollama server; stopping it here will not stop an external Docker/system service."') + runner_lines.append(' exec bash -i') + runner_lines.append('fi') runner_lines.append('if ! command -v ollama &>/dev/null; then') - runner_lines.append(' echo "ERROR: Ollama not found. Install it (macOS: brew install ollama, or https://ollama.com/download), then launch again."') - runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') - runner_lines.append('fi') - runner_lines.append('if ! 
curl -sf http://localhost:11434/api/tags >/dev/null 2>&1; then') - runner_lines.append(' echo "Starting ollama server..."; (ollama serve >/dev/null 2>&1 &)') - runner_lines.append(' for _ in 1 2 3 4 5 6 7 8 9 10; do curl -sf http://localhost:11434/api/tags >/dev/null 2>&1 && break; sleep 1; done') + runner_lines.append(' echo "ERROR: Ollama not found and no Ollama API is reachable on 127.0.0.1, localhost, or host.docker.internal (ports ${ODYSSEUS_OLLAMA_PORT}/11434)."') + runner_lines.append(' echo "Install Ollama, start an Ollama service/container on this server, or pick the port where it is already listening."') + runner_lines.append(' echo') + runner_lines.append(' echo "=== Process exited with code 127 ==="') + runner_lines.append(' exec bash -i') runner_lines.append('fi') + runner_lines.append('echo "Starting ollama server on 0.0.0.0:${ODYSSEUS_OLLAMA_PORT}..."') + runner_lines.append('OLLAMA_HOST="0.0.0.0:${ODYSSEUS_OLLAMA_PORT}" ollama serve') + runner_lines.append('_ody_exit=$?') + runner_lines.append('echo') + runner_lines.append('echo "=== Process exited with code ${_ody_exit} ==="') + runner_lines.append('exec bash -i') elif "vllm serve" in req.cmd: # vLLM is CUDA/ROCm-only and does not run on macOS at all. runner_lines.append('if [ "$(uname -s)" = "Darwin" ]; then') @@ -1016,18 +1048,19 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append('fi') - _append_serve_preflight_exit_lines( - runner_lines, - keep_shell_open=not local_windows, - ) - runner_lines.append(req.cmd) - if local_windows: - # Detached background process — no interactive shell to keep open. - # Print the exit marker the status poller looks for, then stop. 
- _append_serve_exit_code_lines(runner_lines, keep_shell_open=False) - else: - # Keep shell open after exit so user can see errors - _append_serve_exit_code_lines(runner_lines, keep_shell_open=True) + if not handled_ollama_serve: + _append_serve_preflight_exit_lines( + runner_lines, + keep_shell_open=not local_windows, + ) + runner_lines.append(req.cmd) + if local_windows: + # Detached background process — no interactive shell to keep open. + # Print the exit marker the status poller looks for, then stop. + _append_serve_exit_code_lines(runner_lines, keep_shell_open=False) + else: + # Keep shell open after exit so user can see errors + _append_serve_exit_code_lines(runner_lines, keep_shell_open=True) runner_path = TMUX_LOG_DIR / f"{session_id}_run.sh" runner_path.write_text("\n".join(runner_lines) + "\n", encoding="utf-8") diff --git a/static/js/cookbookRunning.js b/static/js/cookbookRunning.js index c654563f3..dc4fac722 100644 --- a/static/js/cookbookRunning.js +++ b/static/js/cookbookRunning.js @@ -169,6 +169,9 @@ export function _parseServePhase(snapshot) { if (flat.includes('Application startup complete')) { return { phase: 'ready', status: 'ready' }; } + if (/Ollama API ready on port\s+\d+/i.test(flat)) { + return { phase: 'ready', status: 'ready' }; + } // HTTP access logs (e.g. GET /v1/models 200 OK) mean the server is up if (/(?:GET|POST)\s+\/[^\s]*\s+HTTP\/[\d.]+"\s*\d{3}/.test(flat)) { return { phase: 'idle', status: 'ready' }; @@ -2295,15 +2298,24 @@ async function _reconnectTask(el, task) { if (task.type === 'serve' && !task._endpointAdded && !task._endpointAddInFlight && task._serveReady) { task._endpointAddInFlight = true; const rawHost = task.remoteHost || 'localhost'; - const host = rawHost.includes('@') ? rawHost.split('@').pop() : rawHost; + let host = rawHost.includes('@') ? 
rawHost.split('@').pop() : rawHost; const portMatch = task.payload?._cmd?.match(/--port[=\s]+(\d+)/) || task.payload?._cmd?.match(/(?:^|\s)-p[=\s]+(\d+)/) || snapshot.match(/Uvicorn running on\D*?:(\d+)/i) || snapshot.match(/running on\D*?:(\d+)/i) || snapshot.match(/listening on\D*?:(\d+)/i) || snapshot.match(/port[:=\s]+(\d+)/i); - const port = portMatch ? portMatch[1] : '8000'; - const baseUrl = `http://${host}:${port}/v1`; + let port = portMatch ? portMatch[1] : '8000'; + let baseUrl = `http://${host}:${port}/v1`; + const ollamaUrlMatch = snapshot.match(/Ollama API ready on port\s+\d+:\s*(http:\/\/[^\s]+)/i); + if (ollamaUrlMatch) { + try { + const u = new URL(ollamaUrlMatch[1]); + host = u.hostname || host; + port = u.port || '11434'; + baseUrl = `${u.origin}/v1`; + } catch {} + } fetch('/api/model-endpoints', { credentials: 'same-origin' }) .then(r => r.json()) .then(async (eps) => { @@ -2642,10 +2654,21 @@ async function _pollBackgroundStatus() { if (localTask && localTask._endpointAdded) continue; const rawHost = localTask?.remoteHost || t.remote || 'localhost'; - const host = rawHost.includes('@') ? rawHost.split('@').pop() : (rawHost === 'local' ? 'localhost' : rawHost); - const portMatch = localTask?.payload?._cmd?.match(/--port\s+(\d+)/); - const port = portMatch ? portMatch[1] : '8000'; - const baseUrl = `http://${host}:${port}/v1`; + let host = rawHost.includes('@') ? rawHost.split('@').pop() : (rawHost === 'local' ? 'localhost' : rawHost); + const portMatch = localTask?.payload?._cmd?.match(/--port\s+(\d+)/) + || localTask?.payload?._cmd?.match(/OLLAMA_HOST=[^\s:]+:(\d+)/); + let port = portMatch ? 
portMatch[1] : '8000'; + let baseUrl = `http://${host}:${port}/v1`; + const snapshot = t.output || localTask?.output || ''; + const ollamaUrlMatch = snapshot.match(/Ollama API ready on port\s+\d+:\s*(http:\/\/[^\s]+)/i); + if (ollamaUrlMatch) { + try { + const u = new URL(ollamaUrlMatch[1]); + host = u.hostname || host; + port = u.port || '11434'; + baseUrl = `${u.origin}/v1`; + } catch {} + } const _isDiffusion = localTask?.payload?._cmd?.includes('diffusion_server'); _updateTask(t.session_id, { _serveReady: true, _endpointAdded: true }); diff --git a/static/js/cookbookServe.js b/static/js/cookbookServe.js index 3894b9b5b..6b5961b7f 100644 --- a/static/js/cookbookServe.js +++ b/static/js/cookbookServe.js @@ -391,7 +391,8 @@ function _rerenderCachedModels() { panelHtml += ``; panelHtml += ``; panelHtml += ``; - panelHtml += ``; + const defaultPort = defaultBackend === 'ollama' ? '11434' : _nextAvailablePort(); + panelHtml += ``; const _activeGpus = (defaultGpus || '').split(',').map(s => s.trim()).filter(Boolean); const detectedGpuCount = Number(_getGpuToggleTotal?.() || 0); const _gpuMax = Math.max(detectedGpuCount || 8, ...(_activeGpus.map(Number).filter(n => !isNaN(n)).map(n => n + 1))); diff --git a/static/js/slashAutocomplete.js b/static/js/slashAutocomplete.js index 10fbd4277..8745c98a6 100644 --- a/static/js/slashAutocomplete.js +++ b/static/js/slashAutocomplete.js @@ -18,7 +18,7 @@ const EXCLUDED = new Set(['flip','roll','8ball','fortune','odyssey','ascii']); // are the short forms people will actually type (/new, /clear, /web, etc.) // rather than the full /chats new, /toggle web equivalents. 
const PROMOTED_ALIASES = new Set([ - 'new','clear','rename','fork','export','archive','important','star', + 'new','clear','rename','fork','export','archive','favorite','unfavorite', 'web','bash','research','doc', 'memories','forget', ]); diff --git a/static/js/slashCommands.js b/static/js/slashCommands.js index 76116b5a9..4d2497204 100644 --- a/static/js/slashCommands.js +++ b/static/js/slashCommands.js @@ -5393,8 +5393,8 @@ const COMMANDS = { 'delete': { handler: _cmdSessionDelete, alias: ['del','rm'], help: 'Delete chat', usage: '/chats delete [id]' }, 'archive': { handler: _cmdSessionArchive, alias: ['tar'], help: 'Archive chat', usage: '/chats archive [id]' }, 'rename': { handler: _cmdSessionRename, alias: ['mv'], help: 'Rename current chat', usage: '/chats rename Name' }, - 'important': { handler: _cmdSessionImportant, alias: ['pin'], help: 'Mark as important', usage: '/chats important' }, - 'unimportant': { handler: _cmdSessionUnimportant, alias: ['unpin'], help: 'Unmark important', usage: '/chats unimportant' }, + 'favorite': { handler: _cmdSessionImportant, alias: ['pin','important'], help: 'Mark as favorite', usage: '/chats favorite' }, + 'unfavorite': { handler: _cmdSessionUnimportant, alias: ['unpin','unimportant'], help: 'Unmark favorite', usage: '/chats unfavorite' }, 'fork': { handler: _cmdSessionFork, alias: ['cp'], help: 'Fork chat (keep first N msgs)', usage: '/chats fork [N]' }, 'truncate': { handler: _cmdSessionTruncate, alias: [], help: 'Delete older messages, keep last N', usage: '/chats truncate N' }, 'switch': { handler: _cmdSessionSwitch, alias: ['goto','cd'], help: 'Switch to chat by name/id', usage: '/chats switch name' }, @@ -5732,10 +5732,12 @@ export const LEGACY_ALIASES = { 'del': { parent: 'chats', sub: 'delete' }, 'archive': { parent: 'chats', sub: 'archive' }, 'rename': { parent: 'chats', sub: 'rename' }, - 'important': { parent: 'chats', sub: 'important' }, - 'star': { parent: 'chats', sub: 'important' }, - 'unimportant': { parent: 
'chats', sub: 'unimportant' }, - 'unstar': { parent: 'chats', sub: 'unimportant' }, + 'favorite': { parent: 'chats', sub: 'favorite' }, + 'important': { parent: 'chats', sub: 'favorite' }, + 'star': { parent: 'chats', sub: 'favorite' }, + 'unfavorite': { parent: 'chats', sub: 'unfavorite' }, + 'unimportant': { parent: 'chats', sub: 'unfavorite' }, + 'unstar': { parent: 'chats', sub: 'unfavorite' }, 'fork': { parent: 'chats', sub: 'fork' }, 'truncate': { parent: 'chats', sub: 'truncate' }, 'sessions': { parent: 'chats', sub: 'info' }, diff --git a/static/js/tasks.js b/static/js/tasks.js index 3cb55b310..9d18afc39 100644 --- a/static/js/tasks.js +++ b/static/js/tasks.js @@ -349,10 +349,23 @@ function _taskIcon(task) { return `${path}`; } +const _MODEL_BACKED_ACTIONS = new Set([ + 'summarize_emails', + 'draft_email_replies', + 'extract_email_events', + 'classify_events', + 'mark_email_boundaries', + 'learn_sender_signatures', + 'check_email_urgency', + 'test_skills', + 'audit_skills', + 'consolidate_memory', +]); + function _taskAiMark(task) { const kind = task?.task_type || task?.kind || ''; const action = task?.action || ''; - const aiAction = /(^|_)(ai|summarize|summary|draft|reply|classify|triage|audit|research|brief|skills?)($|_)/i.test(action); + const aiAction = _MODEL_BACKED_ACTIONS.has(action); if (!(kind === 'llm' || kind === 'research' || task?.model || task?.endpointUrl || aiAction)) return ''; return ''; } @@ -708,7 +721,7 @@ function _renderList() { const runBtn = document.createElement('button'); runBtn.className = 'task-status-badge task-run-now-badge task-card-run-btn'; runBtn.title = 'Run now'; - runBtn.style.cssText = 'position:relative;top:4px;margin-right:4px;'; + runBtn.style.cssText = 'position:relative;top:1px;margin-right:4px;'; runBtn.innerHTML = 'Run'; runBtn.addEventListener('click', (e) => { e.stopPropagation(); _doRunNow(task.id); }); actionsWrap.insertBefore(runBtn, menuBtn); diff --git a/static/style.css b/static/style.css index 
36984517a..456cb3e30 100644 --- a/static/style.css +++ b/static/style.css @@ -10203,6 +10203,12 @@ textarea.memory-add-input { height: 20px; min-height: 0; box-sizing: border-box; + position: relative; + top: -4px; +} +.task-state-badge svg { + position: relative; + top: -1px; } .task-status-badge:hover { filter: brightness(1.08) saturate(1.15); @@ -21253,6 +21259,7 @@ a.chat-link[href^="#research-"] { } .task-card .task-card-run-btn { margin-right: 1px !important; + top: 0; } } @@ -34765,7 +34772,7 @@ body.theme-frosted .modal { .slash-autocomplete-popup { position: fixed; z-index: 9000; - background: var(--bg-elev-2, #1a1a1a); + background: var(--panel, var(--bg)); border: 1px solid var(--border, rgba(255,255,255,0.08)); border-radius: 8px; box-shadow: 0 8px 24px rgba(0,0,0,0.35); @@ -34793,8 +34800,8 @@ body.theme-frosted .modal { white-space: nowrap; overflow: hidden; } -.slash-ac-row:hover { background: color-mix(in srgb, var(--fg) 6%, transparent); } -.slash-ac-row-sel { background: color-mix(in srgb, var(--accent, var(--red)) 14%, transparent); } +.slash-ac-row:hover { background-color: color-mix(in srgb, var(--accent, var(--red)) 10%, transparent); } +.slash-ac-row-sel { background-color: color-mix(in srgb, var(--accent, var(--red)) 14%, transparent); } .slash-ac-token { font-family: 'Fira Code', ui-monospace, monospace; color: var(--accent, var(--red)); From 6a78b02976f1ca618d907d63af00ae0207a28405 Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Tue, 2 Jun 2026 09:44:24 +0900 Subject: [PATCH 008/496] Fix endpoint model preservation for tasks --- routes/model_routes.py | 12 ++++++++---- src/endpoint_resolver.py | 32 +++++++++++++++++--------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/routes/model_routes.py b/routes/model_routes.py index a92f06b6e..44b4abd97 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -1402,12 +1402,18 @@ def setup_model_routes(model_discovery): return sess in variants or 
sess.startswith(base + "/") def _clear_sessions_for_endpoint(db, base_url: str) -> int: + """Drop stored auth for sessions using an endpoint being deleted. + + Keep the session's endpoint URL and model intact. If the admin is + replacing an endpoint with the same URL, clearing those fields leaves + the UI looking selected while chat requests arrive with an empty model. + The chat-time orphan guard still clears truly dead endpoints when no + matching enabled endpoint exists. + """ cleared = 0 rows = db.query(DbSession).filter(DbSession.endpoint_url.isnot(None)).all() for row in rows: if _session_uses_endpoint_url(row.endpoint_url or "", base_url): - row.endpoint_url = "" - row.model = "" row.headers = {} row.updated_at = datetime.utcnow() cleared += 1 @@ -1425,8 +1431,6 @@ def setup_model_routes(model_discovery): try: for sess in list(getattr(manager, "sessions", {}).values()): if _session_uses_endpoint_url(getattr(sess, "endpoint_url", "") or "", base_url): - sess.endpoint_url = "" - sess.model = "" sess.headers = {} cleared += 1 except Exception: diff --git a/src/endpoint_resolver.py b/src/endpoint_resolver.py index b204c7c9e..72cd054e7 100644 --- a/src/endpoint_resolver.py +++ b/src/endpoint_resolver.py @@ -35,6 +35,18 @@ def _first_chat_model(models) -> Optional[str]: return (models[0] if models else None) +def _endpoint_cached_models(ep) -> list: + """Return cached model ids from the current or legacy endpoint field.""" + raw = getattr(ep, "cached_models", None) or getattr(ep, "models", None) + if not raw: + return [] + try: + models = json.loads(raw) if isinstance(raw, str) else raw + except Exception: + return [] + return models if isinstance(models, list) else [] + + # Cache for Tailscale hostname → IP resolution _tailscale_cache: Dict[str, Optional[str]] = {} @@ -236,14 +248,9 @@ def resolve_endpoint( chat_url = build_chat_url(base) headers = build_headers(ep.api_key, base) - # If no model specified, try to pick the first from endpoint's cached list - if 
not model and hasattr(ep, 'models') and ep.models: - try: - models = json.loads(ep.models) if isinstance(ep.models, str) else ep.models - if models: - model = _first_chat_model(models) - except Exception: - pass + # If no model specified, try to pick the first from endpoint's cached list. + if not model: + model = _first_chat_model(_endpoint_cached_models(ep)) or "" return chat_url, model or fallback_model, headers except Exception as e: @@ -275,13 +282,8 @@ def resolve_endpoint_by_id( chat_url = build_chat_url(base) headers = build_headers(ep.api_key, base) m = (model or "").strip() - if not m and getattr(ep, "models", None): - try: - models = json.loads(ep.models) if isinstance(ep.models, str) else ep.models - if models: - m = _first_chat_model(models) or "" - except Exception: - pass + if not m: + m = _first_chat_model(_endpoint_cached_models(ep)) or "" if not m: return None return chat_url, m, headers From 50b81622e0aeaa0e2f190cf909ec838d47a57862 Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Tue, 2 Jun 2026 09:49:35 +0900 Subject: [PATCH 009/496] Allow Docker startup without env file --- docker-compose.yml | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index ef3afda41..d5fc4b85e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,12 +20,37 @@ services: # Lets the container reach local services on the Docker host, including # Ollama at http://host.docker.internal:11434. 
- "host.docker.internal:host-gateway" - env_file: - - .env environment: + - LLM_HOST=${LLM_HOST:-localhost} + - LLM_HOSTS=${LLM_HOSTS:-} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-} + - RESEARCH_LLM_ENDPOINT=${RESEARCH_LLM_ENDPOINT:-} + - HF_TOKEN=${HF_TOKEN:-} + - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-} - SEARXNG_INSTANCE=http://searxng:8080 - CHROMADB_HOST=chromadb - CHROMADB_PORT=8000 + - DATABASE_URL=${DATABASE_URL:-sqlite:///./data/app.db} + - AUTH_ENABLED=${AUTH_ENABLED:-true} + - LOCALHOST_BYPASS=${LOCALHOST_BYPASS:-false} + - ODYSSEUS_ADMIN_USER=${ODYSSEUS_ADMIN_USER:-admin} + - ODYSSEUS_ADMIN_PASSWORD=${ODYSSEUS_ADMIN_PASSWORD:-} + - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-http://localhost,http://127.0.0.1} + - SECURE_COOKIES=${SECURE_COOKIES:-false} + - EMBEDDING_URL=${EMBEDDING_URL:-} + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-} + - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-} + - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24} + - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1} + - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1} + - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost} + - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-} + - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} + - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-} + - TAVILY_API_KEY=${TAVILY_API_KEY:-} + - SERPER_API_KEY=${SERPER_API_KEY:-} # PUID / PGID — the user/group the container drops to before # running uvicorn (entrypoint also chowns /app/data + /app/logs # to match, so bind-mounted files stay editable from the host). 
From da97f1b9adac229d7994c1396dd393b5b84b20b8 Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Tue, 2 Jun 2026 09:50:35 +0900 Subject: [PATCH 010/496] Label Docker bind mounts for SELinux --- docker-compose.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index d5fc4b85e..b2d50de42 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,18 +4,18 @@ services: ports: - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000" volumes: - - ./data:/app/data - - ./logs:/app/logs + - ./data:/app/data:z + - ./logs:/app/logs:z # Cookbook remote-server SSH identity. Odysseus can generate a key here; # add the shown public key to each remote server's authorized_keys. - - ./data/ssh:/app/.ssh + - ./data/ssh:/app/.ssh:z # Cookbook local model cache. Inside Docker, "Local" means the Odysseus # container, so persist its HuggingFace cache under ./data/huggingface. - - ./data/huggingface:/app/.cache/huggingface + - ./data/huggingface:/app/.cache/huggingface:z # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.) # land under /app/.local for the odysseus user. Persist them so a # container recreate does not silently remove installed serve engines. - - ./data/local:/app/.local + - ./data/local:/app/.local:z extra_hosts: # Lets the container reach local services on the Docker host, including # Ollama at http://host.docker.internal:11434. 
@@ -97,7 +97,7 @@ services: - "127.0.0.1:8080:8080" volumes: - searxng-data:/etc/searxng - - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro + - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro,z environment: - SEARXNG_BASE_URL=http://localhost:8080/ - SEARXNG_SECRET=${SEARXNG_SECRET:-} From 1c9623a81d63a1ec4d28bef54082e6b1d3766eb6 Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Tue, 2 Jun 2026 09:52:52 +0900 Subject: [PATCH 011/496] Protect memory tidy owner scope --- src/builtin_actions.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/builtin_actions.py b/src/builtin_actions.py index 711c7eba5..77f4582c4 100644 --- a/src/builtin_actions.py +++ b/src/builtin_actions.py @@ -78,18 +78,14 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]: manager = MemoryManager(DATA_DIR) all_memories = manager.load_all() - # When the scheduled task was created without an explicit owner - # (the common case for built-in housekeeping rows), task.owner - # arrives as "" or None. The old filter then required memories - # with a matching empty owner — which excluded every real memory - # and the action no-op'd with "nothing to consolidate" even - # though hundreds of memories were sitting there. Treat empty - # owner as "no filter" so the housekeeping action actually runs. + # Empty owner means "all owners" for built-in housekeeping, but never + # mix owners in the same AI prompt/apply step. A specific owner is + # scoped strictly to that owner; unowned rows are their own group. 
_owner_clean = (owner or "").strip() if _owner_clean: def _belongs_to_owner(mem: dict) -> bool: mem_owner = (mem.get("owner") or "").strip() - return mem_owner == _owner_clean or not mem_owner + return mem_owner == _owner_clean else: def _belongs_to_owner(mem: dict) -> bool: return True @@ -98,21 +94,27 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]: if not owner_memories: raise TaskNoop("no memories to consolidate") + memory_owners = {(m.get("owner") or "").strip() for m in owner_memories} + allow_ai_tidy = len(memory_owners) <= 1 + url, model, headers = resolve_endpoint("utility", owner=owner) if not url or not model: url, model, headers = resolve_endpoint("default", owner=owner) - if url and model and len(owner_memories) >= 2: + if url and model and allow_ai_tidy and len(owner_memories) >= 2: try: + text_limit = 2000 items = [ { "id": m.get("id"), "category": m.get("category", "fact"), - "text": (m.get("text") or "").strip()[:600], + "text": (m.get("text") or "").strip()[:text_limit], + "truncated": len((m.get("text") or "").strip()) > text_limit, } for m in owner_memories if m.get("id") and (m.get("text") or "").strip() ] + truncated_ids = {item["id"] for item in items if item.get("truncated")} prompt = ( "You are tidying a user's saved personal memories. Return ONLY raw JSON, no markdown.\n" "Remove memories that are empty, broken, trivial conversation filler, duplicates, or obsolete " @@ -161,6 +163,9 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]: "text": text, "category": (item.get("category") or by_id[mid].get("category") or "fact").strip(), } + # If the model only saw a truncated memory, do not let + # that partial view delete or rewrite the full memory. 
+ keep_ids.update(mid for mid in truncated_ids if mid in by_id) if keep_ids: changed_text = 0 @@ -173,6 +178,8 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]: if mid not in keep_ids: continue cleaned = cleaned_by_id.get(mid) or {} + if mid in truncated_ids: + cleaned.pop("text", None) if cleaned.get("text") and cleaned["text"] != mem.get("text"): mem["text"] = cleaned["text"] changed_text += 1 @@ -208,10 +215,12 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]: removed_examples = [] for mem in owner_memories: text = (mem.get("text") or "").strip() - key = " ".join(text.lower().split()) - if not key: + normalized = " ".join(text.lower().split()) + if not normalized: removed_examples.append("(empty)") continue + mem_owner = (mem.get("owner") or "").strip() + key = (mem_owner, normalized) if key in seen: if len(removed_examples) < 3: removed_examples.append(text[:60] + ("..." if len(text) > 60 else "")) From d44f40b724f7bbb0061a404ba81ce648551de148 Mon Sep 17 00:00:00 2001 From: ghreprimand Date: Mon, 1 Jun 2026 20:44:39 -0500 Subject: [PATCH 012/496] Honor disabled speech service toggles (#814) Co-authored-by: ghreprimand <203024559+ghreprimand@users.noreply.github.com> --- services/stt/stt_service.py | 4 ++ services/tts/tts_service.py | 5 +++ tests/test_speech_service_toggles.py | 57 ++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 tests/test_speech_service_toggles.py diff --git a/services/stt/stt_service.py b/services/stt/stt_service.py index 9f2fd7e0e..55e57afb1 100644 --- a/services/stt/stt_service.py +++ b/services/stt/stt_service.py @@ -40,6 +40,8 @@ class STTService: @property def available(self) -> bool: settings = self._load_settings() + if settings.get("stt_enabled") is False: + return False provider = settings["stt_provider"] if provider == "disabled": return False @@ -140,6 +142,8 @@ class STTService: def transcribe(self, audio_bytes: bytes) -> 
Optional[str]: settings = self._load_settings() + if settings.get("stt_enabled") is False: + return None provider = settings["stt_provider"] model = settings["stt_model"] language = settings.get("stt_language", "") diff --git a/services/tts/tts_service.py b/services/tts/tts_service.py index 8b8de886e..a78c8a7da 100644 --- a/services/tts/tts_service.py +++ b/services/tts/tts_service.py @@ -34,6 +34,7 @@ class TTSService: from src.settings import load_settings saved = load_settings() return { + "tts_enabled": saved.get("tts_enabled", True), "tts_provider": saved.get("tts_provider", "disabled"), "tts_model": saved.get("tts_model", "tts-1"), "tts_voice": saved.get("tts_voice", "alloy"), @@ -43,6 +44,8 @@ class TTSService: @property def available(self) -> bool: settings = self._load_settings() + if settings.get("tts_enabled") is False: + return False provider = settings["tts_provider"] if provider == "disabled": return False @@ -128,6 +131,8 @@ class TTSService: def synthesize(self, text: str, use_cache: bool = True) -> Optional[bytes]: settings = self._load_settings() + if settings.get("tts_enabled") is False: + return None provider = settings["tts_provider"] model = settings["tts_model"] voice = settings["tts_voice"] diff --git a/tests/test_speech_service_toggles.py b/tests/test_speech_service_toggles.py new file mode 100644 index 000000000..e853900b3 --- /dev/null +++ b/tests/test_speech_service_toggles.py @@ -0,0 +1,57 @@ +from services.stt.stt_service import STTService +from services.tts.tts_service import TTSService + + +def test_tts_disabled_toggle_blocks_synthesis(monkeypatch, tmp_path): + service = TTSService(cache_dir=str(tmp_path)) + calls = {"endpoint": 0, "kokoro": 0} + + monkeypatch.setattr(service, "_load_settings", lambda: { + "tts_enabled": False, + "tts_provider": "endpoint:voice-endpoint", + "tts_model": "tts-1", + "tts_voice": "alloy", + "tts_speed": "1", + }) + + def fake_endpoint(*args, **kwargs): + calls["endpoint"] += 1 + return b"audio" + + def 
fake_kokoro(): + calls["kokoro"] += 1 + return None + + monkeypatch.setattr(service, "_synthesize_api", fake_endpoint) + monkeypatch.setattr(service, "_get_kokoro", fake_kokoro) + + assert service.available is False + assert service.synthesize("hello") is None + assert calls == {"endpoint": 0, "kokoro": 0} + + +def test_stt_disabled_toggle_blocks_transcription(monkeypatch): + service = STTService() + calls = {"endpoint": 0, "whisper": 0} + + monkeypatch.setattr(service, "_load_settings", lambda: { + "stt_enabled": False, + "stt_provider": "endpoint:transcribe-endpoint", + "stt_model": "whisper-1", + "stt_language": "", + }) + + def fake_endpoint(*args, **kwargs): + calls["endpoint"] += 1 + return "transcript" + + def fake_whisper(): + calls["whisper"] += 1 + return None + + monkeypatch.setattr(service, "_transcribe_api", fake_endpoint) + monkeypatch.setattr(service, "_get_whisper", fake_whisper) + + assert service.available is False + assert service.transcribe(b"audio") is None + assert calls == {"endpoint": 0, "whisper": 0} From 5ebe9ee67ab69daf5603a2e9482f169cf9625d25 Mon Sep 17 00:00:00 2001 From: mist Date: Tue, 2 Jun 2026 04:53:33 +0300 Subject: [PATCH 013/496] Fix invalidate_search_cache using a key that never matches stored entries (#852) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit invalidate_search_cache(query) built its cache key as generate_cache_key(f"{query}|10|None"), but the write path (searxng_search_results) replaces the caller's default count of 10 with the admin-configured _get_result_count() (default 5) before building the key. So a default search for "X" is cached under "X|5|None", while invalidation looked for "X|10|None" — they never match, and invalidate_search_cache silently failed to remove anything in the default configuration, violating its docstring ("invalidate ... just the given query"). 
Derive the count from _get_result_count() so invalidation matches the default-search entry the write path actually stores. The same bug (and fix) applies to both the src/search and services/search copies. Note: time-filtered variants (e.g. "X|5|day") still aren't reachable from a query-only signature, since cache keys are opaque SHA-256 hashes with no stored query; clearing those would need a broader cache-index redesign and is out of scope here. Adds tests/test_search_cache_invalidation.py covering the default-count case. --- services/search/core.py | 5 ++- src/search/core.py | 5 ++- tests/test_search_cache_invalidation.py | 45 +++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 tests/test_search_cache_invalidation.py diff --git a/services/search/core.py b/services/search/core.py index 946a0b40d..7208ea2f5 100644 --- a/services/search/core.py +++ b/services/search/core.py @@ -203,7 +203,10 @@ def invalidate_search_cache(query: Optional[str] = None) -> None: search_cache_index.clear() logger.info("All search cache entries have been cleared.") else: - cache_key = generate_cache_key(f"{query}|10|None") + # Match the key the write path stores: searxng_search_results replaces + # the caller's default count with the configured _get_result_count() + # (default 5), so a hardcoded "|10|None" never matched a real entry. 
+ cache_key = generate_cache_key(f"{query}|{_get_result_count()}|None") cache_file = SEARCH_CACHE_DIR / f"{cache_key}.cache" if cache_file.exists(): try: diff --git a/src/search/core.py b/src/search/core.py index f1a34536e..850e026fa 100644 --- a/src/search/core.py +++ b/src/search/core.py @@ -207,7 +207,10 @@ def invalidate_search_cache(query: Optional[str] = None) -> None: search_cache_index.clear() logger.info("All search cache entries have been cleared.") else: - cache_key = generate_cache_key(f"{query}|10|None") + # Match the key the write path stores: searxng_search_results replaces + # the caller's default count with the configured _get_result_count() + # (default 5), so a hardcoded "|10|None" never matched a real entry. + cache_key = generate_cache_key(f"{query}|{_get_result_count()}|None") cache_file = SEARCH_CACHE_DIR / f"{cache_key}.cache" if cache_file.exists(): try: diff --git a/tests/test_search_cache_invalidation.py b/tests/test_search_cache_invalidation.py new file mode 100644 index 000000000..5ad245b40 --- /dev/null +++ b/tests/test_search_cache_invalidation.py @@ -0,0 +1,45 @@ +"""Regression test for invalidate_search_cache key construction. + +The write path (`searxng_search_results`) stores a cache entry under +``generate_cache_key(f"{query}|{count}|{time_filter}")`` where ``count`` is the +admin-configured result count (``_get_result_count()``, default **5**) — it +replaces the caller's default of 10 with the configured value before building +the key. + +The original ``invalidate_search_cache`` hardcoded ``f"{query}|10|None"``, so it +never matched the key the write path actually produced (``|5|None`` by default) +and silently failed to invalidate anything — a contract violation of its own +docstring ("invalidate ... just the given query"). The fix derives the count +from ``_get_result_count()`` so invalidation matches the stored default entry. 
+""" +import pytest + +from src.search import core +from src.search.cache import generate_cache_key + + +def test_invalidate_uses_configured_count_not_hardcoded_10(tmp_path, monkeypatch): + query = "python tutorial" + result_count = 5 # documented default of _get_result_count() + + # Pin the configured count and redirect the cache dir to keep the test hermetic. + monkeypatch.setattr(core, "_get_result_count", lambda: result_count) + monkeypatch.setattr(core, "SEARCH_CACHE_DIR", tmp_path) + + # Reproduce exactly what searxng_search_results writes for a default search: + # the caller's default count of 10 is replaced by result_count, time_filter=None. + write_key = generate_cache_key(f"{query}|{result_count}|None") + cache_file = tmp_path / f"{write_key}.cache" + cache_file.write_text("{}", encoding="utf-8") + core.search_cache_index[write_key] = None + + try: + core.invalidate_search_cache(query) + + assert not cache_file.exists(), ( + "invalidate_search_cache failed to remove the entry the write path " + "stored under the configured result count — it used a mismatched key." + ) + assert write_key not in core.search_cache_index + finally: + core.search_cache_index.pop(write_key, None) From aba15e7b6d540638fae51fe6eea257c1dcc987a8 Mon Sep 17 00:00:00 2001 From: Tatlatat Date: Tue, 2 Jun 2026 09:09:18 +0700 Subject: [PATCH 014/496] fix(cookbook): sort by Fit when the Fit header is clicked (#842) (#860) The Cookbook Scan/Download (hwfit) table gave the Fit column key:'score', so clicking the Fit header sorted by score instead of by fit. Give the Fit column its own 'fit' sort key, add a matching option to the #hwfit-sort select, and rank fit_level (perfect > good > marginal > too_tight > no_fit) in the client-side sort. Default puts the best fit first; clicking again reverses it. Score still sorts by score. 
Closes #842 --- static/js/cookbook-hwfit.js | 7 ++++++- static/js/cookbook.js | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/static/js/cookbook-hwfit.js b/static/js/cookbook-hwfit.js index e6445f865..425355395 100644 --- a/static/js/cookbook-hwfit.js +++ b/static/js/cookbook-hwfit.js @@ -519,6 +519,11 @@ export async function _hwfitFetch(fresh = false) { const asc = sortSel?.dataset.reverse === '1'; // reversed → ascending (lowest first) const field = { score: 'score', vram: 'required_gb', speed: 'speed_tps', params: 'params_b', context: 'context' }[sortKey] || 'score'; data.models.sort((a, b) => { + if (sortKey === 'fit') { + const rank = { perfect: 4, good: 3, marginal: 2, too_tight: 1, no_fit: 0 }; + const av = rank[a.fit_level] || 0, bv = rank[b.fit_level] || 0; + return asc ? av - bv : bv - av; + } const av = Number(a[field]) || 0, bv = Number(b[field]) || 0; return asc ? av - bv : bv - av; }); @@ -717,7 +722,7 @@ function _wireManualHardwareControls(el) { export const _fitColors = { perfect: 'var(--green, #50fa7b)', good: 'var(--yellow, #f1fa8c)', marginal: 'var(--orange, #ffb86c)', too_tight: 'var(--red, #ff5555)' }; export const _hwfitColumns = [ - { key: 'score', label: 'Fit', cls: 'hwfit-fit' }, + { key: 'fit', label: 'Fit', cls: 'hwfit-fit' }, { key: null, label: 'Model', cls: 'hwfit-name' }, { key: 'params',label: 'Param', cls: 'hwfit-c-params' }, { key: null, label: 'Quant', cls: 'hwfit-c-quant' }, diff --git a/static/js/cookbook.js b/static/js/cookbook.js index 98f5dc769..8eb914a7b 100644 --- a/static/js/cookbook.js +++ b/static/js/cookbook.js @@ -1434,7 +1434,7 @@ function _renderRecipes() { html += ''; html += ''; html += ''; html += ''; From 3f6d630b56a4081d5116df7b2c06c5c9ba61d2df Mon Sep 17 00:00:00 2001 From: wundervrc <147297600+wundervrc@users.noreply.github.com> Date: Mon, 1 Jun 2026 23:40:43 -0230 Subject: [PATCH 015/496] Never resolve to a disabled endpoint model (#861) MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Background tasks (e.g. the Email Tags / check_email_urgency action) resolve their model through resolve_endpoint("utility") → Default Chat. When the configured model is one the user has since disabled on the endpoint, the resolver still dispatched to it — on Groq that surfaces as every email failing with "HTTP 400: model ... requires terms acceptance". Two paths fed this: - The auto-pick fallback selected from cached_models without excluding the endpoint's hidden_models, so a disabled model listed first won. - A stale default_model left pointing at a now-disabled model (seeded at endpoint registration from raw model_ids[0]) was used verbatim. Fix resolve_endpoint / resolve_endpoint_by_id to drop a configured model that's in hidden_models and to pick the first ENABLED chat model. Also seed default_model on registration via _first_chat_model so we never pin the global default to an embedding/tts entry a provider lists first. Checks: python -m pytest tests/test_endpoint_resolver.py tests/test_model_routes.py tests/test_model_context.py (all pass); python -m py_compile app.py routes/model_routes.py src/endpoint_resolver.py. Co-authored-by: Claude Opus 4.8 --- routes/model_routes.py | 8 ++- src/endpoint_resolver.py | 39 ++++++++++++- tests/test_endpoint_resolver.py | 99 +++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+), 5 deletions(-) diff --git a/routes/model_routes.py b/routes/model_routes.py index 44b4abd97..428228595 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -1052,11 +1052,15 @@ def setup_model_routes(model_discovery): ) db.add(ep) db.commit() - # Auto-set as default chat endpoint if none configured yet + # Auto-set as default chat endpoint if none configured yet. Seed + # the first CHAT model (not raw model_ids[0]) so we don't pin the + # global default to an embedding/tts/etc. entry a provider happens + # to list first. 
settings = _load_settings() if not settings.get("default_endpoint_id"): + from src.endpoint_resolver import _first_chat_model settings["default_endpoint_id"] = ep.id - settings["default_model"] = model_ids[0] if model_ids else "" + settings["default_model"] = _first_chat_model(model_ids) or "" _save_settings(settings) _invalidate_models_cache() _local_probe_cache["data"] = None diff --git a/src/endpoint_resolver.py b/src/endpoint_resolver.py index 72cd054e7..aec81a8f7 100644 --- a/src/endpoint_resolver.py +++ b/src/endpoint_resolver.py @@ -47,6 +47,29 @@ def _endpoint_cached_models(ep) -> list: return models if isinstance(models, list) else [] +def _endpoint_hidden_models(ep) -> set: + """Model ids the admin disabled on this endpoint (the UI's hidden list).""" + raw = getattr(ep, "hidden_models", None) + if not raw: + return set() + try: + hidden = json.loads(raw) if isinstance(raw, str) else raw + except Exception: + return set() + return set(hidden) if isinstance(hidden, list) else set() + + +def _endpoint_enabled_models(ep) -> list: + """Cached models minus the ones disabled on the endpoint, order preserved. + + The auto-pick fallback must never select a model the user disabled — a + Groq endpoint can list 16 models with only 1 enabled, and picking the + raw first one resolves to a model that 400s ("requires terms acceptance"). + """ + hidden = _endpoint_hidden_models(ep) + return [m for m in _endpoint_cached_models(ep) if m not in hidden] + + # Cache for Tailscale hostname → IP resolution _tailscale_cache: Dict[str, Optional[str]] = {} @@ -248,9 +271,15 @@ def resolve_endpoint( chat_url = build_chat_url(base) headers = build_headers(ep.api_key, base) - # If no model specified, try to pick the first from endpoint's cached list. + # Discard a configured model the user has since disabled on the + # endpoint (e.g. a stale `default_model` left pointing at a now-hidden + # model). 
Treat it as unset so the picker below selects a live one + # instead of dispatching to a disabled model that 400s. + if model and model in _endpoint_hidden_models(ep): + model = "" + # If no (usable) model specified, pick the first enabled chat model. if not model: - model = _first_chat_model(_endpoint_cached_models(ep)) or "" + model = _first_chat_model(_endpoint_enabled_models(ep)) or "" return chat_url, model or fallback_model, headers except Exception as e: @@ -282,8 +311,12 @@ def resolve_endpoint_by_id( chat_url = build_chat_url(base) headers = build_headers(ep.api_key, base) m = (model or "").strip() + # Drop a model the user disabled on the endpoint, then pick the first + # enabled chat model rather than a hidden one. + if m and m in _endpoint_hidden_models(ep): + m = "" if not m: - m = _first_chat_model(_endpoint_cached_models(ep)) or "" + m = _first_chat_model(_endpoint_enabled_models(ep)) or "" if not m: return None return chat_url, m, headers diff --git a/tests/test_endpoint_resolver.py b/tests/test_endpoint_resolver.py index 447aecd32..1c638eaae 100644 --- a/tests/test_endpoint_resolver.py +++ b/tests/test_endpoint_resolver.py @@ -1,4 +1,5 @@ """Tests for endpoint_resolver — pure functions tested directly to avoid import pollution.""" +import json import re from urllib.parse import urlparse @@ -6,6 +7,45 @@ from urllib.parse import urlparse # Copy the pure functions to test them without importing the full module. # This avoids module cache conflicts with other test files that mock dependencies. 
+_NON_CHAT_MODEL = ( + "text-embedding", "embedding", "tts-", "whisper", "dall-e", + "moderation", "rerank", "reranker", "clip", "stable-diffusion", +) + + +def _first_chat_model(models): + for m in (models or []): + if not any(p in str(m).lower() for p in _NON_CHAT_MODEL): + return m + return (models[0] if models else None) + + +def _endpoint_cached_models(ep) -> list: + raw = getattr(ep, "cached_models", None) or getattr(ep, "models", None) + if not raw: + return [] + try: + models = json.loads(raw) if isinstance(raw, str) else raw + except Exception: + return [] + return models if isinstance(models, list) else [] + + +def _endpoint_hidden_models(ep) -> set: + raw = getattr(ep, "hidden_models", None) + if not raw: + return set() + try: + hidden = json.loads(raw) if isinstance(raw, str) else raw + except Exception: + return set() + return set(hidden) if isinstance(hidden, list) else set() + + +def _endpoint_enabled_models(ep) -> list: + hidden = _endpoint_hidden_models(ep) + return [m for m in _endpoint_cached_models(ep) if m not in hidden] + def normalize_base(url: str) -> str: url = (url or "").strip().rstrip("/") for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]: @@ -137,3 +177,62 @@ class TestBuildHeaders: def test_empty_key(self): assert build_headers("", "https://api.openai.com/v1") == {} + + +class _Ep: + """Minimal ModelEndpoint stand-in for the model-picking helpers.""" + def __init__(self, cached=None, hidden=None): + self.cached_models = json.dumps(cached) if cached is not None else None + self.hidden_models = json.dumps(hidden) if hidden is not None else None + + +class TestFirstChatModel: + def test_skips_embedding_and_tts(self): + models = ["text-embedding-ada-002", "whisper-large-v3", "gpt-4o"] + assert _first_chat_model(models) == "gpt-4o" + + def test_falls_back_to_first_when_all_non_chat(self): + assert _first_chat_model(["whisper-large-v3"]) == "whisper-large-v3" + + def test_empty(self): + assert 
_first_chat_model([]) is None + + +class TestEnabledModels: + def test_excludes_hidden(self): + # The Groq repro: 16 models, only gpt-oss-120b enabled. + cached = [ + "openai/gpt-oss-safeguard-20b", "canopylabs/orpheus-arabic-saudi", + "whisper-large-v3", "openai/gpt-oss-120b", + ] + hidden = [ + "openai/gpt-oss-safeguard-20b", "canopylabs/orpheus-arabic-saudi", + "whisper-large-v3", + ] + ep = _Ep(cached=cached, hidden=hidden) + assert _endpoint_enabled_models(ep) == ["openai/gpt-oss-120b"] + + def test_no_hidden_returns_all(self): + ep = _Ep(cached=["a", "b"], hidden=None) + assert _endpoint_enabled_models(ep) == ["a", "b"] + + def test_picker_never_selects_disabled_model(self): + # Regression: a disabled model listed first must not be auto-picked. + cached = ["canopylabs/orpheus-arabic-saudi", "openai/gpt-oss-120b"] + hidden = ["canopylabs/orpheus-arabic-saudi"] + ep = _Ep(cached=cached, hidden=hidden) + assert _first_chat_model(_endpoint_enabled_models(ep)) == "openai/gpt-oss-120b" + + def test_stale_configured_model_is_discarded(self): + # A configured model that's been disabled is dropped, falling through + # to the first enabled chat model. 
+ ep = _Ep( + cached=["canopylabs/orpheus-arabic-saudi", "openai/gpt-oss-120b"], + hidden=["canopylabs/orpheus-arabic-saudi"], + ) + configured = "canopylabs/orpheus-arabic-saudi" + if configured in _endpoint_hidden_models(ep): + configured = "" + if not configured: + configured = _first_chat_model(_endpoint_enabled_models(ep)) + assert configured == "openai/gpt-oss-120b" From 54ecfa39cf997b72c61cf69dca5b28ce00d793ce Mon Sep 17 00:00:00 2001 From: LittleLlama <72672345+LittleLlama9@users.noreply.github.com> Date: Mon, 1 Jun 2026 19:11:17 -0700 Subject: [PATCH 016/496] Provider detection: match by hostname instead of substring (re #768) (#815) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Dedupe URL routing helpers and tighten adjacent hostname checks * Match providers by hostname, not substring, in _detect_provider _detect_provider used `"anthropic.com" in url`-style substring checks, so a URL that merely contained a provider's domain in its path or query — or a look-alike host like `anthropic.com.example` — was misclassified and picked the wrong auth-header/payload shape. Switch it to the existing `_host_match` helper (hostname exact/subdomain match), the same way the human-readable labels and curated model lists already work, finishing that migration. Also harden `_host_match` against trailing-dot FQDNs. Not a credential-leak fix: _detect_provider only classifies a URL the admin already configured next to its key, and the URL — not this function — decides where the request goes. This is a correctness/consistency cleanup. Adds tests that import the real helpers (test_endpoint_resolver.py tests local copies, so it can't catch this) covering the substring false-positives. Refs #768. 
Co-Authored-By: Claude Opus 4.8 * Import build_headers under its real name in model_routes It was imported as `build_headers as _provider_headers`, which collides with the unrelated llm_core._provider_headers(provider, headers) — same name, different signature. Use the real name to remove the confusion. Co-Authored-By: Claude Opus 4.8 * Use hostname matching in URL builders, not raw suffix checks PR review flagged that _detect_provider() was hardened to match on hostname, but several helpers still used raw host.endswith("anthropic.com") / host.endswith("ollama.com"), which match adjacent hosts like notanthropic.com / notollama.com. Route the remaining checks through _host_match(): _is_ollama_native_url and _ollama_api_root in llm_core, and _anthropic_api_root / _ollama_api_root in endpoint_resolver. With _detect_provider already hostname-correct, the trailing "or host.endswith(...)" clauses in build_chat_url / build_models_url are redundant, so drop them rather than fix the suffix match in place. Add builder-level tests asserting look-alike and domain-in-path hosts route to the OpenAI-compatible default. They import the real builders and fail on the pre-fix code.
Co-Authored-By: Claude --------- Co-authored-by: Claude Opus 4.8 --- routes/model_routes.py | 119 ++++++++------------------- src/endpoint_resolver.py | 41 +++++----- src/llm_core.py | 77 +++++++++++------ tests/test_provider_detection.py | 136 +++++++++++++++++++++++++++++++ 4 files changed, 245 insertions(+), 128 deletions(-) create mode 100644 tests/test_provider_detection.py diff --git a/routes/model_routes.py b/routes/model_routes.py index 428228595..935f3b93c 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -14,62 +14,19 @@ from pydantic import BaseModel from fastapi.responses import StreamingResponse from core.database import SessionLocal, ModelEndpoint, Session as DbSession from core.middleware import require_admin -from src.llm_core import _detect_provider, ANTHROPIC_MODELS +from src.llm_core import _detect_provider, _host_match, ANTHROPIC_MODELS from src.settings import load_settings as _load_settings, save_settings as _save_settings -from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_url +from src.endpoint_resolver import ( + normalize_base as _normalize_base, + build_chat_url, + build_models_url, + build_headers, +) from src.auth_helpers import owner_filter logger = logging.getLogger(__name__) -def _anthropic_api_root(base: str) -> str: - """Return Anthropic's API root without duplicating /v1.""" - base = (base or "").strip().rstrip("/") - host = urlparse(base).hostname or "" - if host.endswith("anthropic.com") and base.endswith("/v1"): - return base[:-3].rstrip("/") - return base - - -def _ollama_api_root(base: str) -> str: - """Return Ollama's native API root without depending on deferred imports.""" - base = (base or "").strip().rstrip("/") - parsed = urlparse(base) - host = parsed.hostname or "" - path = (parsed.path or "").rstrip("/") - if path.endswith("/api"): - return base - if host.endswith("ollama.com"): - root = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else 
"https://ollama.com" - return root.rstrip("/") + "/api" - return base - - -def _models_url(base: str) -> str: - """Return provider-specific model-list URL for route-local probing.""" - provider = _detect_provider(base) - host = urlparse(base).hostname or "" - if provider == "anthropic" or host.endswith("anthropic.com"): - return _anthropic_api_root(base) + "/v1/models" - if provider == "ollama" or host.endswith("ollama.com"): - return _ollama_api_root(base) + "/tags" - return base.rstrip("/") + "/models" - - -def _provider_headers(api_key: Optional[str], base: str) -> Dict[str, str]: - """Build provider auth headers without depending on import-time stubs.""" - if not api_key: - return {} - provider = _detect_provider(base) - host = urlparse(base).hostname or "" - if provider == "anthropic" or host.endswith("anthropic.com"): - return { - "x-api-key": api_key, - "anthropic-version": "2023-06-01", - } - return {"Authorization": f"Bearer {api_key}"} - - # ── Curated model lists per provider ── # For cloud providers that return 100+ models, only show these by default. # A model ID matches if it starts with or equals a curated entry. @@ -122,31 +79,35 @@ _PROVIDER_CURATED = { ], } -# Map URL substrings → curated-list keys for providers whose _detect_provider() +# Map hostnames → curated-list keys for providers whose _detect_provider() # returns a generic value (e.g. "openai") but deserve their own curated list. # "openrouter" is a sentinel meaning "no curation — show all models as curated". -_URL_TO_CURATED = { - "z.ai": "zai", - "api.deepseek.com": "deepseek", - "api.groq.com": "groq", - "api.mistral.ai": "mistral", - "api.together.xyz": "together", - "api.fireworks.ai": "fireworks", - "generativelanguage.googleapis.com": "google", - "api.x.ai": "xai", - "openrouter.ai": "openrouter", - "ollama.com": "ollama", -} +# Entries are matched by hostname equality or subdomain suffix (via _host_match), +# so e.g. 
"deepseek.com" covers api.deepseek.com without matching the substring +# inside an unrelated URL. +_HOST_TO_CURATED = ( + ("z.ai", "zai"), + ("deepseek.com", "deepseek"), + ("groq.com", "groq"), + ("mistral.ai", "mistral"), + ("together.xyz", "together"), + ("together.ai", "together"), + ("fireworks.ai", "fireworks"), + ("googleapis.com", "google"), + ("x.ai", "xai"), + ("openrouter.ai", "openrouter"), + ("ollama.com", "ollama"), +) def _match_provider_curated(base_url: str, provider: str) -> str: """Return the curated-list key for a given endpoint. - Checks the base URL against _URL_TO_CURATED first, then falls back - to the raw provider string from _detect_provider(). + Matches the base URL's hostname against known providers; falls back to + the raw provider string from _detect_provider(). """ - for substring, key in _URL_TO_CURATED.items(): - if substring in (base_url or ""): + for domain, key in _HOST_TO_CURATED: + if _host_match(base_url, domain): return key return provider @@ -235,12 +196,12 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1 elif provider == "ollama": from src.llm_core import _build_ollama_payload target_url = build_chat_url(base) - h = _provider_headers(api_key, base) + h = build_headers(api_key, base) h["Content-Type"] = "application/json" payload = _build_ollama_payload(model_id, messages, 0.0, 5, stream=False, tools=_test_tools) else: target_url = build_chat_url(base) - h = _provider_headers(api_key, base) + h = build_headers(api_key, base) h["Content-Type"] = "application/json" from src.llm_core import _uses_max_completion_tokens _max_key = "max_completion_tokens" if _uses_max_completion_tokens(model_id) else "max_tokens" @@ -308,7 +269,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis base = resolve_url(_normalize_base(base_url)) if _detect_provider(base) == "anthropic": # Try Anthropic's /v1/models endpoint first - url = _anthropic_api_root(base) + "/v1/models" + url 
= build_models_url(base) headers = {"anthropic-version": "2023-06-01"} if api_key: headers["x-api-key"] = api_key @@ -331,8 +292,8 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis return [] logger.warning(f"Anthropic /v1/models failed, using hardcoded list: {e}") return list(ANTHROPIC_MODELS) - url = _models_url(base) - headers = _provider_headers(api_key, base) + url = build_models_url(base) + headers = build_headers(api_key, base) try: r = httpx.get(url, headers=headers, timeout=timeout) r.raise_for_status() @@ -746,8 +707,8 @@ def setup_model_routes(model_discovery): entry["error"] = str(e) entry["model_count"] = 0 else: - url = _models_url(base) - headers = _provider_headers(ep.api_key, base) + url = build_models_url(base) + headers = build_headers(ep.api_key, base) try: t0 = _time.time() r = httpx.get(url, headers=headers, timeout=5) @@ -971,11 +932,6 @@ def setup_model_routes(model_discovery): shared: str = Form("true"), ): require_admin(request) - base_url = base_url.strip().rstrip("/") - # Normalize: strip trailing /models, /chat/completions, /v1/messages etc to get clean base - for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]: - if base_url.endswith(suffix): - base_url = base_url[:-len(suffix)].rstrip("/") base_url = _normalize_base(base_url) if not base_url: raise HTTPException(400, "Base URL is required") @@ -1085,10 +1041,7 @@ def setup_model_routes(model_discovery): api_key: str = Form(""), ): require_admin(request) - base_url = base_url.strip().rstrip("/") - for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]: - if base_url.endswith(suffix): - base_url = base_url[:-len(suffix)].rstrip("/") + base_url = _normalize_base(base_url) if not base_url: raise HTTPException(400, "Base URL is required") from src.endpoint_resolver import resolve_url diff --git a/src/endpoint_resolver.py b/src/endpoint_resolver.py index aec81a8f7..f0cd16327 100644 --- 
a/src/endpoint_resolver.py +++ b/src/endpoint_resolver.py @@ -12,7 +12,7 @@ from typing import Optional, Tuple, Dict from urllib.parse import urlparse, urlunparse from src.database import SessionLocal, ModelEndpoint -from src.llm_core import _detect_provider +from src.llm_core import _detect_provider, _host_match logger = logging.getLogger(__name__) @@ -145,8 +145,7 @@ def normalize_base(url: str) -> str: def _anthropic_api_root(base: str) -> str: """Return Anthropic's API root, preserving /v1 for OpenAI-compatible APIs elsewhere.""" base = (base or "").strip().rstrip("/") - host = urlparse(base).hostname or "" - if host.endswith("anthropic.com") and base.endswith("/v1"): + if _host_match(base, "anthropic.com") and base.endswith("/v1"): return base[:-3].rstrip("/") return base @@ -155,11 +154,10 @@ def _ollama_api_root(base: str) -> str: """Return the native Ollama API root, adding /api for ollama.com hosts.""" base = (base or "").strip().rstrip("/") parsed = urlparse(base) - host = parsed.hostname or "" path = (parsed.path or "").rstrip("/") if path.endswith("/api"): return base - if host.endswith("ollama.com"): + if _host_match(base, "ollama.com"): root = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else "https://ollama.com" return root.rstrip("/") + "/api" return base @@ -169,10 +167,9 @@ def build_chat_url(base: str) -> str: """Return the correct chat endpoint URL for a given base.""" base = resolve_url(base) provider = _detect_provider(base) - host = urlparse(base).hostname or "" - if provider == "anthropic" or host.endswith("anthropic.com"): + if provider == "anthropic": return _anthropic_api_root(base) + "/v1/messages" - if provider == "ollama" or host.endswith("ollama.com"): + if provider == "ollama": return _ollama_api_root(base) + "/chat" return base + "/chat/completions" @@ -181,10 +178,9 @@ def build_models_url(base: str) -> str: """Return the provider-specific model-list endpoint URL for a base.""" base = resolve_url(base) 
provider = _detect_provider(base) - host = urlparse(base).hostname or "" - if provider == "anthropic" or host.endswith("anthropic.com"): + if provider == "anthropic": return _anthropic_api_root(base) + "/v1/models" - if provider == "ollama" or host.endswith("ollama.com"): + if provider == "ollama": return _ollama_api_root(base) + "/tags" return base + "/models" @@ -231,24 +227,28 @@ def resolve_endpoint( except Exception: return fallback_url, fallback_model, fallback_headers - ep_id = (get_user_setting(f"{setting_prefix}_endpoint_id", owner or "", settings.get(f"{setting_prefix}_endpoint_id", "")) or "").strip() - model = (get_user_setting(f"{setting_prefix}_model", owner or "", settings.get(f"{setting_prefix}_model", "")) or "").strip() + owner_str = owner or "" + def _stg(key: str) -> str: + return (get_user_setting(key, owner_str, settings.get(key, "")) or "").strip() + + ep_id = _stg(f"{setting_prefix}_endpoint_id") + model = _stg(f"{setting_prefix}_model") # Unset Utility means "same as Default Chat Model". This keeps background # features usable out of the box and lets users override Utility only when # they explicitly want a separate cheaper/faster model. if setting_prefix == "utility" and not ep_id: - ep_id = (get_user_setting("default_endpoint_id", owner or "", settings.get("default_endpoint_id", "")) or "").strip() - model = (get_user_setting("default_model", owner or "", settings.get("default_model", "")) or "").strip() + ep_id = _stg("default_endpoint_id") + model = _stg("default_model") # Fall back to utility model for task/research/auto-naming if not specifically configured. # If Utility itself is unset, the block above makes that resolve to Default Chat. 
if not ep_id and setting_prefix != "utility": - ep_id = (get_user_setting("utility_endpoint_id", owner or "", settings.get("utility_endpoint_id", "")) or "").strip() - model = (get_user_setting("utility_model", owner or "", settings.get("utility_model", "")) or "").strip() + ep_id = _stg("utility_endpoint_id") + model = _stg("utility_model") if not ep_id: - ep_id = (get_user_setting("default_endpoint_id", owner or "", settings.get("default_endpoint_id", "")) or "").strip() - model = (get_user_setting("default_model", owner or "", settings.get("default_model", "")) or "").strip() + ep_id = _stg("default_endpoint_id") + model = _stg("default_model") if not ep_id: return fallback_url, fallback_model, fallback_headers @@ -342,7 +342,8 @@ def resolve_utility_fallback_candidates(owner: Optional[str] = None) -> list: try: from src.settings import get_user_setting, load_settings settings = load_settings() - if not (get_user_setting("utility_endpoint_id", owner or "", settings.get("utility_endpoint_id", "")) or "").strip(): + utility_ep = (get_user_setting("utility_endpoint_id", owner or "", settings.get("utility_endpoint_id", "")) or "").strip() + if not utility_ep: return _resolve_fallback_candidates("default_model_fallbacks", owner=owner) except Exception: pass diff --git a/src/llm_core.py b/src/llm_core.py index 0d4ddc5d8..f77f3bb3c 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -163,7 +163,7 @@ def _is_ollama_native_url(url: str) -> bool: return False host = parsed.hostname or "" path = (parsed.path or "").rstrip("/") - if host.endswith("ollama.com"): + if _host_match(url, "ollama.com"): return True local_ollama_host = host in {"localhost", "127.0.0.1", "0.0.0.0", "::1"} or parsed.port == 11434 return local_ollama_host and (path == "/api" or path.startswith("/api/")) @@ -173,7 +173,6 @@ def _ollama_api_root(url: str) -> str: """Return a native Ollama API root such as https://ollama.com/api.""" url = (url or "").strip().rstrip("/") parsed = urlparse(url) - host = 
parsed.hostname or "" path = (parsed.path or "").rstrip("/") if path.endswith("/api/chat"): return url[: -len("/chat")] @@ -183,7 +182,7 @@ def _ollama_api_root(url: str) -> str: return url[: -len("/generate")] if path.endswith("/api"): return url - if host.endswith("ollama.com"): + if _host_match(url, "ollama.com"): root = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else "https://ollama.com" return root.rstrip("/") + "/api" return url @@ -225,16 +224,43 @@ def _parse_ollama_response(data: dict) -> str: return message.get("content") or data.get("response") or "" +def _host_match(url: str, *domains: str) -> bool: + """Return True if url's hostname equals any of `domains` or is a subdomain of one. + + Used by helpers that want "is this Anthropic?" / "is this OpenRouter?" + style checks. Prefer this over substring matching on the URL: the + substring form gives wrong answers for unrelated paths or query strings + that happen to contain the domain text. + """ + if not url: + return False + try: + # rstrip(".") so a fully-qualified host with a trailing dot + # ("api.anthropic.com.") still matches "anthropic.com". + host = (urlparse(url).hostname or "").lower().rstrip(".") + except Exception: + return False + if not host: + return False + return any(host == d or host.endswith("." + d) for d in domains) + + def _detect_provider(url: str) -> str: - """Detect API provider from URL.""" - u = (url or "").lower() + """Detect the API provider from a configured endpoint URL. + + Matches on hostname (exact or subdomain) rather than substring, so a URL + that merely contains a provider's domain in its path or query — or a + look-alike host such as ``anthropic.com.example`` — is not misclassified. + Unknown hosts fall back to the OpenAI-compatible default, which the + majority of providers implement. 
+ """ if _is_ollama_native_url(url): return "ollama" - if "anthropic.com" in u: + if _host_match(url, "anthropic.com"): return "anthropic" - if "openrouter.ai" in u: + if _host_match(url, "openrouter.ai"): return "openrouter" - if "groq.com" in u: + if _host_match(url, "groq.com"): return "groq" return "openai" @@ -251,26 +277,27 @@ def _provider_headers(provider: str, headers: Optional[Dict] = None) -> Dict[str def _provider_label(url: str) -> str: """Human-friendly provider name for error messages.""" - u = (url or "").lower() - if "anthropic.com" in u: return "Anthropic" - if "ollama.com" in u: return "Ollama Cloud" - if "api.x.ai" in u or "x.ai/" in u: return "xAI" - if "openai.com" in u: return "OpenAI" - if "openrouter.ai" in u: return "OpenRouter" - if "groq.com" in u: return "Groq" - if "mistral.ai" in u: return "Mistral" - if "deepseek.com" in u: return "DeepSeek" - if "googleapis.com" in u or "generativelanguage" in u: return "Google" - if "together.xyz" in u or "together.ai" in u: return "Together" - if "fireworks.ai" in u: return "Fireworks" - if "ollama" in u or ":11434" in u: return "Ollama" - if "localhost" in u or "127.0.0.1" in u: return "local endpoint" + if not url: + return "provider" + if _host_match(url, "anthropic.com"): return "Anthropic" + if _host_match(url, "ollama.com"): return "Ollama Cloud" + if _host_match(url, "x.ai"): return "xAI" + if _host_match(url, "openai.com"): return "OpenAI" + if _host_match(url, "openrouter.ai"): return "OpenRouter" + if _host_match(url, "groq.com"): return "Groq" + if _host_match(url, "mistral.ai"): return "Mistral" + if _host_match(url, "deepseek.com"): return "DeepSeek" + if _host_match(url, "googleapis.com"): return "Google" + if _host_match(url, "together.xyz", "together.ai"): return "Together" + if _host_match(url, "fireworks.ai"): return "Fireworks" + if _is_ollama_native_url(url): return "Ollama" try: - from urllib.parse import urlparse - host = urlparse(url).hostname or "provider" - return host + 
host = (urlparse(url).hostname or "").lower() except Exception: return "provider" + if host in {"localhost", "127.0.0.1", "::1", "0.0.0.0"}: + return "local endpoint" + return host or "provider" def _format_upstream_error(status: int, body: bytes | str, url: str) -> str: diff --git a/tests/test_provider_detection.py b/tests/test_provider_detection.py new file mode 100644 index 000000000..fb53291bf --- /dev/null +++ b/tests/test_provider_detection.py @@ -0,0 +1,136 @@ +"""Provider detection tests (re: #768). + +These import the *real* helpers from ``src.llm_core`` (not local copies) so a +regression in hostname matching is actually caught. The point of the change +under test is that provider detection keys off the URL's *hostname*, not a +substring of the whole URL — so a domain appearing in a path/query, or a +look-alike host, must not be misclassified. +""" +import pytest + +from src import llm_core +from src import endpoint_resolver +from src.endpoint_resolver import build_chat_url, build_models_url + + +class TestHostMatch: + def test_exact_host(self): + assert llm_core._host_match("https://anthropic.com/v1", "anthropic.com") + + def test_subdomain(self): + assert llm_core._host_match("https://api.anthropic.com/v1", "anthropic.com") + + def test_multiple_domains(self): + assert llm_core._host_match("https://api.together.ai/v1", "together.xyz", "together.ai") + + def test_trailing_dot_fqdn(self): + # A fully-qualified host with a trailing dot is legal and resolvable. 
+ assert llm_core._host_match("https://api.anthropic.com./v1", "anthropic.com") + + def test_domain_in_path_does_not_match(self): + assert not llm_core._host_match("https://myproxy.internal/anthropic.com/v1", "anthropic.com") + + def test_domain_in_query_does_not_match(self): + assert not llm_core._host_match("https://example.com/v1?ref=anthropic.com", "anthropic.com") + + def test_lookalike_host_does_not_match(self): + assert not llm_core._host_match("https://anthropic.com.example/v1", "anthropic.com") + + def test_none_and_empty_safe(self): + assert not llm_core._host_match(None, "anthropic.com") + assert not llm_core._host_match("", "anthropic.com") + + +class TestDetectProviderRealHosts: + def test_anthropic(self): + assert llm_core._detect_provider("https://api.anthropic.com") == "anthropic" + + def test_openrouter(self): + assert llm_core._detect_provider("https://openrouter.ai/api/v1") == "openrouter" + + def test_groq_openai_compat_path(self): + # Groq's base carries an /openai/v1 path; detection must still see the host. 
+ assert llm_core._detect_provider("https://api.groq.com/openai/v1") == "groq" + + def test_ollama_native_unchanged(self): + assert llm_core._detect_provider("https://ollama.com/api") == "ollama" + + def test_unknown_host_defaults_to_openai(self): + assert llm_core._detect_provider("https://api.example.com/v1") == "openai" + + +class TestDetectProviderRejectsSubstringFalsePositives: + """The regression that motivated #768: substring matching mislabeled these.""" + + def test_provider_domain_in_path(self): + assert llm_core._detect_provider("https://myproxy.internal/anthropic.com/v1") == "openai" + + def test_provider_domain_in_query(self): + assert llm_core._detect_provider("https://example.com/v1?ref=anthropic.com") == "openai" + + def test_lookalike_host(self): + assert llm_core._detect_provider("https://anthropic.com.example/v1") == "openai" + + def test_none_safe(self): + assert llm_core._detect_provider(None) == "openai" + + +class TestBuildersRejectLookalikeHosts: + """build_chat_url / build_models_url must route look-alike and + domain-in-path hosts to the OpenAI-compatible default, not the + anthropic/ollama branches. Before #815's follow-up these builders still + fell back to ``host.endswith("anthropic.com")`` style checks, so + ``notanthropic.com`` was misrouted to the Anthropic messages endpoint. + """ + + @pytest.fixture(autouse=True) + def _stub_dns(self, monkeypatch): + # build_* call resolve_url(), which does real DNS + tailscale lookups. + # Provider routing is independent of name resolution, so stub it out to + # keep these deterministic and offline. 
+ monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda u: u) + + def test_real_anthropic_chat(self): + assert build_chat_url("https://api.anthropic.com") == "https://api.anthropic.com/v1/messages" + + def test_lookalike_anthropic_chat_is_openai(self): + assert build_chat_url("https://notanthropic.com") == "https://notanthropic.com/chat/completions" + + def test_lookalike_anthropic_models_is_openai(self): + assert build_models_url("https://anthropic.com.evil.com") == "https://anthropic.com.evil.com/models" + + def test_anthropic_domain_in_path_is_openai(self): + assert build_chat_url("https://myproxy.internal/anthropic.com/v1") == "https://myproxy.internal/anthropic.com/v1/chat/completions" + + def test_real_ollama_chat(self): + assert build_chat_url("https://ollama.com") == "https://ollama.com/api/chat" + + def test_lookalike_ollama_chat_is_openai(self): + assert build_chat_url("https://notollama.com") == "https://notollama.com/chat/completions" + + def test_lookalike_ollama_models_is_openai(self): + assert build_models_url("https://notollama.com") == "https://notollama.com/models" + + +class TestBuildersLocalAndDockerEndpoints: + """Local and docker endpoints must keep working after the hostname change: + a local ``/v1`` base stays OpenAI-compatible, and a native Ollama ``/api`` + path is still detected by path even on a non-ollama.com host such as + host.docker.internal. 
+ """ + + @pytest.fixture(autouse=True) + def _stub_dns(self, monkeypatch): + monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda u: u) + + def test_local_v1_chat_is_openai_compatible(self): + assert build_chat_url("http://localhost:8000/v1") == "http://localhost:8000/v1/chat/completions" + + def test_local_v1_models_is_openai_compatible(self): + assert build_models_url("http://127.0.0.1:1234/v1") == "http://127.0.0.1:1234/v1/models" + + def test_docker_internal_ollama_api_path_is_native_chat(self): + assert build_chat_url("http://host.docker.internal:11434/api") == "http://host.docker.internal:11434/api/chat" + + def test_docker_internal_ollama_api_path_is_native_models(self): + assert build_models_url("http://host.docker.internal:11434/api") == "http://host.docker.internal:11434/api/tags" From 784e60fc6639ba4703ab2139b76f051f9f98e835 Mon Sep 17 00:00:00 2001 From: Rolly Calma <115199279+Ghraven@users.noreply.github.com> Date: Tue, 2 Jun 2026 10:12:02 +0800 Subject: [PATCH 017/496] chore: use explicit utf-8 for action state files (#819) --- src/builtin_actions.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/builtin_actions.py b/src/builtin_actions.py index 77f4582c4..1e659572c 100644 --- a/src/builtin_actions.py +++ b/src/builtin_actions.py @@ -359,7 +359,7 @@ async def action_tidy_calendar(owner: str, **kwargs) -> Tuple[str, bool]: last_watermark = None try: if STATE_FILE.exists(): - saved = json.loads(STATE_FILE.read_text()) + saved = json.loads(STATE_FILE.read_text(encoding="utf-8")) if saved.get("last_created_at"): last_watermark = datetime.fromisoformat(saved["last_created_at"]) except Exception: @@ -420,7 +420,7 @@ async def action_tidy_calendar(owner: str, **kwargs) -> Tuple[str, bool]: "last_run_at": datetime.utcnow().isoformat(), "scanned": len(events), "removed": len(removed), - }, indent=2)) + }, indent=2), encoding="utf-8") except Exception as se: logger.warning(f"tidy_calendar watermark save failed: 
{se}") @@ -1469,7 +1469,7 @@ async def action_ping_notes(owner: str, **kwargs) -> Tuple[str, bool]: _legacy = _P("data/note_pings.json") if _legacy.exists() and not STATE.exists(): try: - STATE.write_text(_legacy.read_text()) + STATE.write_text(_legacy.read_text(encoding="utf-8"), encoding="utf-8") except Exception: pass # Scanner ticks every 60s in _note_pings_loop. 90s window guarantees @@ -1494,7 +1494,7 @@ async def action_ping_notes(owner: str, **kwargs) -> Tuple[str, bool]: return None try: - cache = _json.loads(STATE.read_text()) if STATE.exists() else {} + cache = _json.loads(STATE.read_text(encoding="utf-8")) if STATE.exists() else {} except Exception: cache = {} @@ -1571,7 +1571,7 @@ async def action_ping_notes(owner: str, **kwargs) -> Tuple[str, bool]: cache.pop(stale, None) try: - STATE.write_text(_json.dumps(cache)) + STATE.write_text(_json.dumps(cache), encoding="utf-8") except Exception as e: logger.warning(f"ping_notes: cache write failed: {e}") @@ -1676,7 +1676,7 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]: for acc in accounts: cache_file = CACHE_DIR / f"{acc.id}.json" try: - cache = _json.loads(cache_file.read_text()) if cache_file.exists() else {"uids": {}} + cache = _json.loads(cache_file.read_text(encoding="utf-8")) if cache_file.exists() else {"uids": {}} except Exception: cache = {"uids": {}} @@ -1918,7 +1918,7 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]: cache_uids.pop(stale, None) try: - cache_file.write_text(_json.dumps(cache)) + cache_file.write_text(_json.dumps(cache), encoding="utf-8") except Exception as e: logger.warning(f"urgency: cache write failed for {acc.id}: {e}") @@ -2003,7 +2003,7 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]: # Load prior state to know which urgent UIDs we've already notified. 
try: - prior = _json.loads(STATE_PATH.read_text()) if STATE_PATH.exists() else {} + prior = _json.loads(STATE_PATH.read_text(encoding="utf-8")) if STATE_PATH.exists() else {} except Exception: prior = {} notified_uids = set(prior.get("notified_uids", [])) @@ -2087,7 +2087,7 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]: "notified_uids": sorted(notified_uids), } try: - STATE_PATH.write_text(_json.dumps(state)) + STATE_PATH.write_text(_json.dumps(state), encoding="utf-8") except Exception as e: logger.warning(f"urgency: state write failed: {e}") From f65c89e02e088a99667622b5db0661086a28a97b Mon Sep 17 00:00:00 2001 From: Rolly Calma <115199279+Ghraven@users.noreply.github.com> Date: Tue, 2 Jun 2026 10:12:13 +0800 Subject: [PATCH 018/496] chore: use explicit utf-8 for shell job files (#820) --- routes/shell_routes.py | 7 ++++--- src/bg_jobs.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/routes/shell_routes.py b/routes/shell_routes.py index c791b1219..da8c9a327 100644 --- a/routes/shell_routes.py +++ b/routes/shell_routes.py @@ -469,7 +469,8 @@ async def _generate_tmux(cmd: str, request: Request): f"EC=${{PIPESTATUS[0]}}\n" f"echo ':::EXIT_CODE:::'$EC >> '{log_path}'\n" f"rm -f '{script_path}'\n" - f"exit $EC\n" + f"exit $EC\n", + encoding="utf-8", ) script_path.chmod(0o755) logger.info("tmux wrapper script created: session=%s path=%s", session_id, script_path) @@ -504,7 +505,7 @@ async def _generate_tmux(cmd: str, request: Request): # Read new lines from log try: if log_path.exists(): - lines = log_path.read_text(errors="replace").splitlines() + lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines() new_lines = lines[lines_sent:] for line in new_lines: if line.startswith(":::EXIT_CODE:::"): @@ -532,7 +533,7 @@ async def _generate_tmux(cmd: str, request: Request): # Session ended — do one final read await asyncio.sleep(0.5) if log_path.exists(): - lines = 
log_path.read_text(errors="replace").splitlines() + lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines() for line in lines[lines_sent:]: if line.startswith(":::EXIT_CODE:::"): try: diff --git a/src/bg_jobs.py b/src/bg_jobs.py index a770f11d9..7605d5bc0 100644 --- a/src/bg_jobs.py +++ b/src/bg_jobs.py @@ -195,7 +195,7 @@ def refresh() -> Dict[str, Dict[str, Any]]: exit_path = Path(rec.get("exit_path", "")) if exit_path.exists(): try: - code = int(exit_path.read_text().strip() or "1") + code = int(exit_path.read_text(encoding="utf-8", errors="replace").strip() or "1") except Exception: code = 1 rec["exit_code"] = code From 8e918dfdbb88987872f5abb6035e638e1db0e09c Mon Sep 17 00:00:00 2001 From: CocoLng <15895046+CocoLng@users.noreply.github.com> Date: Tue, 2 Jun 2026 04:12:54 +0200 Subject: [PATCH 019/496] Ignore AltGr keystrokes in Ctrl+Alt keyboard shortcuts (#825) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Ignore AltGr keystrokes in Ctrl+Alt keyboard shortcuts Browsers report AltGr (right Alt on AZERTY/QWERTZ and most non-US layouts, used to type @ # { } [ ] | \ and the euro sign) as ctrlKey+altKey. The default keybinds map destructive actions to Ctrl+Alt+ (delete_session, new_session, incognito, open_calendar), so a non-US user typing a special character could silently fire them. Guard the shortcut matcher, the editor keydown handler, and the rebind capture with getModifierState('AltGraph'), which is true for AltGr but false for a genuine left Ctrl+Alt. macOS is excluded: there the Option key legitimately sets AltGraph and there is no AltGr/Ctrl+Alt collision to guard against, so the guard would otherwise break Ctrl+Option / Cmd+Option shortcuts (notably in Firefox). The detection lives in one place — isAltGrEvent / IS_MAC in static/js/platform.js — and all three call sites route through it, so the guards can't drift apart. 
The editor handler only skips the Ctrl+Alt chord block, so layout shortcuts reachable via AltGr (e.g. [ ] brush size = AltGr+5/+8 on AZERTY) keep working. * Require Ctrl+Alt for the AltGr guard and consolidate keybind test marks isAltGrEvent now also checks ctrlKey+altKey so it only suppresses the "AltGr reported as Ctrl+Alt" collision; an event asserting AltGraph on its own (a Linux ISO_Level3_Shift layout, a stray modifier) is left alone. Pin it with test_isaltgr_false_when_altgraph_set_but_not_ctrl_alt. Collapse the 12 per-test node skipif marks into one module-level pytestmark, and note in platform.js why IS_MAC intentionally covers iPad/iPhone and mirrors the isMac checks in calendar.js / sessions.js. --- static/js/editor/keyboard-shortcuts.js | 7 +- static/js/keyboard-shortcuts.js | 7 +- static/js/platform.js | 47 +++++++ static/js/settings.js | 5 + tests/test_keybind_altgr_js.py | 183 +++++++++++++++++++++++++ 5 files changed, 247 insertions(+), 2 deletions(-) create mode 100644 static/js/platform.js create mode 100644 tests/test_keybind_altgr_js.py diff --git a/static/js/editor/keyboard-shortcuts.js b/static/js/editor/keyboard-shortcuts.js index 0f83ac1f3..2f9ed7472 100644 --- a/static/js/editor/keyboard-shortcuts.js +++ b/static/js/editor/keyboard-shortcuts.js @@ -50,6 +50,7 @@ * }} deps */ import { state } from './state.js'; +import { isAltGrEvent } from '../platform.js'; export function wireKeyboardShortcuts(deps) { const { @@ -79,7 +80,11 @@ export function wireKeyboardShortcuts(deps) { return; } if (e.key === 'Escape') return; - if (e.ctrlKey || e.metaKey) { + // Skip the Ctrl+Alt editor chords for an AltGr keystroke (see platform.js); + // only the chord block is skipped, so the layout-character handlers below + // still act — AltGr+5 / AltGr+8 stay as the [ ] brush-size shortcut on + // AZERTY / QWERTZ. 
+ if ((e.ctrlKey || e.metaKey) && !isAltGrEvent(e)) { if (e.key === 'z') { e.preventDefault(); if (e.shiftKey) redo(); else undo(); } // Ctrl+Shift+D = Deselect: clears the wand selection (and // lasso if active) without affecting layers. diff --git a/static/js/keyboard-shortcuts.js b/static/js/keyboard-shortcuts.js index 2252017d6..6599ed4c2 100644 --- a/static/js/keyboard-shortcuts.js +++ b/static/js/keyboard-shortcuts.js @@ -2,6 +2,8 @@ // Keyboard Shortcuts — dynamic keybinds // ============================================ +import { IS_MAC, isAltGrEvent } from './platform.js'; + const _defaultKeybinds = { search: 'ctrl+k', toggle_sidebar: 'ctrl+alt+b', new_session: 'ctrl+alt+n', fav_session: 'ctrl+alt+f', delete_session: 'ctrl+alt+d', @@ -13,8 +15,11 @@ const _defaultKeybinds = { open_notes: '', open_tasks: '', open_theme: '', }; -function _matchesCombo(e, combo) { +export function _matchesCombo(e, combo, isMac = IS_MAC) { if (!combo) return false; + // Drop AltGr keystrokes so typing characters on non-US layouts can't fire a + // Ctrl+Alt shortcut — e.g. the destructive delete_session. See platform.js. + if (isAltGrEvent(e, isMac)) return false; const parts = combo.split('+'); const needCtrl = parts.includes('ctrl'); const needAlt = parts.includes('alt'); diff --git a/static/js/platform.js b/static/js/platform.js new file mode 100644 index 000000000..e0d7747df --- /dev/null +++ b/static/js/platform.js @@ -0,0 +1,47 @@ +// ============================================ +// Platform detection + AltGr-keystroke helper +// ============================================ +// Shared by the keybind code: root keyboard-shortcuts.js, the editor's +// keyboard-shortcuts.js, and settings.js. Single source of truth so the three +// guards can't drift. + +// AltGr (right Alt on AZERTY/QWERTZ and most non-US layouts, used to type +// @ # { } [ ] | \ and €) is reported by browsers as Ctrl+Alt. 
macOS is the +// exception: there the Option key — a normal part of Mac shortcuts — also sets +// the AltGraph modifier state, so it must NOT be treated as AltGr. +// +// IS_MAC covers all Apple platforms, iPad/iPhone included: a Magic Keyboard's +// Option key sets AltGraph exactly like a Mac's, so they need the same carve-out +// — narrowing to macOS-only would re-break them. The name and the +// /Mac|iPhone|iPad/ test deliberately mirror the existing isMac checks in +// calendar.js and sessions.js; this is their single shared source of truth. +export const IS_MAC = + /Mac|iPhone|iPad/.test((typeof navigator !== 'undefined' && navigator.platform) || '') || + /Mac/.test((typeof navigator !== 'undefined' && navigator.userAgent) || ''); + +// True when `e` is an AltGr keystroke we should ignore for Ctrl+Alt shortcut +// purposes. getModifierState('AltGraph') is true for AltGr but false for a +// genuine left Ctrl+Alt, so real shortcuts still work. Always false on macOS, +// where Option legitimately sets AltGraph. +// +// We also require ctrlKey+altKey: the collision we defend against is precisely +// "AltGr reported AS Ctrl+Alt", so an event that asserts AltGraph WITHOUT +// presenting as Ctrl+Alt (a Linux ISO_Level3_Shift layout, a stray modifier +// state) is left alone instead of being swallowed. +// +// Trade-off: on Windows AltGr *is* Ctrl+right-Alt, so a deliberate +// Ctrl+Alt+ shortcut typed via AltGr is unreachable too — accepted; use +// the left Ctrl+Alt. +// +// NOTE: the AltGr -> AltGraph mapping is taken from the UI Events spec / MDN, +// not proven by our tests. Older Firefox and some Linux setups historically did +// not report AltGraph; where a browser sets ctrlKey+altKey without it this +// guard is simply a no-op (the pre-fix behaviour) rather than a regression. 
+export function isAltGrEvent(e, isMac = IS_MAC) { + return ( + !isMac && + !!e.ctrlKey && + !!e.altKey && + !!(e.getModifierState && e.getModifierState('AltGraph')) + ); +} diff --git a/static/js/settings.js b/static/js/settings.js index 6f04140b7..0a63dd258 100644 --- a/static/js/settings.js +++ b/static/js/settings.js @@ -6,6 +6,7 @@ import searchModule from './search.js'; import { makeWindowDraggable } from './windowDrag.js'; import { clearDockSide } from './modalSnap.js'; import { sortModelIds } from './modelSort.js'; +import { isAltGrEvent } from './platform.js'; let initialized = false; let modalEl = null; @@ -1710,6 +1711,10 @@ function _formatKeyCaps(combo) { } function _comboFromEvent(e) { + // Drop a stray AltGr keystroke (e.g. AltGr+E to type €) so it isn't recorded + // as a bogus ctrl+alt+ binding — onKey ignores empty combos. See + // platform.js for the macOS carve-out and Windows trade-off. + if (isAltGrEvent(e)) return ''; const parts = []; if (e.ctrlKey || e.metaKey) parts.push('ctrl'); if (e.altKey) parts.push('alt'); diff --git a/tests/test_keybind_altgr_js.py b/tests/test_keybind_altgr_js.py new file mode 100644 index 000000000..a93538d6e --- /dev/null +++ b/tests/test_keybind_altgr_js.py @@ -0,0 +1,183 @@ +"""Pin the AltGr-safety of the shared keybind predicate and the matcher. + +Driven through `node --input-type=module` so we exercise the real JS without a +full Vitest/Jest setup (same approach as test_compare_js.py / +test_reply_recipients_js.py). Skips when `node` is not installed rather than +failing. + +Bug: browsers report the AltGr key (right Alt, essential on AZERTY/QWERTZ and +many non-US layouts to type @ # { } [ ] | \\ and €) as ctrlKey=true AND +altKey=true, so a user on a non-US layout typing a special character could +silently fire a destructive ctrl+alt+ default (new_session, +delete_session, incognito, open_calendar). 
getModifierState('AltGraph') is true +for AltGr but false for a genuine left Ctrl+Alt — except on macOS, where the +Option key also sets it. + +The guard now lives in ONE place — `isAltGrEvent` in static/js/platform.js — and +all three call sites (editor keyboard-shortcuts.js, root keyboard-shortcuts.js, +settings.js) route through it. So these tests pin the shared *predicate* +directly (both the isMac arg and the navigator-derived IS_MAC default), plus the +`_matchesCombo` integration. They do NOT prove that real browsers actually set +AltGraph for AltGr — that mapping is taken from the UI Events spec / MDN; older +Firefox and some Linux setups historically did not report it (the guard is a +no-op there, i.e. pre-fix behaviour, not a regression). +""" +import json +import shutil +import subprocess +from pathlib import Path + +import pytest + +_REPO = Path(__file__).resolve().parent.parent +_HELPER = _REPO / "static" / "js" / "keyboard-shortcuts.js" +_PLATFORM = _REPO / "static" / "js" / "platform.js" +_HAS_NODE = shutil.which("node") is not None + +# Every test here shells out to `node`; skip the whole module when it is absent +# rather than repeating the mark per test (same convention as test_compare_js.py +# / test_reply_recipients_js.py). +pytestmark = pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH") + + +def _run(js: str) -> str: + proc = subprocess.run( + ["node", "--input-type=module"], + input=js, capture_output=True, text=True, cwd=str(_REPO), timeout=30, + ) + assert proc.returncode == 0, proc.stderr + return proc.stdout.strip() + + +def _is_altgr( + altgraph: bool, + is_mac: bool = False, + has_modifier_state: bool = True, + ctrl: bool = True, + alt: bool = True, +) -> bool: + """Return isAltGrEvent(ev, is_mac) — the predicate every guard routes through.""" + modifier = ( + f"ev.getModifierState = (m) => m === 'AltGraph' ? 
{json.dumps(altgraph)} : false;" + if has_modifier_state else "") + js = f""" + import {{ isAltGrEvent }} from '{_PLATFORM.as_uri()}'; + const ev = {{ ctrlKey: {json.dumps(ctrl)}, altKey: {json.dumps(alt)} }}; + {modifier} + console.log(JSON.stringify(isAltGrEvent(ev, {json.dumps(is_mac)}))); + """ + return json.loads(_run(js)) + + +def _is_mac_default(platform: str = "", user_agent: str = "") -> bool: + """Return platform.js IS_MAC as derived from a stubbed navigator at import time.""" + # Node >=21 exposes a read-only global `navigator`, so assignment throws; + # defineProperty (configurable) overrides it for the import-time read. + js = f""" + Object.defineProperty(globalThis, 'navigator', {{ + value: {{ platform: {json.dumps(platform)}, userAgent: {json.dumps(user_agent)} }}, + configurable: true, + }}); + const {{ IS_MAC }} = await import('{_PLATFORM.as_uri()}'); + console.log(JSON.stringify(IS_MAC)); + """ + return json.loads(_run(js)) + + +def _matches(event: dict, combo: str, altgraph: bool, is_mac: bool = False) -> bool: + """Return _matchesCombo(event, combo, is_mac) with AltGraph active or not.""" + js = f""" + import {{ _matchesCombo }} from '{_HELPER.as_uri()}'; + const ev = {json.dumps(event)}; + ev.getModifierState = (m) => m === 'AltGraph' ? {json.dumps(altgraph)} : false; + console.log(JSON.stringify(_matchesCombo(ev, {json.dumps(combo)}, {json.dumps(is_mac)}))); + """ + return json.loads(_run(js)) + + +# --- The shared predicate (covers all three guards) -------------------------- + +def test_isaltgr_true_for_altgr_keystroke_off_mac(): + # AZERTY/QWERTZ user holds AltGr: browser sets ctrlKey+altKey+AltGraph. + assert _is_altgr(altgraph=True, is_mac=False) is True + + +def test_isaltgr_false_for_genuine_ctrl_alt(): + # A real left Ctrl+Alt press leaves AltGraph unset. 
+ assert _is_altgr(altgraph=False, is_mac=False) is False + + +def test_isaltgr_false_when_altgraph_set_but_not_ctrl_alt(): + # The collision we defend against is specifically "AltGr reported AS + # Ctrl+Alt". An event that asserts AltGraph WITHOUT presenting as Ctrl+Alt + # (e.g. a Linux ISO_Level3_Shift layout, or a stray modifier state) must NOT + # be swallowed — only a genuine Ctrl+Alt-presenting AltGr keystroke is. + assert _is_altgr(altgraph=True, ctrl=False, alt=False) is False + assert _is_altgr(altgraph=True, ctrl=True, alt=False) is False + assert _is_altgr(altgraph=True, ctrl=False, alt=True) is False + + +def test_isaltgr_false_on_mac_even_with_altgraph(): + # macOS reports AltGraph=true for the Option key, but Ctrl+Option / Cmd+Option + # are legitimate Mac shortcuts, so the predicate must never swallow them. + assert _is_altgr(altgraph=True, is_mac=True) is False + + +def test_isaltgr_false_when_getmodifierstate_missing(): + # Defensive: an event without getModifierState must not throw or report AltGr. + assert _is_altgr(altgraph=False, is_mac=False, has_modifier_state=False) is False + + +# --- The navigator-derived IS_MAC default (dead in node without a stub) ------- + +def test_is_mac_from_navigator_platform(): + # navigator.platform reports "MacIntel" on EVERY Mac — Apple Silicon + # (M1/M2/M3...) included; the string was frozen for compatibility, so there + # is no "MacARM". The regex matches the "Mac" substring, not "Intel". + assert _is_mac_default(platform="MacIntel") is True + + +def test_is_mac_apple_silicon_reports_macintel(): + # Pin the quirk explicitly: an Apple Silicon Mac's UA still says Macintosh + # and its platform still says MacIntel, so the carve-out protects it too. 
+ assert _is_mac_default( + platform="MacIntel", + user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15", + ) is True + + +def test_is_mac_from_user_agent_when_platform_blank(): + # iPadOS / some browsers report a Mac userAgent with an unhelpful platform. + assert _is_mac_default(platform="", user_agent="Mozilla/5.0 (Macintosh; ...)") is True + + +def test_is_not_mac_on_windows(): + assert _is_mac_default(platform="Win32", user_agent="Mozilla/5.0 (Windows NT 10.0)") is False + + +# --- _matchesCombo integration (the matcher predicate, end to end) ----------- + +def test_altgr_keystroke_does_not_trigger_ctrl_alt_shortcut(): + # AZERTY/QWERTZ user holds AltGr over a key that yields 'n'. This must NOT + # fire the destructive new_session combo. + ev = {"ctrlKey": True, "altKey": True, "shiftKey": False, "key": "n"} + assert _matches(ev, "ctrl+alt+n", altgraph=True, is_mac=False) is False + + +def test_genuine_ctrl_alt_still_matches(): + # A real left Ctrl+Alt press (AltGraph not set) must still work. + ev = {"ctrlKey": True, "altKey": True, "shiftKey": False, "key": "n"} + assert _matches(ev, "ctrl+alt+n", altgraph=False, is_mac=False) is True + + +def test_mac_option_combo_still_matches(): + # macOS reports AltGraph=true for the Option key, but Ctrl+Option / Cmd+Option + # are legitimate Mac shortcuts. On macOS the guard must NOT swallow them. + ev = {"ctrlKey": True, "altKey": True, "shiftKey": False, "key": "n"} + assert _matches(ev, "ctrl+alt+n", altgraph=True, is_mac=True) is True + + +def test_plain_ctrl_shortcut_unaffected(): + # Non-alt combos were never AltGr-ambiguous and must keep matching. 
+ ev = {"ctrlKey": True, "altKey": False, "shiftKey": False, "key": "k"} + assert _matches(ev, "ctrl+k", altgraph=False, is_mac=False) is True From fd04ad353d82f9b5dae05706b1fba6777b9f1b62 Mon Sep 17 00:00:00 2001 From: Ethan Date: Tue, 2 Jun 2026 12:14:31 +1000 Subject: [PATCH 020/496] Add Anthropic prompt caching to the agent loop (#812) Send `system` as a structured text block with an ephemeral cache_control breakpoint and cache the last tool schema, so multi-round agent runs read the stable system+tools prefix from cache instead of re-billing it. Gate the system breakpoint so tiny tool-less prompts skip the cache-write premium. Log cache_read/creation tokens at message_start. Fixes #791 Co-authored-by: Ethan <23321960+0xLeathery@users.noreply.github.com> --- src/llm_core.py | 27 ++++++++++++++++++++-- tests/test_llm_core_anthropic_cache.py | 32 ++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 tests/test_llm_core_anthropic_cache.py diff --git a/src/llm_core.py b/src/llm_core.py index f77f3bb3c..e639aeead 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -451,7 +451,17 @@ def _build_anthropic_payload(model, messages, temperature, max_tokens, stream=Fa "temperature": temperature, } if system_parts: - payload["system"] = "\n\n".join(system_parts) + system_text = "\n\n".join(system_parts) + # Send `system` as a structured text block so we can attach a prompt-cache + # breakpoint. The agent loop re-sends this same large prefix every round; + # caching it makes Anthropic re-read it from cache (~90% cheaper, lower TTFB) + # instead of re-billing it. Skip caching tiny one-off prompts, where the + # cache-WRITE premium wouldn't pay back (no reuse). Presence of `tools` + # means an agentic/multi-round call, where the prefix is always reused. 
+ system_block = {"type": "text", "text": system_text} + if tools or len(system_text) > 4000: + system_block["cache_control"] = {"type": "ephemeral"} + payload["system"] = [system_block] if stream: payload["stream"] = True # Convert OpenAI-format tools to Anthropic format @@ -466,6 +476,9 @@ def _build_anthropic_payload(model, messages, temperature, max_tokens, stream=Fa "input_schema": fn.get("parameters", {"type": "object", "properties": {}}), }) if anthropic_tools: + # Cache the tool schemas too — they're stable for the whole agent run. + # The breakpoint caches all tool defs preceding it in the request. + anthropic_tools[-1]["cache_control"] = {"type": "ephemeral"} payload["tools"] = anthropic_tools return payload @@ -951,7 +964,17 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl if partial and _anth_tool_blocks[idx].get("name") in ("create_document", "update_document", "edit_document"): yield f'data: {json.dumps({"type": "tool_call_delta", "index": idx, "name": _anth_tool_blocks[idx]["name"], "arg_delta": partial})}\n\n' elif evt == "message_start": - _anth_input_tokens = j.get("message", {}).get("usage", {}).get("input_tokens", 0) + _u = j.get("message", {}).get("usage", {}) + _anth_input_tokens = _u.get("input_tokens", 0) + # Surface prompt-cache effectiveness: cache_read > 0 means the + # stable system+tools prefix was served from cache this round. 
+ _c_read = _u.get("cache_read_input_tokens", 0) + _c_write = _u.get("cache_creation_input_tokens", 0) + if _c_read or _c_write: + logger.info( + "[anthropic-cache] read=%s write=%s fresh_input=%s", + _c_read, _c_write, _anth_input_tokens, + ) elif evt == "message_delta": _anth_output_tokens = j.get("usage", {}).get("output_tokens", 0) elif evt == "message_stop": diff --git a/tests/test_llm_core_anthropic_cache.py b/tests/test_llm_core_anthropic_cache.py new file mode 100644 index 000000000..990b19981 --- /dev/null +++ b/tests/test_llm_core_anthropic_cache.py @@ -0,0 +1,32 @@ +"""Regression tests for Anthropic prompt-cache breakpoints in _build_anthropic_payload (#791).""" +from src import llm_core + + +def _payload(system="sys", user="hi", tools=None): + messages = [{"role": "system", "content": system}, {"role": "user", "content": user}] + return llm_core._build_anthropic_payload("claude", messages, 0.0, 1000, stream=True, tools=tools) + + +def test_agentic_caches_system_and_last_tool(): + tools = [ + {"type": "function", "function": {"name": "a", "description": "x", "parameters": {}}}, + {"type": "function", "function": {"name": "b", "description": "y", "parameters": {}}}, + ] + p = _payload(system="SYS PROMPT " * 50, tools=tools) + assert isinstance(p["system"], list) + assert p["system"][0].get("cache_control") == {"type": "ephemeral"} + assert "cache_control" not in p["tools"][0], "only the LAST tool is a breakpoint" + assert p["tools"][-1].get("cache_control") == {"type": "ephemeral"} + breakpoints = sum("cache_control" in b for b in p["system"]) + sum("cache_control" in t for t in p["tools"]) + assert breakpoints == 2 + + +def test_tiny_tool_less_prompt_not_cached(): + p = _payload(system="hi", tools=None) + assert isinstance(p["system"], list) + assert "cache_control" not in p["system"][0] + + +def test_large_system_only_is_cached(): + p = _payload(system="z" * 5000, tools=None) + assert p["system"][0].get("cache_control") == {"type": "ephemeral"} From 
b3599d84f791834c0f5b67406b8c77f84efaaa1f Mon Sep 17 00:00:00 2001 From: James Arslan Date: Tue, 2 Jun 2026 04:14:59 +0200 Subject: [PATCH 021/496] Fix drag-and-drop files landing behind the panes in Compare (#818) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Compare each pane renders into a sandboxed