From 57e7229219ae64342bac82abfa45f54a790f6877 Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Mon, 22 Jun 2026 02:08:25 +0000 Subject: [PATCH] CI fixes for cookbook workflow sync --- docker-compose.gpu-amd.yml | 9 +++++++++ docker-compose.gpu-nvidia.yml | 10 ++++++++++ routes/model_routes.py | 15 +-------------- src/agent_loop.py | 18 +++++++++++------- static/js/notes.js | 1 + tests/test_gpu_compose_standalone.py | 9 ++++----- tests/test_tool_support_heuristic.py | 2 +- 7 files changed, 37 insertions(+), 27 deletions(-) diff --git a/docker-compose.gpu-amd.yml b/docker-compose.gpu-amd.yml index 82e22e440..5d5f8427e 100644 --- a/docker-compose.gpu-amd.yml +++ b/docker-compose.gpu-amd.yml @@ -28,6 +28,14 @@ services: # land under /app/.local for the odysseus user. Persist them so a # container recreate does not silently remove installed serve engines. - ${APP_DATA_DIR:-./data}/local:/app/.local:z + # Docker socket — lets Cookbook launch commands like + # `docker exec ollama-rocm ollama show <model>` reach the host's + # Docker daemon (and sibling containers like ollama-rocm / + # ollama-test). The in-container user needs to be in the + # socket's owning group — see `group_add` below; the GID + # there must match the host's `docker` group (defaults to 963 + # on Debian, 999 on Ubuntu — override via DOCKER_GID if yours differs). + - /var/run/docker.sock:/var/run/docker.sock extra_hosts: # Lets the container reach local services on the Docker host, including # Ollama at http://host.docker.internal:11434. @@ -93,6 +101,7 @@ services: - /dev/kfd - /dev/dri group_add: + - "${DOCKER_GID:-963}" - video - ${RENDER_GID:-render} diff --git a/docker-compose.gpu-nvidia.yml b/docker-compose.gpu-nvidia.yml index 1b551c669..c1f2cddb0 100644 --- a/docker-compose.gpu-nvidia.yml +++ b/docker-compose.gpu-nvidia.yml @@ -27,6 +27,16 @@ services: # land under /app/.local for the odysseus user. Persist them so a # container recreate does not silently remove installed serve engines. 
- ${APP_DATA_DIR:-./data}/local:/app/.local:z + # Docker socket — lets Cookbook launch commands like + # `docker exec ollama-rocm ollama show <model>` reach the host's + # Docker daemon (and sibling containers like ollama-rocm / + # ollama-test). The in-container user needs to be in the + # socket's owning group — see `group_add` below; the GID + # there must match the host's `docker` group (defaults to 963 + # on Debian, 999 on Ubuntu — override via DOCKER_GID if yours differs). + - /var/run/docker.sock:/var/run/docker.sock + group_add: + - "${DOCKER_GID:-963}" extra_hosts: # Lets the container reach local services on the Docker host, including # Ollama at http://host.docker.internal:11434. diff --git a/routes/model_routes.py b/routes/model_routes.py index 89636b310..00fdb6eb0 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -896,25 +896,12 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> pass try: - # OpenAI-compatible servers commonly expose /v1/models but return 404 - # for the bare /v1 root. Probe models first for those bases to avoid - # noisy false-looking 404s in llama.cpp logs. 
- parsed = urlparse(base) - prefer_models_first = (parsed.path or "").rstrip("/").endswith("/v1") - if prefer_models_first: - try: - r0 = httpx.get(_safe_build_models_url(base), headers=headers, timeout=timeout, verify=llm_verify()) - result0 = _result_from_response(r0) - if result0["reachable"]: - return result0 - except Exception as e: - last_error = str(e)[:120] r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify()) result = _result_from_response(r) if result["reachable"]: return result sc = result.get("status_code") or 0 - if 400 <= sc < 500 and sc not in (401, 403) and not prefer_models_first: + if 400 <= sc < 500 and sc not in (401, 403): models_url = _safe_build_models_url(base) try: r2 = httpx.get(models_url, headers=headers,timeout=timeout, verify=llm_verify()) diff --git a/src/agent_loop.py b/src/agent_loop.py index 27a448da4..f753a0b5b 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -2000,6 +2000,7 @@ async def stream_agent_loop( and not bool(_intent.get("continuation")) and not plan_mode and not approved_plan + and not guide_only and (_casual_low_signal_turn or active_document is None) and (_casual_low_signal_turn or not active_email) and (_casual_low_signal_turn or not workspace) @@ -2103,7 +2104,7 @@ async def stream_agent_loop( # RAG-based tool selection: retrieve relevant tools for this query. # If caller provided a pre-computed set (e.g. task_scheduler), use that. 
- _relevant_tools = set() if guide_only else relevant_tools + _relevant_tools = relevant_tools _t1 = time.time() if _relevant_tools: logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)") @@ -2279,7 +2280,7 @@ async def stream_agent_loop( _model_supports_tools = any(kw in _model_lc for kw in ( "gpt-4", "gpt-5", "gpt-o", "claude", "gemini", "gemma", "qwen3", "qwen2.5", "mixtral", "mistral", "llama-3.1", "llama-3.2", - "llama-3.3", "llama-4", + "llama-3.3", "llama-4", "llama3.1", "llama3.2", "llama3.3", "llama4", # Local-served models that follow OpenAI-style function calling # via vLLM's `--enable-auto-tool-choice`. Belt-and-suspenders # with the per-endpoint flag above. @@ -2310,7 +2311,6 @@ async def stream_agent_loop( # the fenced-block path is used instead of native function calling. _is_ollama_native = _is_ollama_native_url(endpoint_url or "") _ollama_openai_compat = _is_ollama_openai_compat_url(endpoint_url or "") - _local_openai_compat = _is_local_openai_compat_url(endpoint_url or "") if _endpoint_supports is True: _is_api_model = True elif ( @@ -2318,12 +2318,11 @@ async def stream_agent_loop( or _model_no_tools or _is_ollama_native or _ollama_openai_compat - or _local_openai_compat ): _is_api_model = False else: _is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools - _compact_agent_prompt = _is_api_model or _is_ollama_native or _ollama_openai_compat or _local_openai_compat + _compact_agent_prompt = _is_api_model or _is_ollama_native or _ollama_openai_compat messages, mcp_schemas = _build_system_prompt( messages, model, active_document, mcp_mgr, disabled_tools, needs_admin=_needs_admin, relevant_tools=_relevant_tools, @@ -2782,7 +2781,12 @@ async def stream_agent_loop( _round_first_event_logged, _round_first_token_logged, ) - tool_blocks, used_native = _resolve_tool_blocks(round_response, native_tool_calls, round_num, is_api_model=_is_api_model) + tool_blocks, used_native = 
_resolve_tool_blocks( + round_response, + native_tool_calls, + round_num, + is_api_model=(_is_api_model and not guide_only), + ) # Force-answer round: we told the model to STOP calling tools and # answer. If it ignored that and emitted a (possibly DSML) tool @@ -2866,7 +2870,7 @@ async def stream_agent_loop( # model with no real native_tool_calls) must not be stripped from the # persisted text either — otherwise it streams once and then disappears # on reload (#3222 follow-up). - cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native)).strip() + cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native and not guide_only)).strip() round_texts.append(cleaned_round) if not tool_blocks: diff --git a/static/js/notes.js b/static/js/notes.js index 9758f3608..3b5a4e555 100644 --- a/static/js/notes.js +++ b/static/js/notes.js @@ -1120,6 +1120,7 @@ export function openPanel() { } _open = true; _editingId = null; + _searchQuery = ''; _clearViewedReminderGlows(); _firedDotDismissedAt = Date.now(); try { localStorage.setItem(REMINDER_DISMISSED_AT_KEY, String(_firedDotDismissedAt)); } catch {} diff --git a/tests/test_gpu_compose_standalone.py b/tests/test_gpu_compose_standalone.py index 57bdaf341..64c52577c 100644 --- a/tests/test_gpu_compose_standalone.py +++ b/tests/test_gpu_compose_standalone.py @@ -124,9 +124,9 @@ def test_nvidia_odysseus_adds_only_overlay(base): {"driver": "nvidia", "count": "all", "capabilities": ["gpu"]} ] - # No AMD-only keys leaked in. + # Base Docker socket group is preserved; no AMD-only keys leaked in. assert "devices" not in svc - assert "group_add" not in svc + assert svc["group_add"] == base_svc["group_add"] def test_amd_odysseus_adds_only_overlay(base): @@ -137,11 +137,10 @@ def test_amd_odysseus_adds_only_overlay(base): # Environment is unchanged from base for AMD. 
assert svc["environment"] == base_svc["environment"] - # devices and group_add are new and match the overlay exactly. + # devices are new; group_add preserves the base Docker group and appends AMD groups. assert "devices" not in base_svc - assert "group_add" not in base_svc assert svc["devices"] == ["/dev/kfd", "/dev/dri"] - assert svc["group_add"] == ["video", "${RENDER_GID:-render}"] + assert svc["group_add"] == base_svc["group_add"] + ["video", "${RENDER_GID:-render}"] # No NVIDIA-only keys leaked in. assert "deploy" not in svc diff --git a/tests/test_tool_support_heuristic.py b/tests/test_tool_support_heuristic.py index 9294fc740..468a210b5 100644 --- a/tests/test_tool_support_heuristic.py +++ b/tests/test_tool_support_heuristic.py @@ -18,7 +18,7 @@ def _compute_is_api_model(model: str, endpoint_url: str, endpoint_supports=None) model_supports_tools = any(kw in model_lc for kw in ( "gpt-4", "gpt-5", "gpt-o", "claude", "gemini", "gemma", "qwen3", "qwen2.5", "mixtral", "mistral", "llama-3.1", "llama-3.2", - "llama-3.3", "llama-4", + "llama-3.3", "llama-4", "llama3.1", "llama3.2", "llama3.3", "llama4", "minimax", "kimi", "yi-", "phi-3", "phi-4", "command-r", "glm-4", "internlm", "hermes", "deepseek-v", "deepseek-chat",