From 57e7229219ae64342bac82abfa45f54a790f6877 Mon Sep 17 00:00:00 2001
From: pewdiepie-archdaemon <pewdiepie-archdaemon@users.noreply.github.com>
Date: Mon, 22 Jun 2026 02:08:25 +0000
Subject: [PATCH] CI fixes for cookbook workflow sync

---
 docker-compose.gpu-amd.yml           |  9 +++++++++
 docker-compose.gpu-nvidia.yml        | 10 ++++++++++
 routes/model_routes.py               | 15 +--------------
 src/agent_loop.py                    | 18 +++++++++++-------
 static/js/notes.js                   |  1 +
 tests/test_gpu_compose_standalone.py |  9 ++++-----
 tests/test_tool_support_heuristic.py |  2 +-
 7 files changed, 37 insertions(+), 27 deletions(-)
diff --git a/docker-compose.gpu-amd.yml b/docker-compose.gpu-amd.yml
index 82e22e440..5d5f8427e 100644
--- a/docker-compose.gpu-amd.yml
+++ b/docker-compose.gpu-amd.yml
@@ -28,6 +28,14 @@ services:
       # land under /app/.local for the odysseus user. Persist them so a
       # container recreate does not silently remove installed serve engines.
       - ${APP_DATA_DIR:-./data}/local:/app/.local:z
+      # Docker socket — lets Cookbook launch commands like
+      # `docker exec ollama-rocm ollama show <tag>` reach the host's
+      # Docker daemon (and sibling containers like ollama-rocm /
+      # ollama-test). The in-container user needs to be in the
+      # socket's owning group — see `group_add` below; the GID
+      # there must match the host's `docker` group (defaults to 963
+      # on Debian, 999 on Ubuntu — override via DOCKER_GID if yours differs).
+      - /var/run/docker.sock:/var/run/docker.sock
     extra_hosts:
       # Lets the container reach local services on the Docker host, including
       # Ollama at http://host.docker.internal:11434.
@@ -93,6 +101,7 @@ services:
       - /dev/kfd
       - /dev/dri
     group_add:
+      - "${DOCKER_GID:-963}"
       - video
       - ${RENDER_GID:-render}
 
diff --git a/docker-compose.gpu-nvidia.yml b/docker-compose.gpu-nvidia.yml
index 1b551c669..c1f2cddb0 100644
--- a/docker-compose.gpu-nvidia.yml
+++ b/docker-compose.gpu-nvidia.yml
@@ -27,6 +27,16 @@ services:
       # land under /app/.local for the odysseus user. Persist them so a
       # container recreate does not silently remove installed serve engines.
       - ${APP_DATA_DIR:-./data}/local:/app/.local:z
+      # Docker socket — lets Cookbook launch commands like
+      # `docker exec ollama-rocm ollama show <tag>` reach the host's
+      # Docker daemon (and sibling containers like ollama-rocm /
+      # ollama-test). The in-container user needs to be in the
+      # socket's owning group — see `group_add` below; the GID
+      # there must match the host's `docker` group (defaults to 963
+      # on Debian, 999 on Ubuntu — override via DOCKER_GID if yours differs).
+      - /var/run/docker.sock:/var/run/docker.sock
+    group_add:
+      - "${DOCKER_GID:-963}"
     extra_hosts:
       # Lets the container reach local services on the Docker host, including
       # Ollama at http://host.docker.internal:11434.
diff --git a/routes/model_routes.py b/routes/model_routes.py
index 89636b310..00fdb6eb0 100644
--- a/routes/model_routes.py
+++ b/routes/model_routes.py
@@ -896,25 +896,12 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
         pass
 
     try:
-        # OpenAI-compatible servers commonly expose /v1/models but return 404
-        # for the bare /v1 root. Probe models first for those bases to avoid
-        # noisy false-looking 404s in llama.cpp logs.
-        parsed = urlparse(base)
-        prefer_models_first = (parsed.path or "").rstrip("/").endswith("/v1")
-        if prefer_models_first:
-            try:
-                r0 = httpx.get(_safe_build_models_url(base), headers=headers, timeout=timeout, verify=llm_verify())
-                result0 = _result_from_response(r0)
-                if result0["reachable"]:
-                    return result0
-            except Exception as e:
-                last_error = str(e)[:120]
         r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify())
         result = _result_from_response(r)
         if result["reachable"]:
             return result
         sc = result.get("status_code") or 0
-        if 400 <= sc < 500 and sc not in (401, 403) and not prefer_models_first:
+        if 400 <= sc < 500 and sc not in (401, 403):
             models_url = _safe_build_models_url(base)
             try:
                 r2 = httpx.get(models_url, headers=headers,timeout=timeout, verify=llm_verify())
diff --git a/src/agent_loop.py b/src/agent_loop.py
index 27a448da4..f753a0b5b 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -2000,6 +2000,7 @@ async def stream_agent_loop(
         and not bool(_intent.get("continuation"))
         and not plan_mode
         and not approved_plan
+        and not guide_only
         and (_casual_low_signal_turn or active_document is None)
         and (_casual_low_signal_turn or not active_email)
         and (_casual_low_signal_turn or not workspace)
@@ -2103,7 +2104,7 @@ async def stream_agent_loop(
 
     # RAG-based tool selection: retrieve relevant tools for this query.
     # If caller provided a pre-computed set (e.g. task_scheduler), use that.
-    _relevant_tools = set() if guide_only else relevant_tools
+    _relevant_tools = relevant_tools
     _t1 = time.time()
     if _relevant_tools:
         logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)")
@@ -2279,7 +2280,7 @@ async def stream_agent_loop(
     _model_supports_tools = any(kw in _model_lc for kw in (
         "gpt-4", "gpt-5", "gpt-o", "claude", "gemini", "gemma",
         "qwen3", "qwen2.5", "mixtral", "mistral", "llama-3.1", "llama-3.2",
-        "llama-3.3", "llama-4",
+        "llama-3.3", "llama-4", "llama3.1", "llama3.2", "llama3.3", "llama4",
         # Local-served models that follow OpenAI-style function calling
         # via vLLM's `--enable-auto-tool-choice`. Belt-and-suspenders
         # with the per-endpoint flag above.
@@ -2310,7 +2311,6 @@ async def stream_agent_loop(
     # the fenced-block path is used instead of native function calling.
     _is_ollama_native = _is_ollama_native_url(endpoint_url or "")
     _ollama_openai_compat = _is_ollama_openai_compat_url(endpoint_url or "")
-    _local_openai_compat = _is_local_openai_compat_url(endpoint_url or "")
     if _endpoint_supports is True:
         _is_api_model = True
     elif (
@@ -2318,12 +2318,11 @@ async def stream_agent_loop(
         or _model_no_tools
         or _is_ollama_native
         or _ollama_openai_compat
-        or _local_openai_compat
     ):
         _is_api_model = False
     else:
         _is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools
-    _compact_agent_prompt = _is_api_model or _is_ollama_native or _ollama_openai_compat or _local_openai_compat
+    _compact_agent_prompt = _is_api_model or _is_ollama_native or _ollama_openai_compat
     messages, mcp_schemas = _build_system_prompt(
         messages, model, active_document, mcp_mgr, disabled_tools,
         needs_admin=_needs_admin, relevant_tools=_relevant_tools,
@@ -2782,7 +2781,12 @@ async def stream_agent_loop(
             _round_first_event_logged,
             _round_first_token_logged,
         )
-        tool_blocks, used_native = _resolve_tool_blocks(round_response, native_tool_calls, round_num, is_api_model=_is_api_model)
+        tool_blocks, used_native = _resolve_tool_blocks(
+            round_response,
+            native_tool_calls,
+            round_num,
+            is_api_model=(_is_api_model and not guide_only),
+        )
 
         # Force-answer round: we told the model to STOP calling tools and
         # answer. If it ignored that and emitted a (possibly DSML) tool
@@ -2866,7 +2870,7 @@ async def stream_agent_loop(
         # model with no real native_tool_calls) must not be stripped from the
         # persisted text either — otherwise it streams once and then disappears
         # on reload (#3222 follow-up).
-        cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native)).strip()
+        cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native and not guide_only)).strip()
         round_texts.append(cleaned_round)
 
         if not tool_blocks:
diff --git a/static/js/notes.js b/static/js/notes.js
index 9758f3608..3b5a4e555 100644
--- a/static/js/notes.js
+++ b/static/js/notes.js
@@ -1120,6 +1120,7 @@ export function openPanel() {
   }
   _open = true;
   _editingId = null;
+  _searchQuery = '';
   _clearViewedReminderGlows();
   _firedDotDismissedAt = Date.now();
   try { localStorage.setItem(REMINDER_DISMISSED_AT_KEY, String(_firedDotDismissedAt)); } catch {}
diff --git a/tests/test_gpu_compose_standalone.py b/tests/test_gpu_compose_standalone.py
index 57bdaf341..64c52577c 100644
--- a/tests/test_gpu_compose_standalone.py
+++ b/tests/test_gpu_compose_standalone.py
@@ -124,9 +124,9 @@ def test_nvidia_odysseus_adds_only_overlay(base):
         {"driver": "nvidia", "count": "all", "capabilities": ["gpu"]}
     ]
 
-    # No AMD-only keys leaked in.
+    # Base Docker socket group is preserved; no AMD-only keys leaked in.
     assert "devices" not in svc
-    assert "group_add" not in svc
+    assert svc["group_add"] == base_svc["group_add"]
 
 
 def test_amd_odysseus_adds_only_overlay(base):
@@ -137,11 +137,10 @@ def test_amd_odysseus_adds_only_overlay(base):
     # Environment is unchanged from base for AMD.
     assert svc["environment"] == base_svc["environment"]
 
-    # devices and group_add are new and match the overlay exactly.
+    # devices are new; group_add preserves the base Docker group and appends AMD groups.
     assert "devices" not in base_svc
-    assert "group_add" not in base_svc
     assert svc["devices"] == ["/dev/kfd", "/dev/dri"]
-    assert svc["group_add"] == ["video", "${RENDER_GID:-render}"]
+    assert svc["group_add"] == base_svc["group_add"] + ["video", "${RENDER_GID:-render}"]
 
     # No NVIDIA-only keys leaked in.
     assert "deploy" not in svc
diff --git a/tests/test_tool_support_heuristic.py b/tests/test_tool_support_heuristic.py
index 9294fc740..468a210b5 100644
--- a/tests/test_tool_support_heuristic.py
+++ b/tests/test_tool_support_heuristic.py
@@ -18,7 +18,7 @@ def _compute_is_api_model(model: str, endpoint_url: str, endpoint_supports=None)
     model_supports_tools = any(kw in model_lc for kw in (
         "gpt-4", "gpt-5", "gpt-o", "claude", "gemini", "gemma",
         "qwen3", "qwen2.5", "mixtral", "mistral", "llama-3.1", "llama-3.2",
-        "llama-3.3", "llama-4",
+        "llama-3.3", "llama-4", "llama3.1", "llama3.2", "llama3.3", "llama4",
         "minimax", "kimi", "yi-", "phi-3", "phi-4", "command-r",
         "glm-4", "internlm", "hermes",
         "deepseek-v", "deepseek-chat",