fix(cookbook): treat local Windows as Windows for serve commands (#3975)

* fix(cookbook): prefer native llama-server on local Windows
* fix(cookbook): harden local llama-server launch commands
* fix(cookbook): build serve commands for selected target
2026-06-30 00:22:10 -04:00 · 2026-06-26 08:13:01 -04:00
parent de12d4734a
commit f5200ec45b
7 changed files with 190 additions and 36 deletions
@@ -73,6 +73,9 @@ def setup_cookbook_routes() -> APIRouter:
            return "stored"
        return f"{value[:4]}...{value[-4:]}"

+    def _client_host_platform() -> str:
+        return "windows" if IS_WINDOWS else ""
+
    def _decrypt_secret(value: str | None) -> str:
        if not value:
            return ""
@@ -245,11 +248,15 @@ def setup_cookbook_routes() -> APIRouter:
        """Return cookbook state without raw secrets for browser clients."""
        _strip_task_secrets(state)
        env = state.get("env") if isinstance(state, dict) else None
+        if isinstance(state, dict) and not isinstance(env, dict):
+            env = {}
+            state["env"] = env
        if isinstance(env, dict):
            token = _decrypt_secret(env.get("hfToken"))
            env.pop("hfToken", None)
            env["hfTokenConfigured"] = bool(token)
            env["hfTokenMasked"] = _mask_secret(token)
+            env["hostPlatform"] = _client_host_platform()
        return state

    def _state_for_storage(state, on_disk=None):
@@ -268,6 +275,7 @@ def setup_cookbook_routes() -> APIRouter:
                env.pop("hfToken", None)
            env.pop("hfTokenMasked", None)
            env.pop("hfTokenConfigured", None)
+            env.pop("hostPlatform", None)
        return state

    def _load_stored_hf_token() -> str:
@@ -1479,6 +1487,10 @@ def setup_cookbook_routes() -> APIRouter:
            # shell resolves the bundled python3/hf, mirroring the download flow.
            if not remote:
                runner_lines.append(_local_tooling_path_export(sys.executable))
+                if local_windows:
+                    # Detached Git Bash runs do not always inherit recently edited
+                    # user PATH entries from the already-running Odysseus process.
+                    runner_lines.append('export PATH="$HOME/bin:$HOME/llama.cpp/build-cuda/bin/Release:$HOME/llama.cpp/build/bin/Release:$HOME/llama.cpp/build/bin/Debug:$HOME/llama.cpp/build/bin:$PATH"')
            runner_lines.append("export FLASHINFER_DISABLE_VERSION_CHECK=1")
            if req.hf_token:
                runner_lines.append(f"export HF_TOKEN='{_bash_squote(req.hf_token)}'")
@@ -1493,7 +1505,8 @@ def setup_cookbook_routes() -> APIRouter:
            runner_lines.append(_HF_TOKEN_STATUS_SNIPPET)
            handled_ollama_serve = False
            # Auto-install inference engine if missing
-            if "llama_cpp" in req.cmd or "llama-server" in req.cmd:
+            local_windows_llama_cmd = local_windows and ("llama_cpp" in req.cmd or "llama-server" in req.cmd)
+            if ("llama_cpp" in req.cmd or "llama-server" in req.cmd) and not local_windows_llama_cmd:
                # Prefer the NATIVE llama-server binary — its minja templating
                # renders modern GGUF chat templates that the Python bindings'
                # Jinja2 rejects (do_tojson ensure_ascii). Build it once from
@@ -2396,8 +2409,8 @@ def setup_cookbook_routes() -> APIRouter:
            try:
                return _state_for_client(json.loads(_cookbook_state_path.read_text(encoding="utf-8")))
            except Exception:
-                return {}
-        return {}
+                return _state_for_client({})
+        return _state_for_client({})

    @router.post("/api/cookbook/state")
    async def save_cookbook_state(request: Request):