fix(cookbook): treat local Windows as Windows for serve commands (#3975)

* fix(cookbook): prefer native llama-server on local Windows

* fix(cookbook): harden local llama-server launch commands

* fix(cookbook): build serve commands for selected target
This commit is contained in:
Dividesbyzer0
2026-06-26 08:13:01 -04:00
committed by GitHub
parent de12d4734a
commit f5200ec45b
7 changed files with 190 additions and 36 deletions
+16 -3
View File
@@ -73,6 +73,9 @@ def setup_cookbook_routes() -> APIRouter:
return "stored"
return f"{value[:4]}...{value[-4:]}"
def _client_host_platform() -> str:
    """Return the host platform tag sent to browser clients.

    Yields "windows" when the module-level IS_WINDOWS flag is truthy and an
    empty string otherwise.
    """
    if IS_WINDOWS:
        return "windows"
    return ""
def _decrypt_secret(value: str | None) -> str:
if not value:
return ""
@@ -245,11 +248,15 @@ def setup_cookbook_routes() -> APIRouter:
"""Return cookbook state without raw secrets for browser clients."""
_strip_task_secrets(state)
env = state.get("env") if isinstance(state, dict) else None
if isinstance(state, dict) and not isinstance(env, dict):
env = {}
state["env"] = env
if isinstance(env, dict):
token = _decrypt_secret(env.get("hfToken"))
env.pop("hfToken", None)
env["hfTokenConfigured"] = bool(token)
env["hfTokenMasked"] = _mask_secret(token)
env["hostPlatform"] = _client_host_platform()
return state
def _state_for_storage(state, on_disk=None):
@@ -268,6 +275,7 @@ def setup_cookbook_routes() -> APIRouter:
env.pop("hfToken", None)
env.pop("hfTokenMasked", None)
env.pop("hfTokenConfigured", None)
env.pop("hostPlatform", None)
return state
def _load_stored_hf_token() -> str:
@@ -1479,6 +1487,10 @@ def setup_cookbook_routes() -> APIRouter:
# shell resolves the bundled python3/hf, mirroring the download flow.
if not remote:
runner_lines.append(_local_tooling_path_export(sys.executable))
if local_windows:
# Detached Git Bash runs do not always inherit recently edited
# user PATH entries from the already-running Odysseus process.
runner_lines.append('export PATH="$HOME/bin:$HOME/llama.cpp/build-cuda/bin/Release:$HOME/llama.cpp/build/bin/Release:$HOME/llama.cpp/build/bin/Debug:$HOME/llama.cpp/build/bin:$PATH"')
runner_lines.append("export FLASHINFER_DISABLE_VERSION_CHECK=1")
if req.hf_token:
runner_lines.append(f"export HF_TOKEN='{_bash_squote(req.hf_token)}'")
@@ -1493,7 +1505,8 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(_HF_TOKEN_STATUS_SNIPPET)
handled_ollama_serve = False
# Auto-install inference engine if missing
if "llama_cpp" in req.cmd or "llama-server" in req.cmd:
local_windows_llama_cmd = local_windows and ("llama_cpp" in req.cmd or "llama-server" in req.cmd)
if ("llama_cpp" in req.cmd or "llama-server" in req.cmd) and not local_windows_llama_cmd:
# Prefer the NATIVE llama-server binary — its minja templating
# renders modern GGUF chat templates that the Python bindings'
# Jinja2 rejects (do_tojson ensure_ascii). Build it once from
@@ -2396,8 +2409,8 @@ def setup_cookbook_routes() -> APIRouter:
try:
return _state_for_client(json.loads(_cookbook_state_path.read_text(encoding="utf-8")))
except Exception:
return {}
return {}
return _state_for_client({})
return _state_for_client({})
@router.post("/api/cookbook/state")
async def save_cookbook_state(request: Request):