mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
fix(cookbook): surface backend diagnosis when serve fails in background (#1636)
* refactor(cookbook): move _diagnose_serve_output to module level in cookbook_helpers

  Extracts the nested _diagnose_serve_output function from inside setup_cookbook_routes() and moves it to module level in cookbook_helpers.py, alongside the other helper functions it logically belongs with. No behaviour change — the function is now importable directly for testing and by other callers without going through the route factory closure.

* fix(cookbook): surface backend diagnosis when serve fails in background

  The background poll (_pollBackgroundStatus) already received `diagnosis` and `cmd` from /api/cookbook/tasks/status but discarded both. When a serve job died while the Cookbook modal was closed, reopening it showed only a red error badge with no context.

  - Persist live.diagnosis into task._backendDiagnosis in localStorage so it survives modal close/reopen and page refresh
  - Persist live.cmd into task.payload._cmd for agent-spawned tasks so the crash report includes the actual command
  - After _renderRunningTab(), walk rendered cards and call _showDiagnosis() for any that have a stored _backendDiagnosis but no panel yet
  - In _renderTaskCard(), use _backendDiagnosis as a fallback when the client-side _terminalServeDiagnosis() finds nothing

* test(cookbook): add coverage for _diagnose_serve_output error patterns

  10 tests verifying the 16 serve-failure patterns:
  - CUDA OOM, port-in-use, vLLM missing, gated model
  - Traceback fallback fires without startup success marker
  - Traceback suppressed when server actually started
  - Clean/empty output returns None
  - trust-remote-code and no-GGUF patterns
This commit is contained in:
+1
-122
@@ -40,7 +40,7 @@ from routes.cookbook_helpers import (
|
||||
_append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
|
||||
_ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache,
|
||||
_user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
|
||||
ModelDownloadRequest, ServeRequest,
|
||||
ModelDownloadRequest, ServeRequest, _diagnose_serve_output,
|
||||
)
|
||||
|
||||
_HF_TOKEN_STATUS_SNIPPET = (
|
||||
@@ -81,127 +81,6 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
task["payload"].pop("hf_token", None)
|
||||
return state
|
||||
|
||||
def _diagnose_serve_output(text: str) -> dict | None:
|
||||
"""Server-side mirror of the Cookbook UI's common serve diagnoses.
|
||||
|
||||
The browser uses cookbook-diagnosis.js for clickable fixes. This gives
|
||||
the agent/tool path the same structured signal so it can retry with an
|
||||
adjusted command instead of guessing from raw tmux output.
|
||||
"""
|
||||
if not text:
|
||||
return None
|
||||
tail = text[-6000:]
|
||||
patterns = [
|
||||
(
|
||||
r"No available memory for the cache blocks|Available KV cache memory:.*-",
|
||||
"No GPU memory left for KV cache after loading model.",
|
||||
[
|
||||
{"label": "retry with GPU memory utilization 0.95", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.95"},
|
||||
{"label": "retry with context 2048", "op": "replace", "flag": "--max-model-len", "value": "2048"},
|
||||
],
|
||||
),
|
||||
(
|
||||
r"CUDA out of memory|torch\.cuda\.OutOfMemoryError|CUDA error: out of memory|warming up sampler|max_num_seqs.*gpu_memory_utilization",
|
||||
"GPU ran out of memory during startup or warmup.",
|
||||
[
|
||||
{"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"},
|
||||
{"label": "retry with GPU memory utilization 0.80", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.80"},
|
||||
{"label": "retry with --enforce-eager", "op": "append", "arg": "--enforce-eager"},
|
||||
],
|
||||
),
|
||||
(
|
||||
r"not divisib|must be divisible|attention heads.*divisible",
|
||||
"Tensor parallel size is incompatible with the model.",
|
||||
[
|
||||
{"label": "retry with tensor parallel size 1", "op": "replace", "flag": "--tensor-parallel-size", "value": "1"},
|
||||
{"label": "retry with tensor parallel size 2", "op": "replace", "flag": "--tensor-parallel-size", "value": "2"},
|
||||
],
|
||||
),
|
||||
(
|
||||
r"KV cache.*too (small|large)|max_model_len.*exceeds|maximum.*context",
|
||||
"Context length is too large for available GPU memory.",
|
||||
[
|
||||
{"label": "retry with context 8192", "op": "replace", "flag": "--max-model-len", "value": "8192"},
|
||||
{"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"},
|
||||
],
|
||||
),
|
||||
(
|
||||
r"enable-auto-tool-choice requires --tool-call-parser",
|
||||
"Auto tool choice requires an explicit tool call parser.",
|
||||
[{"label": "retry with Hermes tool parser", "op": "append", "arg": "--tool-call-parser hermes"}],
|
||||
),
|
||||
(
|
||||
r"Please pass.*trust.remote.code=True|contains custom code which must be executed to correctly load|does not recognize this architecture|model type.*but Transformers does not",
|
||||
"Model requires custom code or newer model support.",
|
||||
[{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}],
|
||||
),
|
||||
(
|
||||
r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer",
|
||||
"vLLM/Transformers kernel package mismatch.",
|
||||
[{"label": "update vLLM, Transformers, and kernels on this server", "op": "dependency", "package": "vllm transformers kernels"}],
|
||||
),
|
||||
(
|
||||
r"Address already in use|bind.*address.*in use",
|
||||
"Port is already in use.",
|
||||
[{"label": "retry on port 8001", "op": "replace", "flag": "--port", "value": "8001"}],
|
||||
),
|
||||
(
|
||||
r"No CUDA GPUs are available|no GPU.*found|CUDA_VISIBLE_DEVICES.*invalid",
|
||||
"No GPUs are visible to the serve process.",
|
||||
[{"label": "clear Cookbook GPU selection or choose available GPUs", "op": "settings", "field": "gpus", "value": ""}],
|
||||
),
|
||||
(
|
||||
r"Failed to infer device type|NVML Shared Library Not Found|No module named 'amdsmi'|platform is not available",
|
||||
"vLLM could not find a supported GPU (CUDA or ROCm). "
|
||||
"This machine may have integrated or unsupported graphics only.",
|
||||
[
|
||||
{"label": "switch to llama.cpp (CPU/Metal, works without a discrete GPU)", "op": "manual"},
|
||||
{"label": "switch to Ollama (CPU/Metal, works without a discrete GPU)", "op": "manual"},
|
||||
],
|
||||
),
|
||||
(
|
||||
r"vllm.*command not found|No module named vllm|ERROR: vLLM is not installed",
|
||||
"vLLM is not installed or not in PATH on this server.",
|
||||
[{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}],
|
||||
),
|
||||
(
|
||||
r"sglang.*command not found|No module named sglang|SGLang is not installed",
|
||||
"SGLang is not installed or not in PATH on this server.",
|
||||
[{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}],
|
||||
),
|
||||
(
|
||||
r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found",
|
||||
"llama.cpp / llama-cpp-python dependencies are missing.",
|
||||
[{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}],
|
||||
),
|
||||
(
|
||||
r"No GGUF found on this host|no \.gguf file|No GGUF file found",
|
||||
"No GGUF file found for this model on this host. The llama.cpp backend needs a .gguf file.",
|
||||
[{"label": "download a GGUF build of this model (repo name usually ends in -GGUF, file like Q4_K_M.gguf)", "op": "manual"}],
|
||||
),
|
||||
(
|
||||
r"No module named 'torch'|No module named torch|No module named 'diffusers'|No module named diffusers",
|
||||
"Diffusion serving requires PyTorch and diffusers.",
|
||||
[{"label": "install diffusers[torch] in Cookbook Dependencies", "op": "dependency", "package": "diffusers[torch]"}],
|
||||
),
|
||||
(
|
||||
r"403 Forbidden|401 Unauthorized|Access to model.*is restricted|gated repo|not in the authorized list|awaiting a review",
|
||||
"Model access is gated or unauthorized.",
|
||||
[{"label": "set HF token and request model access on HuggingFace", "op": "manual"}],
|
||||
),
|
||||
]
|
||||
for pattern, message, suggestions in patterns:
|
||||
if re.search(pattern, tail, re.I):
|
||||
return {"message": message, "suggestions": suggestions}
|
||||
if re.search(r"Traceback \(most recent call last\)", tail, re.I) and not re.search(
|
||||
r"Application startup complete|GET /v1/|Uvicorn running on", tail, re.I
|
||||
):
|
||||
return {
|
||||
"message": "Python traceback detected during serve startup.",
|
||||
"suggestions": [{"label": "inspect traceback and retry with adjusted backend/settings", "op": "manual"}],
|
||||
}
|
||||
return None
|
||||
|
||||
def _state_for_client(state):
|
||||
"""Return cookbook state without raw secrets for browser clients."""
|
||||
_strip_task_secrets(state)
|
||||
|
||||
Reference in New Issue
Block a user