mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-18 18:55:28 -04:00
fix(cookbook): scope the "Kill vLLM" diagnosis to actual vLLM tracebacks (#4517)
The diagnosis panel offered a "Kill vLLM processes" (pkill -f vllm) recovery for ANY Python traceback — including pip build failures and other tracebacks that have nothing to do with vLLM. That advice is useless for a build failure and harmful if an unrelated vLLM server happens to be running.

ERROR_PATTERNS in static/js/cookbook-diagnosis.js had one catch-all traceback matcher that always attached the vLLM-kill fix. Split it into three (all keeping the existing healthy-server suppression):

- pip build failure (Failed to build / metadata-generation-failed / subprocess-exited-with-error / Could not build wheels) -> "a dependency failed to build" message, no kill.
- vLLM-specific traceback (tail mentions vllm) -> keeps the kill, now scoped.
- any other traceback -> neutral "check the captured output" message, no kill.

How to test:

- node --check static/js/cookbook-diagnosis.js
- Trigger a wheel-build failure (old package on a newer Python) or a non-vLLM traceback and open the diagnosis.
  Before: generic traceback message + "Kill vLLM processes" button.
  After: a build-failure / neutral message with no kill; only a real vLLM traceback still offers it.

Fixes #4516

Co-authored-by: Claude
This commit is contained in:
committed by
GitHub
parent
396e26b4bf
commit
e7ffc69729
@@ -578,24 +578,50 @@ export const ERROR_PATTERNS = [
|
||||
],
|
||||
},
|
||||
{
|
||||
// Tail-only + healthy-server suppression. tmux capture-pane returns the
|
||||
// entire scrollback every poll, so a one-shot startup traceback would
|
||||
// otherwise stick on the panel forever even while the server happily
|
||||
// serves /v1/models. Only fire if the traceback is in recent output AND
|
||||
// the server isn't currently logging healthy traffic.
|
||||
// Dependency-install (pip) build failure — a required package failed to
|
||||
// build its wheel (common when an old sdist's setup.py breaks on a newer
|
||||
// Python, e.g. basicsr on 3.13). This is an install problem, NOT a serve
|
||||
// problem, so it must never suggest killing vLLM.
|
||||
match: (text) => {
|
||||
const TAIL = text.slice(-6000);
|
||||
// A serve script can run a fallback build and then start serving fine —
|
||||
// don't flag a stale build error once the server is up.
|
||||
if (/Application startup complete|"(?:GET|POST)\s+\/v1\/[^"]+ HTTP\/[\d.]+"\s*2\d\d|Uvicorn running on|server is listening on https?:\/\//i.test(TAIL)) return false;
|
||||
return /Failed to build\b|subprocess-exited-with-error|Could not build wheels|metadata-generation-failed/i.test(TAIL);
|
||||
},
|
||||
message: 'A dependency failed to build during install — usually an older package whose build breaks on this Python version, not a server problem. The install did not finish.',
|
||||
suggestion: 'Suggested action: check the captured output for the package that failed to build; it may need a newer release or a patch to install on this Python version.',
|
||||
fixes: [],
|
||||
},
|
||||
{
|
||||
// vLLM-specific traceback: only offer the kill-processes recovery when the
|
||||
// output is actually about vLLM. Tail-only + healthy-server suppression so
|
||||
// a one-shot startup traceback doesn't stick on the panel forever while
|
||||
// the server happily serves /v1/models.
|
||||
match: (text) => {
|
||||
const TAIL = text.slice(-4096);
|
||||
if (!/Traceback \(most recent call last\)/i.test(TAIL)) return false;
|
||||
// Healthy markers in the tail mean whatever blew up has been recovered
|
||||
// from — the server is up and answering requests.
|
||||
if (/Application startup complete|"GET \/v1\/[^"]+ HTTP\/[\d.]+" 2\d\d|Uvicorn running on/i.test(TAIL)) return false;
|
||||
return true;
|
||||
return /vllm/i.test(TAIL);
|
||||
},
|
||||
message: 'Python traceback detected — may be a handled error, check logs.',
|
||||
message: 'A vLLM process hit a Python traceback and may be wedged.',
|
||||
fixes: [
|
||||
{ label: 'Kill vLLM processes', action: (panel) => _runQuickCmd(panel, 'pkill -f vllm') },
|
||||
],
|
||||
},
|
||||
{
|
||||
// Generic traceback (not vLLM, not a pip build): surface it without
|
||||
// suggesting an unrelated vLLM kill. Same tail-only + healthy suppression.
|
||||
match: (text) => {
|
||||
const TAIL = text.slice(-4096);
|
||||
if (!/Traceback \(most recent call last\)/i.test(TAIL)) return false;
|
||||
if (/Application startup complete|"GET \/v1\/[^"]+ HTTP\/[\d.]+" 2\d\d|Uvicorn running on/i.test(TAIL)) return false;
|
||||
return true;
|
||||
},
|
||||
message: 'Python traceback detected — check the captured output below for the underlying error.',
|
||||
suggestion: 'Suggested action: read the captured output for the failing step; copy the troubleshooting bundle if you need help.',
|
||||
fixes: [],
|
||||
},
|
||||
];
|
||||
|
||||
export function _diagnose(text) {
|
||||
|
||||
Reference in New Issue
Block a user