// ============================================
// COOKBOOK DIAGNOSIS SUB-MODULE
// Error pattern matching and diagnosis UI
// ============================================

import {
  _envState,
  _loadTasks,
  _removeTask,
  _launchServeTask,
  _buildEnvPrefix,
  _sshCmd,
  _setPanelField,
  _setPanelCheckbox,
  _copyText,
  _persistEnvState,
  _tmuxCmd,
  _serveAutoRetry,
  _serveAutoRetryReplace,
  _serveAutoRetryRemove,
  _serveAutoFix,
  // Plain specifier (no ?v=) — must match every other cookbook.js importer so the
  // browser loads it once. See cookbook-hwfit.js.
} from './cookbook.js';
import uiModule from './ui.js';

// Replacement table for _diagEsc: each HTML-significant character maps to its
// entity so escaped text is safe inside both element content and quoted
// attribute values.
const _DIAG_HTML_ENTITIES = Object.freeze({
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
});

// Tiny HTML-escape — keeps the file standalone instead of leaning on a
// shared helper that may not be exported from this module's import surface.
/**
 * Escape a value for safe interpolation into an HTML string.
 *
 * @param {*} s - Value to escape; null/undefined become the empty string,
 *   anything else is stringified first.
 * @returns {string} The text with `& < > " '` replaced by HTML entities.
 */
function _diagEsc(s) {
  return String(s ?? '').replace(/[&<>"']/g, (c) => _DIAG_HTML_ENTITIES[c]);
}

// Pick an icon for a diagnosis-action button based on the label. The icon
// renders on the LEFT of the button text. Keeps the strokes consistent
// across the set so they read as one family.
/**
 * @param {string} label - Fix-button label; matched case-insensitively.
 * @returns {string} Icon markup to place before the button text.
 *
 * NOTE(review): in this copy of the file the `_svg` template and every path
 * argument are empty strings, so each branch currently returns ''. The SVG
 * markup looks like it was stripped somewhere upstream — restore it from
 * version control rather than from this listing.
 */
function _diagFixIcon(label) {
  const l = String(label || '').toLowerCase();
  const _svg = (path) => ``;
  if (l.startsWith('retry') || l.includes('relaunch') || l.includes('restart')) {
    // Circular-arrow refresh
    return _svg('');
  }
  if (l.startsWith('copy')) {
    return _svg('');
  }
  if (l.startsWith('edit')) {
    return _svg('');
  }
  if (l.startsWith('open') || l.includes('dependencies')) {
    return _svg('');
  }
  if (l.startsWith('install') || l.includes('upgrade')) {
    return _svg('');
  }
  if (l.startsWith('kill') || l.startsWith('stop')) {
    return _svg('');
  }
  if (l.startsWith('switch') || l.includes('use ')) {
    return _svg('');
  }
  // Default: lightbulb (generic "suggestion")
  return _svg('');
}

import spinnerModule from './spinner.js';

// ── Error diagnosis ──

// Re-exported so callers (Launch-tab pre-flight) can deep-link into the
// Dependencies tab + auto-expand a specific backend's recipe panel and
// pre-select the model they were trying to launch.
/**
 * Open the Cookbook "Dependencies" tab and highlight one package row.
 *
 * @param {string} [pkgName=''] - Package to scroll to and flash; empty means
 *   "just open the tab".
 * @param {{expandRecipe?: string, model?: string}} [opts={}] - Optional deep
 *   link: `expandRecipe` expands the row's recipe panel and names the
 *   `data-dep-recipe-pick` select to use; `model` is pre-selected in it.
 */
export function openCookbookDependencies(pkgName = '', opts = {}) {
  _openCookbookDependencies(pkgName, opts);
}

/**
 * Select `model` in a recipe dropdown: exact option value first, then the
 * first non-empty option value that is a substring of `model`. Fires a
 * `change` event only when something was selected.
 *
 * @param {HTMLSelectElement} sel - The recipe model dropdown.
 * @param {string} model - Model id the user was trying to launch.
 */
function _diagPreselectModel(sel, model) {
  const wantedModel = String(model);
  const options = [...sel.options];
  const hit = options.find((o) => o.value === wantedModel)
    || options.find((o) => o.value && wantedModel.includes(o.value));
  if (!hit) return;
  sel.value = hit.value;
  sel.dispatchEvent(new Event('change'));
}

/**
 * Implementation behind {@link openCookbookDependencies}. Opens the cookbook
 * (via `window.cookbookModule.open` when available, otherwise by clicking the
 * toolbar button), then polls every 100 ms (up to 45 retries) until the
 * dependency rows exist before highlighting the requested one.
 *
 * @param {string} [pkgName='']
 * @param {{expandRecipe?: string, model?: string}} [opts={}]
 */
function _openCookbookDependencies(pkgName = '', opts = {}) {
  const cookbook = window.cookbookModule;
  if (cookbook && typeof cookbook.open === 'function') {
    cookbook.open({ tab: 'Dependencies' });
  } else {
    document.getElementById('tool-cookbook-btn')?.click();
  }
  const wanted = String(pkgName || '').toLowerCase();
  const tryHighlight = (attempt = 0) => {
    const modal = document.getElementById('cookbook-modal');
    const tab = modal?.querySelector('.cookbook-tab[data-backend="Dependencies"]');
    if (tab && !tab.classList.contains('active')) tab.click();
    const rows = [...document.querySelectorAll('#cookbook-deps-list [data-pkg-name]')];
    if (!rows.length) {
      // List not rendered yet — retry until it is (or give up after ~4.5 s).
      if (attempt < 45) setTimeout(() => tryHighlight(attempt + 1), 100);
      return;
    }
    if (!wanted) return;
    const row = rows.find((r) => {
      const name = (r.dataset.pkgName || '').toLowerCase();
      const pip = (r.dataset.depPip || '').toLowerCase();
      // `name &&` guard: ''.includes-style matching would otherwise make a
      // row with an empty data-pkg-name match every requested package.
      return name === wanted || pip.includes(wanted) || (name && wanted.includes(name));
    });
    if (!row) return;
    row.scrollIntoView({ block: 'center' });
    row.classList.add('cookbook-pkg-flash');
    setTimeout(() => row.classList.remove('cookbook-pkg-flash'), 1800);
    // Pre-flight deep link: auto-expand the recipe panel + pre-select
    // the model the user was trying to launch. The dropdown values are
    // now full model ids (sourced from _cachedModelIds), so we match by
    // exact value first, then fall back to a substring match.
    if (!opts?.expandRecipe) return;
    const caret = row.querySelector('[data-dep-recipe-toggle]');
    if (caret && caret.getAttribute('aria-expanded') !== 'true') caret.click();
    if (!opts.model) return;
    const sel = document.querySelector(`[data-dep-recipe-pick="${CSS.escape(opts.expandRecipe)}"]`);
    if (sel) _diagPreselectModel(sel, opts.model);
  };
  tryHighlight();
}

/**
 * Ask the enclosing `.cookbook-task` element to open its serve editor by
 * dispatching a bubbling `cookbook:edit-serve` event. No-op when the panel is
 * not inside a task element.
 *
 * @param {Element} panel - Diagnosis panel (or any descendant of the task).
 * @param {?Object} [fields=null] - Field overrides passed as `detail.fields`.
 */
function _openServeEditFromDiagnosis(panel, fields = null) {
  const task = panel?.closest?.('.cookbook-task');
  if (!task) return;
  task.dispatchEvent(new CustomEvent('cookbook:edit-serve', { bubbles: true, detail: { fields } }));
}

/**
 * Open the serve editor pre-filled for CPU serving through llama.cpp.
 *
 * @param {Element} panel
 */
function _openCpuServeEdit(panel) {
  _openServeEditFromDiagnosis(panel, {
    backend: 'llamacpp',
    gpus: '',
    tp: '1',
    gpu_mem: '0.80',
    _forceBackend: true,
  });
}

// Infer the gated base repo that single-file checkpoints need configs from
/**
 * @param {string} text - Error output or model name to inspect.
 * @returns {?string} Hugging Face repo id, or null when nothing is recognised.
 */
function _inferBaseRepo(text) {
  if (!text) return null;
  const t = text.toLowerCase();
  // Order matters: the 3.5 checks must run before the broader "sd3" checks.
  if (t.includes('sd3.5') || t.includes('stable-diffusion-3.5')) return 'stabilityai/stable-diffusion-3.5-large';
  if (t.includes('sd3') || t.includes('stable-diffusion-3')) return 'stabilityai/stable-diffusion-3-medium-diffusers';
  if (t.includes('flux')) return 'black-forest-labs/FLUX.1-schnell';
  if (t.includes('sdxl') || t.includes('stable-diffusion-xl')) return 'stabilityai/stable-diffusion-xl-base-1.0';
  return null;
}

/**
 * Python interpreter to use for pip fixes.
 *
 * Uses the venv's python3 by absolute path when a venv is configured — SSH
 * non-interactive sessions often pick user-site Python over the venv, and
 * bare `python3` lands in the wrong site-packages when ~/.local/bin precedes
 * the venv on PATH.
 *
 * @returns {string} `<envPath>/bin/python3` (trailing slashes trimmed) or `python3`.
 */
function _diagPythonBin() {
  return (_envState.env === 'venv' && _envState.envPath)
    ? `${_envState.envPath.replace(/\/+$/, '')}/bin/python3`
    : 'python3';
}

/**
 * Focus a panel form field and outline it in red so the user can find it.
 * No-op when the field is not present in the panel.
 *
 * @param {Element} panel
 * @param {string} field - Value of the element's `data-field` attribute.
 */
function _diagFlagField(panel, field) {
  const el = panel.querySelector(`[data-field="${field}"]`);
  if (el) {
    el.focus();
    el.style.borderColor = 'var(--red)';
  }
}

/**
 * Ordered diagnosis table. `_diagnose` returns the FIRST entry that hits, so
 * specific patterns must stay above the broader ones they overlap with.
 *
 * Entry shape:
 *   - `pattern` (RegExp) or `match(text) => boolean`: how the entry is detected
 *   - `message`: headline shown to the user
 *   - `suggestion` (optional): explicit "Suggested action: …" line
 *   - `fixes`: `{ label, action(panel, sourceText), autofix? }` buttons
 */
export const ERROR_PATTERNS = [
  {
    pattern: /No available memory for the cache blocks|Available KV cache memory:.*-/i,
    message: 'No GPU memory left for KV cache after loading model.',
    fixes: [
      { label: 'Retry with GPU mem 0.95', action: (panel) => _serveAutoRetryReplace(panel, '--gpu-memory-utilization', '0.95') },
      { label: 'Retry with context 2048', action: (panel) => _serveAutoRetryReplace(panel, '--max-model-len', '2048') },
      { label: 'Retry with more GPUs (TP=8)', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '8') },
    ],
  },
  {
    pattern: /warming up sampler|max_num_seqs.*gpu_memory_utilization/i,
    message: 'OOM during warmup. Lower GPU memory or max sequences.',
    fixes: [
      { label: 'Retry with GPU mem 0.80', action: (panel) => _serveAutoRetryReplace(panel, '--gpu-memory-utilization', '0.80') },
      { label: 'Retry with --max-num-seqs 64', action: (panel) => _serveAutoRetry(panel, '--max-num-seqs 64') },
      { label: 'Retry with --max-num-seqs 32', action: (panel) => _serveAutoRetry(panel, '--max-num-seqs 32') },
    ],
  },
  {
    pattern: /CUDA out of memory|torch\.cuda\.OutOfMemoryError|CUDA error: out of memory/i,
    message: 'GPU ran out of memory. Try more GPUs (higher TP) or lower context.',
    fixes: [
      { label: 'Retry with TP=2', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '2') },
      { label: 'Retry with TP=4', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '4') },
      { label: 'Retry with GPU mem 0.80', action: (panel) => _serveAutoRetryReplace(panel, '--gpu-memory-utilization', '0.80') },
      { label: 'Retry with context 4096', action: (panel) => _serveAutoRetryReplace(panel, '--max-model-len', '4096') },
      { label: 'Retry with --enforce-eager', action: (panel) => _serveAutoRetry(panel, '--enforce-eager') },
    ],
  },
  {
    // Must stay above the generic "not divisib" entry below.
    pattern: /not divisible by weight quantization|quantization block/i,
    message: 'FP8 MoE quantization is incompatible with this tensor-parallel split.',
    suggestion: 'Suggested action: retry with a lower tensor-parallel size, such as TP=4 or TP=2. If it still fails, use a non-FP8/GGUF version of the model.',
    fixes: [
      { label: 'Retry with TP=4', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '4') },
      { label: 'Retry with TP=2', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '2') },
      { label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
    ],
  },
  {
    pattern: /There is no module or parameter named ['"]lm_head\.input_scale['"]|lm_head\.input_scale|weight_scale_2/i,
    message: 'vLLM cannot load this ModelOpt LM-head quantized checkpoint with the current runtime.',
    suggestion: 'Suggested action: upgrade vLLM through the environment that provides this CLI (package manager, venv, Docker image, or source checkout), or choose a compatible checkpoint.',
    fixes: [
      { label: 'Open Dependencies', action: () => _openCookbookDependencies('vllm') },
      {
        label: 'Copy upgrade hint',
        action: () => _copyText('Upgrade the vLLM environment that provides the selected vllm CLI, or use a compatible checkpoint. Do not assume Odysseus owns PATH/system/source/Docker installs.'),
      },
    ],
  },
  {
    pattern: /not divisib|must be divisible|attention heads.*divisible/i,
    message: 'Tensor parallel size incompatible with model dimensions.',
    fixes: [
      { label: 'Retry with TP=1', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '1') },
      { label: 'Retry with TP=2', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '2') },
      { label: 'Retry with TP=4', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '4') },
    ],
  },
  {
    // Must stay above the generic "swap space" entry below.
    pattern: /Too large swap space|swap space.*total CPU memory/i,
    message: 'Swap space too large for available CPU memory.',
    fixes: [
      { label: 'Retry without swap', action: (panel) => _serveAutoRetryRemove(panel, '--swap-space') },
      { label: 'Retry with swap 1', action: (panel) => _serveAutoRetryReplace(panel, '--swap-space', '1') },
    ],
  },
  {
    pattern: /swap space|not enough.*memory.*cpu|Cannot allocate memory/i,
    message: 'Not enough CPU RAM or swap space.',
    fixes: [
      { label: 'Retry without swap', action: (panel) => _serveAutoRetryRemove(panel, '--swap-space') },
      { label: 'Lower max context to 4096', action: (panel) => _setPanelField(panel, 'ctx', '4096') },
    ],
  },
  {
    pattern: /unrecognized arguments:\s*--swap-space/i,
    message: '--swap-space was removed in newer vLLM versions. Remove it from the command.',
    fixes: [
      { label: 'Retry without swap', action: (panel) => _serveAutoRetryRemove(panel, '--swap-space') },
    ],
  },
  {
    pattern: /Address already in use|bind.*address.*in use/i,
    message: 'Port is already in use. Another server may be running.',
    fixes: [
      { label: 'Kill existing vLLM', action: (panel) => _runQuickCmd(panel, 'pkill -f vllm') },
      { label: 'Use port 8001', action: (panel) => _setPanelField(panel, 'port', '8001') },
    ],
  },
  {
    pattern: /No CUDA GPUs are available|no GPU.*found|CUDA_VISIBLE_DEVICES.*invalid/i,
    message: 'No GPUs visible. Check your GPU selection or driver.',
    fixes: [
      {
        label: 'Clear GPU selection (use all)',
        action: (panel) => {
          _setPanelField(panel, 'gpus', '');
          _envState.gpus = '';
          _persistEnvState();
        },
      },
    ],
  },
  {
    pattern: /403 Forbidden|401 Unauthorized|Access to model.*is restricted|gated repo|not in the authorized list|awaiting a review/i,
    message: 'Gated model. Your HF token IS being sent — but its account must be granted access first: open the model page, accept the license, and wait for approval (Meta models can take a while).',
    // Extract repo name from error text to build HF link
    _repoPattern: /Access to model\s+(\S+)\s+is restricted|gated repo.*?huggingface\.co\/([^\s/]+\/[^\s/]+)/i,
    fixes: [
      {
        label: 'Request access on HF',
        action: (panel, _text) => {
          const m = _text && (_text.match(/Access to model\s+(\S+)\s+is restricted/i) || _text.match(/huggingface\.co\/([^\s/]+\/[^\s/]+)/i));
          const repo = m && (m[1] || m[2]);
          if (repo) window.open('https://huggingface.co/' + repo, '_blank');
          else window.open('https://huggingface.co/settings/gated-repos', '_blank');
        },
      },
      { label: 'Check HF Token', action: (panel) => _diagFlagField(panel, 'hf_token') },
    ],
  },
  {
    pattern: /Weights for this component appear to be missing|load the component before passing/i,
    message: 'Single-file checkpoint needs a base model for missing components (text encoder, VAE). The base model may be gated — accept the license and set your HF token.',
    fixes: [
      {
        label: 'Request access to base model',
        action: (panel, _text) => {
          // Extract gated repo from error, or infer from model name
          const gated = _text && _text.match(/Access to model\s+(\S+)\s+is restricted/i);
          const base = _text && _text.match(/config=([^\s,)]+)/i);
          const model = _text && _text.match(/load model from\s+(\S+)/i);
          const repo = (gated && gated[1]) || (base && base[1]) || _inferBaseRepo(_text);
          if (repo) window.open('https://huggingface.co/' + repo, '_blank');
          else if (model && model[1]) window.open('https://huggingface.co/' + model[1].replace(/[.]$/, ''), '_blank');
        },
      },
      { label: 'Check HF Token', action: (panel) => _diagFlagField(panel, 'hf_token') },
    ],
  },
  {
    pattern: /Entry Not Found.*model_index\.json|Could not load model.*Check diffusers/i,
    message: 'Single-file model — needs base config from a gated repo. Accept the license and set your HF token.',
    fixes: [
      {
        label: 'Request access to base model',
        action: (panel, _text) => {
          const gated = _text && _text.match(/Access to model\s+(\S+)\s+is restricted/i);
          const repo = (gated && gated[1]) || _inferBaseRepo(_text);
          if (repo) window.open('https://huggingface.co/' + repo, '_blank');
          else window.open('https://huggingface.co/settings/gated-repos', '_blank');
        },
      },
      { label: 'Check HF Token', action: (panel) => _diagFlagField(panel, 'hf_token') },
    ],
  },
  {
    pattern: /does not appear to have a file named|not a valid model|No such file or directory.*model/i,
    message: 'Model path or ID not found.',
    fixes: [
      {
        label: 'Check model name',
        action: (panel) => {
          const header = panel.querySelector('.hwfit-panel-model');
          if (header) header.style.color = 'var(--red)';
        },
      },
    ],
  },
  {
    pattern: /NCCL error|ncclSystemError|ncclInternalError/i,
    message: 'Multi-GPU communication (NCCL) failed.',
    fixes: [
      { label: 'Set TP to 1 (single GPU)', action: (panel) => _setPanelField(panel, 'tp', '1') },
      { label: 'Enable enforce eager', action: (panel) => _setPanelCheckbox(panel, 'enforce_eager', true) },
    ],
  },
  {
    pattern: /KV cache.*too (small|large)|max_model_len.*exceeds|maximum.*context/i,
    message: 'Context length too large for available GPU memory.',
    fixes: [
      { label: 'Lower to 8192', action: (panel) => _setPanelField(panel, 'ctx', '8192') },
      { label: 'Lower to 4096', action: (panel) => _setPanelField(panel, 'ctx', '4096') },
      { label: 'Lower to 2048', action: (panel) => _setPanelField(panel, 'ctx', '2048') },
    ],
  },
  {
    // Optional quote: `python -m vllm` prints `No module named vllm`, while an
    // ImportError inside Python prints `No module named 'vllm'`.
    pattern: /vllm.*command not found|No module named ['"]?vllm/i,
    message: 'vLLM is not installed or not in PATH.',
    fixes: [
      { label: 'Open Dependencies', action: () => _openCookbookDependencies('vllm') },
      { label: 'Check environment is set', action: (panel) => _diagFlagField(panel, 'env_type') },
    ],
  },
  {
    // Optional quote for the same reason as the vLLM entry above.
    pattern: /sglang.*command not found|No module named ['"]?sglang|SGLang is not installed/i,
    message: 'SGLang is not installed or not in PATH.',
    fixes: [
      { label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
      { label: 'Copy install command', action: () => _copyText('python3 -m pip install "sglang[all]"') },
    ],
  },
  {
    pattern: /No accelerator \(CUDA, XPU, HPU, NPU, MUSA, MPS\) is available|Triton is not supported on current platform/i,
    message: 'SGLang needs a visible GPU/accelerator on this server.',
    suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
    fixes: [
      { label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
      { label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
    ],
  },
  {
    pattern: /flashinfer.*version.*does not match|flashinfer-cubin version/i,
    message: 'FlashInfer version mismatch.',
    fixes: [
      { label: 'Auto-fix: bypass version check', action: (panel) => _serveAutoFix(panel, 'FLASHINFER_DISABLE_VERSION_CHECK=1'), autofix: true },
      // Informational only: the action is intentionally a no-op.
      { label: 'Fix properly: pip install matching version', action: () => {} },
    ],
  },
  {
    pattern: /torch\.cuda\.is_available\(\).*False|No CUDA runtime/i,
    message: 'vLLM needs a visible CUDA/ROCm GPU.',
    suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
    fixes: [
      { label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
      { label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
    ],
  },
  {
    pattern: /Engine core initialization failed/i,
    message: 'vLLM engine failed to start. Check the error above.',
    fixes: [
      { label: 'Retry with --enforce-eager', action: (panel) => _serveAutoRetry(panel, '--enforce-eager'), autofix: true },
      { label: 'Retry with context 4096', action: (panel) => _serveAutoRetry(panel, '--max-model-len 4096'), autofix: true },
      { label: 'Lower context to 4096', action: (panel) => _setPanelField(panel, 'ctx', '4096') },
      { label: 'Lower GPU mem to 0.80', action: (panel) => _setPanelField(panel, 'gpu_mem', '0.80') },
    ],
  },
  {
    pattern: /weight_loader.*unexpected keyword|Unexpected key.*state_dict/i,
    message: 'Model format incompatible with this vLLM version.',
    fixes: [
      { label: 'Try trust remote code', action: (panel) => _setPanelCheckbox(panel, 'trust_remote', true) },
    ],
  },
  {
    pattern: /enable-auto-tool-choice requires --tool-call-parser/i,
    message: 'Auto tool choice needs a tool call parser.',
    fixes: [
      { label: 'Retry with --tool-call-parser hermes', action: (panel) => _serveAutoRetry(panel, '--tool-call-parser hermes'), autofix: true },
    ],
  },
  {
    pattern: /Please pass.*trust.remote.code=True|contains custom code which must be executed to correctly load/i,
    message: 'Model requires custom code. Enable --trust-remote-code.',
    fixes: [
      { label: 'Retry with --trust-remote-code', action: (panel) => _serveAutoRetry(panel, '--trust-remote-code'), autofix: true },
    ],
  },
  {
    pattern: /does not recognize this architecture|model type.*but Transformers does not/i,
    message: 'Model architecture too new for installed vLLM/transformers.',
    fixes: [
      { label: 'Try --trust-remote-code', action: (panel) => _serveAutoRetry(panel, '--trust-remote-code'), autofix: true },
      {
        label: 'Update vLLM on server',
        action: () => {
          _launchServeTask('update-vllm', 'pip-update', `${_diagPythonBin()} -m pip install -U vllm transformers`);
        },
      },
    ],
  },
  {
    pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i,
    message: 'Transformers/kernels package mismatch.',
    fixes: [
      {
        label: 'Repair kernel package',
        action: () => {
          // The specifier is quoted: an unquoted `kernels<0.15` makes the shell
          // treat `<0.15` as an input redirect, so pip would see plain
          // `kernels` (or the command would fail on a missing file "0.15").
          // NOTE(review): assumes _launchServeTask preserves embedded double
          // quotes when it wraps the command — confirm against cookbook.js.
          _launchServeTask('repair-kernels', 'pip-update', `${_diagPythonBin()} -m pip install --user --break-system-packages "kernels<0.15"`);
        },
      },
      { label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
    ],
  },
  {
    pattern: /ollama.*command not found/i,
    message: 'Ollama is not installed on this server. Run: curl -fsSL https://ollama.com/install.sh | sh',
    fixes: [
      { label: 'Copy install command', action: () => _copyText('curl -fsSL https://ollama.com/install.sh | sh') },
    ],
  },
  {
    pattern: /llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'/i,
    message: 'llama-cpp-python server is not installed. Run: pip install "llama-cpp-python[server]"',
    fixes: [
      { label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
      { label: 'Copy install command', action: () => _copyText('pip install "llama-cpp-python[server]"') },
    ],
  },
  {
    pattern: /Windows Error 0xc000001d|Illegal instruction|0xc000001d/i,
    message: 'AVX2 Instruction Set Mismatch: the precompiled llama-cpp-python wheel requires CPU features (AVX2/FMA) that your processor or virtual machine lacks.',
    suggestion: 'Suggested action: switch this serve config to Ollama (highly recommended, has dynamic CPU fallbacks), or choose a remote Linux GPU server.',
    fixes: [
      { label: 'Switch to Ollama', action: (panel) => _openServeEditFromDiagnosis(panel, { backend: 'ollama' }) },
      { label: 'Choose remote server', action: (panel) => _openServeEditFromDiagnosis(panel) },
    ],
  },
  {
    pattern: /CUDA Toolkit not found|Unable to find cudart library|missing:\s*CUDA_CUDART/i,
    message: 'llama.cpp found nvcc, but the CUDA runtime library is missing.',
    suggestion: 'Suggested action: relaunch with the updated runner so llama.cpp builds CPU-only, or install a complete CUDA toolkit/runtime on this server for GPU llama.cpp.',
    fixes: [
      { label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
      { label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
    ],
  },
  {
    pattern: /No module named ['"]?torch|No module named ['"]?diffusers|diffusers.*command not found/i,
    message: 'Diffusion serving needs PyTorch and diffusers. Install diffusers from Cookbook → Dependencies.',
    fixes: [
      { label: 'Open Dependencies', action: () => _openCookbookDependencies('diffusers') },
      { label: 'Copy install command', action: () => _copyText('python3 -m pip install "diffusers[torch]"') },
    ],
  },
  {
    pattern: /Triton kernels.*Failed to import|cannot import name '\w+' from 'triton_kernels/i,
    message: 'Triton kernels version mismatch. Non-fatal warning — model will still run, just without optimized MoE kernels.',
    fixes: [
      {
        label: 'Update triton on server',
        action: () => {
          _launchServeTask('update-triton', 'pip-update', `${_diagPythonBin()} -m pip install -U triton triton-kernels`);
        },
      },
    ],
  },
  {
    pattern: /No space left on device|Disk quota exceeded|ENOSPC/i,
    message: 'Disk full on the server. Free up space before retrying.',
    fixes: [
      { label: 'Check HF cache size', action: (panel) => _runQuickCmd(panel, 'du -sh ~/.cache/huggingface 2>/dev/null') },
    ],
  },
  {
    pattern: /Connection refused|Could not connect|Connection reset by peer/i,
    message: 'Network connection failed. Server may be unreachable or HuggingFace is down.',
    fixes: [
      { label: 'Test HF connectivity', action: (panel) => _runQuickCmd(panel, 'curl -sI https://huggingface.co 2>&1 | head -3') },
    ],
  },
  {
    pattern: /attention_sink|sliding.window.*not supported|sliding_window.*incompatible/i,
    message: 'Model uses attention features unsupported in this vLLM version.',
    fixes: [
      {
        label: 'Update vLLM on server',
        action: () => {
          _launchServeTask('update-vllm', 'pip-update', `${_diagPythonBin()} -m pip install -U vllm`);
        },
      },
    ],
  },
  {
    // FlashInfer JIT-compiles kernels for the host GPU on first use. If the
    // system nvcc is older than CUDA 11.8 it can't target sm_89/sm_90
    // (Ada/Hopper), and the engine workers die before they can report a
    // useful traceback. Switching the attention backend is not enough because
    // the sampler is JIT-compiled too, so the offered fixes are: disable the
    // FlashInfer sampler, or remove flashinfer-python altogether.
    pattern: /nvcc fatal\s+:\s+Unsupported gpu architecture 'compute_\d+'/i,
    message: 'FlashInfer is JIT-compiling sampling kernels with an nvcc too old for this GPU (no sm_89 / sm_90 support — pre-CUDA 11.8). Changing the attention backend does not help — flashinfer JITs the SAMPLER too. The clean fix is to set VLLM_USE_FLASHINFER_SAMPLER=0 so vLLM uses its native sampler instead.',
    suggestion: 'Suggested action: relaunch with VLLM_USE_FLASHINFER_SAMPLER=0 prepended. (Confirmed on the QuantTrio/Qwen3.5 model card as the canonical workaround.)',
    fixes: [
      { label: 'Retry with VLLM_USE_FLASHINFER_SAMPLER=0', action: (panel) => _serveAutoRetryReplace(panel, '', 'VLLM_USE_FLASHINFER_SAMPLER=0 ', { prepend: true }) },
      {
        label: 'Uninstall flashinfer-python',
        action: () => {
          // Hard fallback: vLLM 0.22 reaches into flashinfer for sampling kernels
          // even with VLLM_USE_FLASHINFER_SAMPLER=0 in some configs. Removing
          // the package forces it onto the native sampler.
          _launchServeTask('uninstall-flashinfer', 'pip-update', `${_diagPythonBin()} -m pip uninstall flashinfer-python -y`);
        },
      },
      { label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
    ],
  },
  {
    // vLLM <-> torch ABI mismatch: vLLM imports torch.library helpers
    // (`infer_schema`, `register_fake`, etc.) that only exist on newer torch
    // versions. When the installed torch is older, the import fails before
    // any server code runs. Fix is to reinstall vllm (which pulls a matching
    // torch) or upgrade torch directly.
    pattern: /ImportError: cannot import name '[^']+' from 'torch(\.\w+)+'/i,
    message: 'vLLM was built against a newer torch than what is installed. Reinstall vLLM so pip pulls a compatible torch (or upgrade torch directly).',
    fixes: [
      {
        label: 'Reinstall vLLM (pulls matching torch)',
        action: () => {
          _launchServeTask('reinstall-vllm', 'pip-reinstall', `${_diagPythonBin()} -m pip install --force-reinstall vllm`);
        },
      },
      {
        label: 'Upgrade torch only',
        action: () => {
          _launchServeTask('upgrade-torch', 'pip-update', `${_diagPythonBin()} -m pip install -U torch`);
        },
      },
    ],
  },
  {
    // Tail-only + healthy-server suppression. tmux capture-pane returns the
    // entire scrollback every poll, so a one-shot startup traceback would
    // otherwise stick on the panel forever even while the server happily
    // serves /v1/models. Only fire if the traceback is in recent output AND
    // the server isn't currently logging healthy traffic.
    match: (text) => {
      const TAIL = text.slice(-4096);
      if (!/Traceback \(most recent call last\)/i.test(TAIL)) return false;
      // Healthy markers in the tail mean whatever blew up has been recovered
      // from — the server is up and answering requests.
      if (/Application startup complete|"GET \/v1\/[^"]+ HTTP\/[\d.]+" 2\d\d|Uvicorn running on/i.test(TAIL)) return false;
      return true;
    },
    message: 'Python traceback detected — may be a handled error, check logs.',
    fixes: [
      { label: 'Kill vLLM processes', action: (panel) => _runQuickCmd(panel, 'pkill -f vllm') },
    ],
  },
];

/**
 * Find the first ERROR_PATTERNS entry that matches the captured output.
 *
 * @param {string} text - Captured task output. Non-string input is coerced;
 *   null/undefined is treated as empty so `match` handlers that call string
 *   methods never throw.
 * @returns {?Object} The matching entry, or null when nothing matches.
 */
export function _diagnose(text) {
  const haystack = String(text ?? '');
  for (const entry of ERROR_PATTERNS) {
    const hit = entry.match ? entry.match(haystack) : entry.pattern.test(haystack);
    if (hit) return entry;
  }
  return null;
}

/**
 * Build the Markdown bundle placed on the clipboard by the diagnosis
 * "copy" button: task summary, diagnosis, suggestion, launch command and
 * captured output. Sections with no data are omitted.
 *
 * @param {?Object} task - Task record, if one was found for the panel.
 * @param {?Object} diagnosis - Matched ERROR_PATTERNS entry.
 * @param {?string} sourceText - Raw output the diagnosis was derived from.
 * @param {?string} suggestionText - Suggestion line; a leading
 *   "Suggested action:" prefix is stripped.
 * @returns {string} Newline-joined Markdown.
 */
function _diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText) {
  const lines = ['## Odysseus Cookbook troubleshooting'];
  if (task) {
    lines.push(
      '',
      '### Task',
      `- ID: ${task.sessionId || task.id || 'unknown'}`,
      `- Type: ${task.type || 'unknown'}`,
      `- Status: ${task.status || 'unknown'}`,
      `- Model: ${task.payload?.repo_id || task.name || 'unknown'}`,
      `- Host: ${task.remoteHost || 'local'}${task.sshPort ? `:${task.sshPort}` : ''}`,
    );
  }
  lines.push('', '### Diagnosis', diagnosis?.message || '(none)');
  if (suggestionText) lines.push('', '### Suggested action', suggestionText.replace(/^Suggested action:\s*/i, ''));
  const cmd = task?.payload?._cmd || '';
  if (cmd) lines.push('', '### Launch command', '```bash', cmd, '```');
  if (sourceText) lines.push('', '### Captured output', '```text', String(sourceText).trim(), '```');
  return lines.join('\n');
}

/**
 * Render (or re-render) the diagnosis box for a task panel: message,
 * suggestion, copy/dismiss toolbar and one button per fix.
 *
 * Does nothing when the user already dismissed a diagnosis with the same
 * message on this panel. Serve tasks that have a launch command always get an
 * "Edit serve" fix appended if the entry did not provide one.
 *
 * @param {Element} panel - Task panel; also carries the `_lastDiagMsg`,
 *   `_diagCollapsed` and `_diagDismissed` bookkeeping properties.
 * @param {Object} diagnosis - Entry returned by `_diagnose`.
 * @param {string} sourceText - Output the diagnosis was derived from; passed
 *   to fix actions and included in the copy bundle.
 */
export function _showDiagnosis(panel, diagnosis, sourceText) {
  if (panel._diagDismissed === diagnosis.message) return;
  panel._lastDiagMsg = diagnosis.message;
  // The box is always rendered expanded.
  panel._diagCollapsed = false;

  let diag = panel.querySelector('.cookbook-diagnosis');
  if (!diag) {
    diag = document.createElement('div');
    diag.className = 'cookbook-diagnosis';
    const output = panel.querySelector('.cookbook-output-pre');
    if (output) output.after(diag);
    else panel.appendChild(diag);
  }
  diag.classList.remove('hidden');
  diag.innerHTML = '';

  const taskEl = panel.closest?.('.cookbook-task');
  const task = taskEl ? _loadTasks().find((t) => t.sessionId === taskEl.dataset.taskId) : null;
  // Copy so the appended "Edit serve" never mutates the shared table entry.
  const fixes = [...(diagnosis.fixes || [])];
  if (task?.type === 'serve' && task.payload?._cmd && !fixes.some((f) => f.label === 'Edit serve')) {
    fixes.push({ label: 'Edit serve', action: (p) => _openServeEditFromDiagnosis(p) });
  }
  const suggestionText = diagnosis.suggestion
    || (fixes.length
      ? `Suggested action: ${fixes[0].label}.`
      : 'Suggested action: copy the error and adjust the serve settings.');

  // Top-right toolbar: Copy bundle + × dismiss. Restored after user feedback
  // — without them there's no way to quietly close a stale diagnosis or grab
  // the full error+context for a forum/discord paste.
  const toolbar = document.createElement('div');
  toolbar.className = 'cookbook-diag-toolbar';
  // Left side carries the diagnosis text (message + suggestion); buttons
  // stay on the right. Was a separate body row below the toolbar, but
  // the message reads more like "this is what the toolbar is for" when
  // it sits inline with Copy / × Dismiss.
  toolbar.style.cssText = 'display:flex;align-items:flex-start;gap:8px;margin-bottom:-2px;';

  const textWrap = document.createElement('div');
  textWrap.style.cssText = 'flex:1;min-width:0;font-size:11px;line-height:1.35;';
  const msg = document.createElement('div');
  msg.className = 'cookbook-diag-message';
  msg.textContent = diagnosis.message;
  textWrap.appendChild(msg);
  const suggestion = document.createElement('div');
  suggestion.className = 'cookbook-diag-suggestion';
  suggestion.textContent = suggestionText;
  suggestion.style.cssText = 'opacity:0.75;margin-top:1px;';
  textWrap.appendChild(suggestion);
  toolbar.appendChild(textWrap);

  const copyBtn = document.createElement('button');
  copyBtn.type = 'button';
  copyBtn.className = 'cookbook-diag-copy';
  copyBtn.title = 'Copy diagnosis details';
  copyBtn.setAttribute('aria-label', 'Copy diagnosis');
  // NOTE(review): the icon markup is empty in this copy of the file — it
  // looks stripped upstream; restore it from version control.
  copyBtn.innerHTML = '';
  copyBtn.addEventListener('click', async (e) => {
    e.stopPropagation();
    const bundle = _diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText);
    try {
      await navigator.clipboard.writeText(bundle);
      copyBtn.classList.add('copied');
      setTimeout(() => {
        if (copyBtn.isConnected) copyBtn.classList.remove('copied');
      }, 1200);
    } catch (err) {
      // Best-effort: clipboard access can be denied; keep the UI quiet but
      // leave a trace for debugging.
      console.warn('[cookbook] diagnosis copy failed', err);
    }
  });

  const dismissBtn = document.createElement('button');
  dismissBtn.type = 'button';
  dismissBtn.className = 'cookbook-diag-dismiss';
  dismissBtn.title = 'Dismiss diagnosis';
  dismissBtn.setAttribute('aria-label', 'Dismiss');
  dismissBtn.textContent = '×';
  dismissBtn.addEventListener('click', (e) => {
    e.stopPropagation();
    // Remember the message so the next poll does not re-open the same box.
    panel._diagDismissed = diagnosis.message;
    _clearDiagnosis(panel);
  });
  toolbar.appendChild(copyBtn);
  toolbar.appendChild(dismissBtn);
  diag.appendChild(toolbar);

  // Run one fix with a busy spinner on its button. Re-entrancy is blocked via
  // `data-busy`; errors from the action are logged, never rethrown.
  const runFix = async (fix, button, busyLabel = fix?.label, onStart = null, onDone = null) => {
    if (!fix || !button || button.dataset.busy) return;
    button.dataset.busy = '1';
    // Keep the real child nodes (icon + label) so they can be put back as-is;
    // restoring via textContent would drop the icon.
    const originalNodes = [...button.childNodes];
    const wp = spinnerModule.createWhirlpool(12);
    wp.element.style.cssText = 'display:inline-block;vertical-align:middle;width:12px;height:12px;margin-right:5px;';
    const busyText = document.createElement('span');
    busyText.textContent = busyLabel;
    busyText.style.verticalAlign = 'middle';
    button.replaceChildren(wp.element, busyText);
    try {
      if (typeof onStart === 'function') onStart();
      await fix.action(panel, sourceText);
    } catch (err) {
      console.error('[cookbook] diagnosis fix failed', err);
    } finally {
      // Always release the spinner, even if the button was removed meanwhile.
      try { wp.destroy(); } catch {}
      if (button.isConnected) {
        button.replaceChildren(...originalNodes);
        delete button.dataset.busy;
      }
      if (typeof onDone === 'function') onDone();
    }
  };

  if (fixes.length) {
    // Always render fixes as inline buttons. The old "Actions ▾" dropdown
    // (for >3 fixes) was broken — the menu wouldn't open in some panels and
    // hid useful actions behind a non-working affordance. Inline buttons wrap
    // naturally in `.cookbook-diag-fixes` (flex-wrap) so a long list reflows
    // onto multiple rows instead of getting collapsed.
    const row = document.createElement('div');
    row.className = 'cookbook-diag-fixes';
    for (const fix of fixes) {
      const btn = document.createElement('button');
      btn.className = 'cookbook-btn cookbook-diag-btn';
      btn.type = 'button';
      // NOTE(review): the empty strings around the label look like stripped
      // wrapper markup (e.g. a <span>) — confirm against version control.
      btn.innerHTML = _diagFixIcon(fix.label) + '' + _diagEsc(fix.label) + '';
      btn.addEventListener('click', (e) => {
        e.stopPropagation();
        runFix(fix, btn);
      });
      row.appendChild(btn);
    }
    // The separate body container no longer exists (see toolbar comment), so
    // the fixes row goes straight into the diagnosis box below the toolbar.
    diag.appendChild(row);
  }
}

/**
 * Hide and empty the panel's diagnosis box and forget the last shown message.
 * `_diagDismissed` is left untouched so a dismissed diagnosis stays dismissed.
 *
 * @param {Element} panel
 */
export function _clearDiagnosis(panel) {
  panel._lastDiagMsg = null;
  const diag = panel.querySelector('.cookbook-diagnosis');
  if (diag) {
    diag.innerHTML = '';
    diag.classList.add('hidden');
  }
}

// ── Quick command ──

/**
 * Run a one-off shell command through `/api/shell/stream`, wrapped in SSH when
 * a remote host is configured, and report progress in the diagnosis box
 * (replacing whatever it currently shows).
 *
 * NOTE(review): the status flips to "Done" as soon as the HTTP response
 * arrives; the streamed body is not read here — confirm that is intended.
 *
 * @param {Element} panel - Panel whose `.cookbook-diagnosis` shows the status.
 * @param {string} cmd - Shell command to run.
 * @returns {Promise<void>}
 */
export async function _runQuickCmd(panel, cmd) {
  let fullCmd = cmd;
  if (_envState.remoteHost) {
    fullCmd = _sshCmd(_envState.remoteHost, cmd);
  }
  const diag = panel.querySelector('.cookbook-diagnosis');
  if (diag) {
    diag.classList.remove('hidden');
    diag.textContent = `Running: ${fullCmd}...`;
  }
  try {
    const res = await fetch('/api/shell/stream', {
      method: 'POST',
      credentials: 'same-origin',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ command: fullCmd }),
    });
    if (diag) diag.textContent = res.ok ? `Done: ${cmd}` : `Failed (HTTP ${res.status})`;
  } catch (e) {
    if (diag) diag.textContent = `Error: ${e.message}`;
  }
}