Cookbook: scoring fixes, UI polish, false-finished + stale-state bug fixes

Backend (services/hwfit + routes):
- rank_models now picks the visible set by the REQUESTED sort column instead of
  always by score — sorting by Param shows the highest-param models regardless
  of fit (incl. too_tight).
- New fit_only param. Multi-GPU rigs filter GGUF Q*/IQ quants (vLLM/SGLang
  cannot serve them); default non-prequantized to BF16 on 2+ GPUs.
- AWQ / GPTQ-8bit get a -1.0 quality penalty (was 0.0, tied with FP8), so
  FP8 wins when both fit.
- Version-aware tiebreaker (parse Mn.n / Vn) — MiniMax-M2.7 ranks above
  M2.5 on equal composite score; >=100B integers not misread as versions.
- /api/cookbook/hf-latest no longer drops models without an "NB" pattern in
  the repo id (MiniMax-M2.7, DeepSeek-V4-Pro etc. were silently filtered).
- Cached-model scan: atexit flushes models JSON even if the script is
  killed mid-walk; each scan_dir wrapped in try/except; timeout 60s -> 180s.
- KB granularity for sub-MB sizes (was "0 MB" for 12 KB shells). New
  "stalled" status for shells <1 MB with no .incomplete files.
- /api/cookbook/state POST guard: rejects "done" download tasks lacking
  DOWNLOAD_OK / DOWNLOAD_FAILED / /snapshots/ when the last-mentioned
  shard is N<total — stops stale tabs from poisoning persisted state.
- hf_models.json: add zai-org/GLM-5.1; flip zai-org/GLM-5 quantization
  Q4_K_M -> BF16 (it is the native base, not a quant).

Frontend (static/js):
- Scan/Download toolbar: quant defaults to All; ctx slider (8k/16k/32k/
  50k/128k/Max) ported from origin/main with sort=fit on drag, sort=score
  on Max. GPU toggle commits _activeCount to maxGpu on initial render. Fit
  column header tagged with active budget (RAM / GPU / N GPU).
- Foldable Download admin-card: the Download h2 is the chevron trigger;
  state persists in localStorage.
- Download card surfaces destination dir (Dir: <path>). Same dir on running
  task row, font/color matched to uptime (9px Fira Code muted, opacity .4).
- Serve panel ctx text input always resets to model max on open. Sub-MB
  cached models show with red "download stalled" badge.
- Bulk-select Cancel + Delete reset the Select button label on exit.
- Cookbook running: false-finished bug fixed — DOWNLOAD_OK or /snapshots/
  required; bare "Download complete" no longer marks the task done after
  the first config file. Clear button now sends tmux kill-session too.
  True overall % for multi-shard downloads: ((N-1)+frac)/total instead of
  hf_transfer per-shard aggregate.
- Diagnosis card simplified: removed fold toggle, copy button, dismiss X.
  Suggestion font matches message body (12px).
- HF token field flashes green check + "Saved" on save.
- Cached scan no longer counts stalled rows as downloaded in Scan/Download.

CSS:
- dep Install button width pinned to 76px to match Installed split.
- task-sub row +1px; task-status badge gets margin-right 8px.
- Ctx slider styled like gallery editor sliders (thin pill rail, red thumb).
- Bulk-select cancel button top -3px -> -5px.
This commit is contained in:
pewdiepie-archdaemon
2026-06-03 16:32:20 +09:00
parent ab0a480f30
commit eb79b76432
15 changed files with 1175 additions and 198 deletions
+215 -60
View File
@@ -27,6 +27,56 @@ import spinnerModule from './spinner.js';
// ── Error diagnosis ──
/**
 * Open the Cookbook on its Dependencies tab and, when a package name is given,
 * scroll to the matching dependency row and flash it briefly.
 *
 * If no dependency rows are present in the DOM yet, the lookup retries every
 * 100 ms (at most 45 retries) before giving up silently.
 *
 * Row matching is case-insensitive and prefers an exact `data-pkg-name` match;
 * only when none exists does it fall back to substring matching against
 * `data-dep-pip` / `data-pkg-name`.
 *
 * @param {string} [pkgName=''] Package to highlight. Empty means "just open
 *   the tab".
 * @returns {void}
 */
function _openCookbookDependencies(pkgName = '') {
  const cookbook = window.cookbookModule;
  if (cookbook && typeof cookbook.open === 'function') {
    cookbook.open({ tab: 'Dependencies' });
  } else {
    // No module API available: fall back to clicking the toolbar button.
    document.getElementById('tool-cookbook-btn')?.click();
  }

  const wanted = String(pkgName || '').toLowerCase();
  const nameOf = (r) => (r.dataset.pkgName || '').toLowerCase();
  const pipOf = (r) => (r.dataset.depPip || '').toLowerCase();

  const tryHighlight = (attempt = 0) => {
    const modal = document.getElementById('cookbook-modal');
    const tab = modal?.querySelector('.cookbook-tab[data-backend="Dependencies"]');
    if (tab && !tab.classList.contains('active')) tab.click();

    const rows = [...document.querySelectorAll('#cookbook-deps-list [data-pkg-name]')];
    if (!rows.length) {
      if (attempt < 45) setTimeout(() => tryHighlight(attempt + 1), 100);
      return;
    }
    if (!wanted) return;

    // Exact name wins over any fuzzy hit, so e.g. a row named "llm" listed
    // before "vllm" cannot steal the highlight.
    const row = rows.find((r) => nameOf(r) === wanted)
      ?? rows.find((r) => {
        const name = nameOf(r);
        // `wanted.includes('')` is always true, so a row with an empty name
        // must be excluded from the reverse-substring test.
        return pipOf(r).includes(wanted) || (name !== '' && wanted.includes(name));
      });
    if (row) {
      row.scrollIntoView({ block: 'center' });
      row.classList.add('cookbook-pkg-flash');
      setTimeout(() => row.classList.remove('cookbook-pkg-flash'), 1800);
    }
  };
  tryHighlight();
}
/**
 * Dispatch a bubbling `cookbook:edit-serve` CustomEvent on the nearest
 * `.cookbook-task` ancestor of `panel`.
 *
 * Does nothing when `panel` is missing, has no `closest` method, or is not
 * inside a `.cookbook-task` element. Presumably a listener elsewhere reacts by
 * opening the serve editor — confirm against the event's consumer.
 *
 * @param {Element|null|undefined} panel Element inside the task card.
 * @param {object|null} [fields=null] Passed through as `event.detail.fields`.
 * @returns {void}
 */
function _openServeEditFromDiagnosis(panel, fields = null) {
  const taskCard = panel?.closest?.('.cookbook-task');
  if (taskCard) {
    const editRequest = new CustomEvent('cookbook:edit-serve', {
      bubbles: true,
      detail: { fields },
    });
    taskCard.dispatchEvent(editRequest);
  }
}
/**
 * Request the serve editor for the task containing `panel`, pre-filled with a
 * llama.cpp preset: backend `llamacpp`, empty `gpus`, `tp` of '1', `gpu_mem`
 * of '0.80', and the `_forceBackend` flag set.
 *
 * How the editor interprets these fields is decided by the
 * `cookbook:edit-serve` consumer, which is not visible here.
 *
 * @param {Element|null|undefined} panel Element inside the task card.
 * @returns {void}
 */
function _openCpuServeEdit(panel) {
  const cpuServeFields = {
    backend: 'llamacpp',
    gpus: '',
    tp: '1',
    gpu_mem: '0.80',
    _forceBackend: true,
  };
  _openServeEditFromDiagnosis(panel, cpuServeFields);
}
// Infer the gated base repo that single-file checkpoints need configs from
function _inferBaseRepo(text) {
if (!text) return null;
@@ -70,17 +120,12 @@ export const ERROR_PATTERNS = [
},
{
pattern: /not divisible by weight quantization|quantization block/i,
message: 'Model quantization format incompatible with this vLLM version. Try a different quant (AWQ) or update vLLM.',
message: 'FP8 MoE quantization is incompatible with this tensor-parallel split.',
suggestion: 'Suggested action: retry with a lower tensor-parallel size, such as TP=4 or TP=2. If it still fails, use a non-FP8/GGUF version of the model.',
fixes: [
{ label: 'Update vLLM on server', action: (panel) => {
const taskEl = panel.closest('.cookbook-task');
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
const host = task?.remoteHost || '';
const prefix = _buildEnvPrefix();
const pipCmd = prefix ? prefix + ' pip install -U vllm' : 'pip install -U vllm';
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
_launchServeTask('update-vllm', 'pip-update', cmd);
}},
{ label: 'Retry with TP=4', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '4') },
{ label: 'Retry with TP=2', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '2') },
{ label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
],
},
{
@@ -218,6 +263,7 @@ export const ERROR_PATTERNS = [
pattern: /vllm.*command not found|No module named vllm/i,
message: 'vLLM is not installed or not in PATH.',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('vllm') },
{ label: 'Check environment is set', action: (panel) => {
const el = panel.querySelector('[data-field="env_type"]');
if (el) { el.focus(); el.style.borderColor = 'var(--red)'; }
@@ -226,11 +272,21 @@ export const ERROR_PATTERNS = [
},
{
pattern: /sglang.*command not found|No module named sglang|SGLang is not installed/i,
message: 'SGLang is not installed or not in PATH. Open Cookbook → Dependencies and install sglang on this server.',
message: 'SGLang is not installed or not in PATH.',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
{ label: 'Copy install command', action: () => _copyText('python3 -m pip install "sglang[all]"') },
],
},
{
pattern: /No accelerator \(CUDA, XPU, HPU, NPU, MUSA, MPS\) is available|Triton is not supported on current platform/i,
message: 'SGLang needs a visible GPU/accelerator on this server.',
suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
fixes: [
{ label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
{ label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
],
},
{
pattern: /flashinfer.*version.*does not match|flashinfer-cubin version/i,
message: 'FlashInfer version mismatch.',
@@ -241,8 +297,12 @@ export const ERROR_PATTERNS = [
},
{
pattern: /torch\.cuda\.is_available\(\).*False|No CUDA runtime/i,
message: 'CUDA not available in this environment.',
fixes: [],
message: 'vLLM needs a visible CUDA/ROCm GPU.',
suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
fixes: [
{ label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
{ label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
],
},
{
pattern: /Engine core initialization failed/i,
@@ -295,17 +355,20 @@ export const ERROR_PATTERNS = [
},
{
pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i,
message: 'vLLM/Transformers kernel package mismatch.',
message: 'Transformers/kernels package mismatch.',
fixes: [
{ label: 'Update vLLM/Transformers/kernels', action: (panel) => {
{ label: 'Repair kernel package', action: (panel) => {
const taskEl = panel.closest('.cookbook-task');
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
const host = task?.remoteHost || '';
const prefix = _buildEnvPrefix();
const pipCmd = prefix ? prefix + ' python3 -m pip install -U vllm transformers kernels' : 'python3 -m pip install -U vllm transformers kernels';
const pipCmd = prefix
? prefix + ' python3 -m pip install --user --break-system-packages "kernels<0.15"'
: 'python3 -m pip install --user --break-system-packages "kernels<0.15"';
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
_launchServeTask('update-vllm-stack', 'pip-update', cmd);
_launchServeTask('repair-kernels', 'pip-update', cmd);
}},
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
],
},
{
@@ -319,13 +382,24 @@ export const ERROR_PATTERNS = [
pattern: /llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'/i,
message: 'llama-cpp-python server is not installed. Run: pip install "llama-cpp-python[server]"',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
{ label: 'Copy install command', action: () => _copyText('pip install "llama-cpp-python[server]"') },
],
},
{
pattern: /CUDA Toolkit not found|Unable to find cudart library|missing:\s*CUDA_CUDART/i,
message: 'llama.cpp found nvcc, but the CUDA runtime library is missing.',
suggestion: 'Suggested action: relaunch with the updated runner so llama.cpp builds CPU-only, or install a complete CUDA toolkit/runtime on this server for GPU llama.cpp.',
fixes: [
{ label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
],
},
{
pattern: /No module named ['"]?torch|No module named ['"]?diffusers|diffusers.*command not found/i,
message: 'Diffusion serving needs PyTorch and diffusers. Install diffusers from Cookbook → Dependencies.',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('diffusers') },
{ label: 'Copy install command', action: () => _copyText('python3 -m pip install "diffusers[torch]"') },
],
},
@@ -402,10 +476,32 @@ export function _diagnose(text) {
return null;
}
/**
 * Build a Markdown troubleshooting report for a diagnosed task.
 *
 * Sections are emitted in this order, separated by blank lines, and each
 * optional section is skipped when its input is falsy:
 *   title, Task (needs `task`), Diagnosis, Suggested action
 *   (needs `suggestionText`), Launch command (needs `task.payload._cmd`),
 *   Captured output (needs `sourceText`).
 *
 * @param {object|null|undefined} task Task record; reads sessionId/id, type,
 *   status, payload.repo_id, name, remoteHost, sshPort and payload._cmd.
 * @param {{message?: string}|null|undefined} diagnosis Diagnosis whose
 *   `message` is printed, or '(none)' when absent.
 * @param {*} sourceText Captured output; stringified and trimmed.
 * @param {string} suggestionText Suggestion; a leading "Suggested action:"
 *   prefix (case-insensitive) is stripped.
 * @returns {string} The report as newline-joined Markdown.
 */
function _diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText) {
  // Each section is a list of lines; sections are glued with one blank line.
  const sections = [['## Odysseus Cookbook troubleshooting']];

  if (task) {
    const portSuffix = task.sshPort ? `:${task.sshPort}` : '';
    sections.push([
      '### Task',
      `- ID: ${task.sessionId || task.id || 'unknown'}`,
      `- Type: ${task.type || 'unknown'}`,
      `- Status: ${task.status || 'unknown'}`,
      `- Model: ${task.payload?.repo_id || task.name || 'unknown'}`,
      `- Host: ${task.remoteHost || 'local'}${portSuffix}`,
    ]);
  }

  sections.push(['### Diagnosis', diagnosis?.message || '(none)']);

  if (suggestionText) {
    const withoutPrefix = suggestionText.replace(/^Suggested action:\s*/i, '');
    sections.push(['### Suggested action', withoutPrefix]);
  }

  const launchCommand = task?.payload?._cmd || '';
  if (launchCommand) {
    sections.push(['### Launch command', '```bash', launchCommand, '```']);
  }

  if (sourceText) {
    sections.push(['### Captured output', '```text', String(sourceText).trim(), '```']);
  }

  return sections.map((sectionLines) => sectionLines.join('\n')).join('\n\n');
}
export function _showDiagnosis(panel, diagnosis, sourceText) {
if (panel._lastDiagMsg === diagnosis.message) return;
if (panel._diagDismissed === diagnosis.message) return; // stay dismissed until new error
const wasCollapsed = panel._lastDiagMsg === diagnosis.message && panel._diagCollapsed;
if (panel._diagDismissed === diagnosis.message) return;
panel._lastDiagMsg = diagnosis.message;
panel._diagCollapsed = !!wasCollapsed;
let diag = panel.querySelector('.cookbook-diagnosis');
if (!diag) {
@@ -417,57 +513,116 @@ export function _showDiagnosis(panel, diagnosis, sourceText) {
}
diag.classList.remove('hidden');
diag.innerHTML = '';
const taskEl = panel?.closest?.('.cookbook-task');
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
const fixes = [...(diagnosis.fixes || [])];
if (task?.type === 'serve' && task.payload?._cmd && !fixes.some(f => f.label === 'Edit serve')) {
fixes.push({ label: 'Edit serve', action: (p) => _openServeEditFromDiagnosis(p) });
}
const suggestionText = diagnosis.suggestion || (fixes.length
? `Suggested action: ${fixes[0].label}.`
: 'Suggested action: copy the error and adjust the serve settings.');
const header = document.createElement('div');
header.style.cssText = 'display:flex;align-items:center;justify-content:space-between;';
// Simplified diagnosis card: just the error message + suggestion + fix
// button(s). Removed the fold toggle, copy button, and × dismiss — they
// made the card noisy without earning their keep. _diagCollapsed is kept
// as a stub so callers don't have to change.
panel._diagCollapsed = false;
const body = document.createElement('div');
body.className = 'cookbook-diag-body';
const msg = document.createElement('div');
msg.className = 'cookbook-diag-message';
msg.textContent = diagnosis.message;
header.appendChild(msg);
body.appendChild(msg);
const suggestion = document.createElement('div');
suggestion.className = 'cookbook-diag-suggestion';
suggestion.textContent = suggestionText;
body.appendChild(suggestion);
diag.appendChild(body);
const dismiss = document.createElement('button');
dismiss.className = 'close-btn';
dismiss.style.cssText = 'width:16px;height:16px;font-size:9px;flex-shrink:0;';
dismiss.textContent = '\u2715';
dismiss.addEventListener('click', () => { panel._diagDismissed = diagnosis.message; _clearDiagnosis(panel); });
header.appendChild(dismiss);
const runFix = async (fix, button, busyLabel = fix.label, onStart = null, onDone = null) => {
if (!fix || !button || button.dataset.busy) return;
button.dataset.busy = '1';
const _orig = button.textContent;
const wp = spinnerModule.createWhirlpool(12);
wp.element.style.cssText = 'display:inline-block;vertical-align:middle;width:12px;height:12px;margin-right:5px;';
button.textContent = '';
button.appendChild(wp.element);
const _lbl = document.createElement('span');
_lbl.textContent = busyLabel;
_lbl.style.verticalAlign = 'middle';
button.appendChild(_lbl);
try {
if (typeof onStart === 'function') onStart();
await fix.action(panel, sourceText);
} catch (err) {
console.error('[cookbook] diagnosis fix failed', err);
} finally {
if (button.isConnected) {
try { wp.destroy(); } catch {}
button.textContent = _orig;
delete button.dataset.busy;
}
if (typeof onDone === 'function') onDone();
}
};
diag.appendChild(header);
if (diagnosis.fixes && diagnosis.fixes.length) {
if (fixes.length) {
const row = document.createElement('div');
row.className = 'cookbook-diag-fixes';
for (const fix of diagnosis.fixes) {
const btn = document.createElement('button');
btn.className = 'cookbook-btn cookbook-diag-btn';
btn.textContent = fix.label;
btn.addEventListener('click', async () => {
if (btn.dataset.busy) return;
btn.dataset.busy = '1';
// Spinner feedback while the fix runs (kill + relaunch takes a moment).
const _orig = btn.textContent;
const wp = spinnerModule.createWhirlpool(12);
wp.element.style.cssText = 'display:inline-block;vertical-align:middle;width:12px;height:12px;margin-right:5px;';
btn.textContent = '';
btn.appendChild(wp.element);
const _lbl = document.createElement('span');
_lbl.textContent = _orig;
_lbl.style.verticalAlign = 'middle';
btn.appendChild(_lbl);
try {
await fix.action(panel, sourceText);
} catch (e) {
console.error('[cookbook] diagnosis fix failed', e);
} finally {
// Retries animate the whole card away (button goes with it). For fixes
// that leave the card in place, restore the label.
if (btn.isConnected) { try { wp.destroy(); } catch {} btn.textContent = _orig; delete btn.dataset.busy; }
}
});
row.appendChild(btn);
if (fixes.length <= 3) {
for (const fix of fixes) {
const btn = document.createElement('button');
btn.className = 'cookbook-btn cookbook-diag-btn';
btn.type = 'button';
btn.textContent = fix.label;
btn.addEventListener('click', (e) => {
e.stopPropagation();
runFix(fix, btn);
});
row.appendChild(btn);
}
body.appendChild(row);
return;
}
diag.appendChild(row);
const wrap = document.createElement('div');
wrap.className = 'cookbook-diag-actions';
const trigger = document.createElement('button');
trigger.className = 'cookbook-btn cookbook-diag-action-trigger';
trigger.type = 'button';
trigger.textContent = 'Actions';
trigger.appendChild(document.createTextNode(' ▾'));
wrap.appendChild(trigger);
const menu = document.createElement('div');
menu.className = 'dropdown cookbook-diag-menu hidden';
for (const fix of fixes) {
const item = document.createElement('button');
item.type = 'button';
item.textContent = fix.label;
item.addEventListener('click', async (e) => {
e.stopPropagation();
if (item.dataset.busy || trigger.dataset.busy) return;
item.dataset.busy = '1';
await runFix(fix, trigger, fix.label, () => menu.classList.add('hidden'), () => delete item.dataset.busy);
});
menu.appendChild(item);
}
wrap.appendChild(menu);
trigger.addEventListener('click', (e) => {
e.stopPropagation();
if (trigger.dataset.busy) return;
document.querySelectorAll('.cookbook-diag-menu').forEach(m => {
if (m !== menu) m.classList.add('hidden');
});
menu.classList.toggle('hidden');
});
row.appendChild(wrap);
body.appendChild(row);
}
}