Cookbook: scoring fixes, UI polish, false-finished + stale-state bug fixes

Backend (services/hwfit + routes): - rank_models picks visible set by REQUESTED column, not always score — sorting by Param now shows highest-param models PERIOD (incl. too_tight). - New fit_only param. Multi-GPU rigs filter GGUF Q*/IQ quants (vLLM/SGLang cannot serve them); default non-prequantized to BF16 on 2+ GPUs. - AWQ / GPTQ-8bit get a -1.0 quality penalty (was 0.0, tied with FP8), so FP8 wins when both fit. - Version-aware tiebreaker (parse Mn.n / Vn) — MiniMax-M2.7 ranks above M2.5 on equal composite score; >=100B integers not misread as versions. - /api/cookbook/hf-latest no longer drops models without an "NB" pattern in the repo id (MiniMax-M2.7, DeepSeek-V4-Pro etc. were silently filtered). - Cached-model scan: atexit flushes models JSON even if the script is killed mid-walk; each scan_dir wrapped in try/except; timeout 60s -> 180s. - KB granularity for sub-MB sizes (was "0 MB" for 12 KB shells). New "stalled" status for shells <1 MB with no .incomplete files. - /api/cookbook/state POST guard: rejects "done" download tasks lacking DOWNLOAD_OK / DOWNLOAD_FAILED / /snapshots/ when the last-mentioned shard is N<total — stops stale tabs from poisoning persisted state. - hf_models.json: add zai-org/GLM-5.1; flip zai-org/GLM-5 quantization Q4_K_M -> BF16 (it is the native base, not a quant). Frontend (static/js): - Scan/Download toolbar: quant defaults to All; ctx slider (8k/16k/32k/ 50k/128k/Max) ported from origin/main with sort=fit on drag, sort=score on Max. GPU toggle commits _activeCount to maxGpu on initial render. Fit column header tagged with active budget (RAM / GPU / N GPU). - Foldable Download admin-card: the Download h2 is the chevron trigger; state persists in localStorage. - Download card surfaces destination dir (Dir: <path>). Same dir on running task row, font/color matched to uptime (9px Fira Code muted, opacity .4). - Serve panel ctx text input always resets to model max on open. Sub-MB cached models show with red "download stalled" badge. - Bulk-select Cancel + Delete reset the Select button label on exit. - Cookbook running: false-finished bug fixed — DOWNLOAD_OK or /snapshots/ required; bare "Download complete" no longer marks the task done after the first config file. Clear button now sends tmux kill-session too. True overall % for multi-shard downloads: ((N-1)+frac)/total instead of hf_transfer per-shard aggregate. - Diagnosis card simplified: removed fold toggle, copy button, dismiss X. Suggestion font matches message body (12px). - HF token field flashes green check + "Saved" on save. - Cached scan no longer counts stalled rows as downloaded in Scan/Download. CSS: - dep Install button width pinned to 76px to match Installed split. - task-sub row +1px; task-status badge gets margin-right 8px. - Ctx slider styled like gallery editor sliders (thin pill rail, red thumb). - Bulk-select cancel button top -3px -> -5px.
2026-06-17 10:15:27 -04:00 · 2026-06-03 16:32:20 +09:00
parent ab0a480f30
commit eb79b76432
15 changed files with 1175 additions and 198 deletions
@@ -153,14 +153,31 @@ export function _renderGpuToggles(system) {
  }
  const validCounts = _validTpCounts(poolSize);
  const maxGpu = validCounts.length ? validCounts[validCounts.length - 1] : 0;
+  // Commit the data layer to maxGpu on initial render so it matches the
+  // visual highlight. Before this, _activeCount stayed undefined → no
+  // gpu_count param sent → backend's fallback could rank against RAM on
+  // mixed-resource boxes ("tightest" sorted by RAM instead of GPU).
+  if (container._activeCount === undefined && validCounts.length) {
+    container._activeCount = maxGpu;
+  }
  html += '<button class="hwfit-gpu-btn" data-count="0" title="CPU / RAM only">RAM</button>';
  const hasExplicitCount = typeof container._activeCount === 'number';
  for (const n of validCounts) {
    const text = n === 1 ? 'GPU' : n + ' GPU';
-    const isActive = hasExplicitCount ? (n === container._activeCount) : (container._activeCount === undefined && n === maxGpu);
+    const isActive = hasExplicitCount && n === container._activeCount;
    html += `<button class="hwfit-gpu-btn${isActive ? ' active' : ''}" data-count="${n}" title="${n} GPU${n > 1 ? 's' : ''}">${text}</button>`;
  }
+  // Also mark the RAM button active when the user explicitly chose RAM (0)
+  // — the loop above only handles GPU buttons.
+  if (container._activeCount === 0) {
+    const ramBtn = container.querySelector('.hwfit-gpu-btn[data-count="0"]');
+    // (we just set innerHTML so we re-mark below after assignment)
+  }
  container.innerHTML = html;
+  if (container._activeCount === 0) {
+    const ramBtn = container.querySelector('.hwfit-gpu-btn[data-count="0"]');
+    if (ramBtn) ramBtn.classList.add('active');
+  }

  // Pool dropdown: switch pools, reset the count to the new pool's max, rebuild.
  const sel = container.querySelector('#hwfit-gpu-group');
@@ -188,9 +205,12 @@ export function _renderGpuToggles(system) {
      } else {
        btn.classList.add('active');
        container._activeCount = count;
-        // Auto-set quant based on hardware selection
+        // Auto-suggest a quant based on hardware selection — but ONLY when the
+        // user has already picked a specific quant. When they're on "All"
+        // (value === ""), leave them on All: toggling a GPU shouldn't silently
+        // yank them out of the All view they wanted to see.
        const quantSel = document.getElementById('hwfit-quant');
-        if (quantSel) {
+        if (quantSel && quantSel.value !== '') {
          if (count <= 1) {
            quantSel.value = 'Q4_K_M'; // RAM or 1 GPU -> Q4 sweet spot
          } else {
@@ -211,9 +231,34 @@ export function _renderGpuToggles(system) {
 // reload paints instantly, then we refresh in the background and swap.
 const _SCAN_CACHE_KEY = 'hwfit_scan_cache_v1';
 const _MANUAL_HW_KEY = 'hwfit_manual_hardware_v1';
+const _CTX_KEY = 'hwfit_target_context_v1';
+const _CTX_PRESETS = [8192, 16384, 32768, 50000, 131072, 0]; // 0 = model max
 const _SCAN_CACHE_MAX = 12;            // keep the newest N signatures
 const _SCAN_CACHE_TTL = 6 * 3600 * 1000; // 6 h — hardware rarely changes

+// Ctx slider helpers (ported from origin/main). The slider picks an INDEX into
+// _CTX_PRESETS; _ctxValue() resolves it to a token count (0 = "Max"). The label
+// next to the slider re-renders to "8k" / "16k" / … / "Max".
+function _ctxLabel(value) {
+  const n = Number(value) || 0;
+  if (!n) return 'Max';
+  return n >= 1000 ? Math.round(n / 1000) + 'k' : String(n);
+}
+function _ctxValue() {
+  const slider = document.getElementById('hwfit-context');
+  const idx = Math.max(0, Math.min(_CTX_PRESETS.length - 1, Number(slider?.value ?? 3) || 0));
+  return _CTX_PRESETS[idx] || 0;
+}
+function _syncCtxControl() {
+  const slider = document.getElementById('hwfit-context');
+  const label = document.getElementById('hwfit-context-label');
+  if (!slider) return;
+  const saved = localStorage.getItem(_CTX_KEY);
+  const savedIdx = saved == null ? 3 : _CTX_PRESETS.indexOf(Number(saved));
+  slider.value = String(savedIdx >= 0 ? savedIdx : 3);
+  if (label) label.textContent = _ctxLabel(_ctxValue());
+}
+
 function _manualHwState() {
  try {
    const s = JSON.parse(localStorage.getItem(_MANUAL_HW_KEY) || '{}');
@@ -749,6 +794,13 @@ export function _hwfitRenderList(el, models) {
  const sortSel = document.getElementById('hwfit-sort');
  const currentSort = sortSel?.value || 'score';
  const isReversed = sortSel?.dataset.reverse === '1';
+  // Active budget for the Fit column label \u2014 make it obvious whether the
+  // ranking is against GPU or RAM so "tightest" can't be ambiguous on a
+  // mixed-resource box.
+  const tc = document.getElementById('hwfit-gpu-toggles');
+  const _budget = (tc && typeof tc._activeCount === 'number')
+    ? (tc._activeCount === 0 ? 'RAM' : (tc._activeCount === 1 ? 'GPU' : tc._activeCount + ' GPU'))
+    : null;
  let html = '<div class="hwfit-row hwfit-header">';
  for (const col of _hwfitColumns) {
    const sortable = col.key ? ' hwfit-sortable' : '';
@@ -760,7 +812,10 @@ export function _hwfitRenderList(el, models) {
      arrow = isReversed ? ' \u25B2' : ' \u25BC';
    }
    const dataAttr = col.key ? ` data-sort="${col.key}"` : '';
-    html += `<span class="hwfit-col ${col.cls}${sortable}${active}"${dataAttr}>${col.label}${arrow}</span>`;
+    const label = (col.cls === 'hwfit-fit' && _budget)
+      ? `${col.label} <span style="font-size:0.75em;opacity:0.6;font-weight:normal;">(${_budget})</span>`
+      : col.label;
+    html += `<span class="hwfit-col ${col.cls}${sortable}${active}"${dataAttr}>${label}${arrow}</span>`;
  }
  html += '</div>';
  for (const m of models) {
@@ -1082,11 +1137,40 @@ export function _hwfitInit() {
  const uc = document.getElementById('hwfit-usecase');
  const sort = document.getElementById('hwfit-sort');
  const qpref = document.getElementById('hwfit-quant');
+  const ctx = document.getElementById('hwfit-context');
+  const ctxLabel = document.getElementById('hwfit-context-label');
  const search = document.getElementById('hwfit-search');
  const remote = document.getElementById('hwfit-host');
+  _syncCtxControl();
  if (uc) uc.addEventListener('change', () => _hwfitFetch());
  if (sort) sort.addEventListener('change', () => _hwfitFetch());
  if (qpref) qpref.addEventListener('change', () => _hwfitFetch());
+  if (ctx && !ctx.dataset.bound) {
+    ctx.dataset.bound = '1';
+    ctx.addEventListener('input', () => {
+      if (ctxLabel) ctxLabel.textContent = _ctxLabel(_ctxValue());
+    });
+    ctx.addEventListener('change', () => {
+      const targetCtx = _ctxValue();
+      try { localStorage.setItem(_CTX_KEY, String(targetCtx)); } catch {}
+      // Ctx drag affects sort mode: a specific ctx target (anything < Max)
+      // implies the user is hunting for "what fits at this context length",
+      // so re-rank by fit (lowest first). Dragging back to Max means no
+      // ctx constraint → go back to the default score-based ranking.
+      const sortSel = document.getElementById('hwfit-sort');
+      if (sortSel) {
+        if (targetCtx) {
+          sortSel.value = 'fit';
+          sortSel.dataset.reverse = '1';
+        } else {
+          sortSel.value = 'score';
+          sortSel.dataset.reverse = '';
+        }
+      }
+      _hwfitCache = null;
+      _hwfitFetch();
+    });
+  }
  // Rescan — force a fresh hardware probe (bypasses the per-host cache).
  const rescan = document.getElementById('hwfit-rescan');
  if (rescan && !rescan.dataset.bound) {