Cookbook UI: Ollama browser, advanced serve fold, API tokens form, diagnosis toolbar, polish

Surface a lot of accumulated cookbook + UI work as a single non-agent commit so the agent rework lands cleanly.

Highlights:
- Ollama as a first-class backend in the Cookbook:
  * Download input accepts ollama-style names (name:tag) → backend=ollama
  * /api/cookbook/ollama/library (cached scrape of ollama.com + curated fallback so classic models like qwen2.5 stay reachable)
  * "Browse Ollama library" toggle below Download with size chips
  * Engine=Ollama in hwfit toolbar merges the Ollama library into the main scan list as per-tag rows with the same Fit/Param/Quant/VRAM columns; click → fills Download input
- API Tokens form added to Integrations panel (matching the already-wired loadTokens()/initTokenForm(), which had no HTML)
- Serve panel polish: Advanced fold tightening (-8px nudges on vLLM checks, Extra args, Spec row), n_cpu_moe + Split Mode controls pulled up 8px to align with the row's checkboxes, GGUF File dropdown exposed for Ollama backend, GPU re-render on Edit serve restore, _forceBackend flag so saved serveState wins over backend detection, cookbook:servers-changed CustomEvent so panels don't need refresh
- Models page redesign: Add Models row (URL + hidden API key reveal + Type select + Scan/Ollama/Key/Test/Add icon buttons), Probe All + Clear-offline buttons in Added Models toolbar, offline-pill removed (opacity already conveys state), Engine dropdown gains Ollama option
- _ping_endpoint probes /v1/models then base, accepts 4xx as reachable (vLLM returns 404 on bare /v1, so fully working endpoints were showing offline)
- Diagnosis card: × dismiss + Copy bundle buttons restored on the serve error feedback card
- Orphan tmux sweep re-enabled behind a 60s rate-limit + background Thread (off the main event loop) so dead serves get discovered
- cookbook_routes auto-register watchdog: drops the endpoint if the serve session exits non-zero within the first ~3min
- ollama-rocm sidecar awareness in download wrapper (`docker exec ollama-rocm ollama pull` when host ollama isn't installed)
- Skill extractor sets initial_status="published" when auto_approve_skills pref is on (audit demotes later)
- Skill list / model list / cookbook scan misc polish
2026-06-30 00:22:10 -04:00 · 2026-06-08 22:38:49 +09:00
parent 646f8bd2a9
commit fa8c93ec0a
28 changed files with 3033 additions and 1026 deletions
@@ -416,9 +416,11 @@ function _hwfitShowError(list, host, detail) {
  if (rb) rb.addEventListener('click', () => { _resetGpuToggleState(); _hwfitFetch(true); });
 }

-// Client-side "Engine" filter (llama.cpp / vLLM / SGLang). Empty = show all.
-// Uses the same _detectBackend() the serve commands use, so what you filter to
-// is exactly what would be launched. Pure view filter — no refetch needed.
+// Client-side "Engine" filter (llama.cpp / vLLM / SGLang / Ollama). Empty =
+// show all. Uses the same _detectBackend() the serve commands use, so what you
+// filter to is exactly what would be launched. Pure view filter — no refetch
+// needed. Ollama rows are merged into the main list (see _ensureOllamaLib +
+// _ollamaToHwfitRows below) so the filter handles all engines uniformly.
 function _applyEngineFilter(models) {
  const want = document.getElementById('hwfit-engine')?.value || '';
  if (!want || !Array.isArray(models)) return models || [];
@@ -427,6 +429,86 @@ function _applyEngineFilter(models) {
  });
 }

+// Ollama library cache (per page). Filled lazily by _ensureOllamaLib(), which
+// _hwfitFetch awaits for non-image scans. Holds the `models` array returned by
+// /api/cookbook/ollama/library (null = not fetched yet); the entries are later
+// turned into per-tag hwfit rows so they sit in the main list beside HF results.
+let _ollamaLibCache = null;
+async function _ensureOllamaLib() { // resolves to the cached array; every failure path yields []
+  if (_ollamaLibCache) return _ollamaLibCache; // [] is truthy, so an empty or failed result is reused too
+  try {
+    const res = await fetch('/api/cookbook/ollama/library'); // NOTE(review): res.ok is not checked before parsing
+    const data = await res.json();
+    _ollamaLibCache = Array.isArray(data?.models) ? data.models : []; // payload without a models array → []
+  } catch { _ollamaLibCache = []; } // NOTE(review): a transient failure is cached with no retry here — confirm intended
+  return _ollamaLibCache;
+}
+
+// Helpers that convert Ollama library entries into per-tag hwfit rows. The row
+// shape (fit_level, parameter_count, required_gb, score, …) is meant to match
+// what _hwfitRenderList expects, so the rows render identically to HF results.
+function _olParseSize(s) { // size tag → parameter count in billions, or null when unrecognised
+  // "14b" → 14, "1.5b" → 1.5, "8x7b" → 56 (rough), "135m" → 0.135, "latest" → null
+  if (!s) return null; // empty / missing tag
+  const low = s.toLowerCase(); // case-insensitive matching; assumes s is a string — TODO confirm
+  let m = low.match(/^(\d+(?:\.\d+)?)x(\d+(?:\.\d+)?)b$/); // "<a>x<b>b" form, e.g. "8x7b"
+  if (m) return parseFloat(m[1]) * parseFloat(m[2]); // product of the two numbers (rough total)
+  m = low.match(/^(\d+(?:\.\d+)?)b$/); // plain billions, e.g. "14b"
+  if (m) return parseFloat(m[1]);
+  m = low.match(/^(\d+(?:\.\d+)?)m$/); // millions, e.g. "135m"
+  if (m) return parseFloat(m[1]) / 1000; // convert millions to billions
+  return null; // anything else (e.g. "latest") carries no parseable size
+}
+function _ollamaToHwfitRows(libModels, vramAvail, ramAvail) { // → one row object per (model, size tag); caller passes GB figures
+  const out = [];
+  if (!Array.isArray(libModels)) return out; // non-array input yields no rows
+  for (const m of libModels) {
+    const sizes = (Array.isArray(m.sizes) && m.sizes.length) ? m.sizes : ['latest']; // no sizes listed → a single "latest" row
+    for (const sz of sizes) {
+      const params = _olParseSize(sz); // billions of parameters, or null when the tag is not a size
+      // Assumes Ollama's default GGUF is ~Q4_K_M; rough VRAM estimate of 0.6 GB per billion parameters.
+      const vramGb = params ? params * 0.6 : 0; // 0 doubles as "size unknown"
+      let fitLevel = 'no_fit'; // stays 'no_fit' when the size is unknown or neither branch below applies
+      if (vramGb && vramAvail) {
+        if (vramGb <= vramAvail * 0.6) fitLevel = 'perfect'; // needs at most 60% of available VRAM
+        else if (vramGb <= vramAvail) fitLevel = 'good';
+        else if (ramAvail && vramGb <= ramAvail) fitLevel = 'marginal'; // exceeds VRAM but within RAM
+        else fitLevel = 'too_tight';
+      } else if (vramGb && ramAvail && vramGb <= ramAvail) {
+        fitLevel = 'marginal'; // no VRAM figure: judged against RAM only
+      }
+      const tag = `${m.name}:${sz}`; // "<name>:<size>", used as both name and repo_id below
+      const paramsLabel = params
+        ? (params >= 1 ? params.toFixed(params >= 10 ? 0 : 1) + 'B' : (params * 1000).toFixed(0) + 'M')
+        : '?';
+      // Synthetic score: 30 plus 0.3 per billion parameters, capped at 60, and
+      // 25 when the size is unknown. Intended to keep Ollama rows below
+      // well-scored HF results in the default score view (HF score range is
+      // not visible here — confirm). Sort by Fit or VRAM to surface them more.
+      const score = params ? Math.min(30 + params * 0.3, 60) : 25;
+      out.push({
+        name: tag,
+        repo_id: tag,
+        quant: 'Q4_K_M', // fixed assumption, not read from the library entry
+        parameter_count: paramsLabel,
+        params_b: params || 0,
+        required_gb: vramGb,
+        fit_level: fitLevel,
+        score,
+        speed_tps: 0, // fixed placeholder
+        context: 0, // fixed placeholder
+        is_gguf: true,
+        backend: 'ollama',
+        _isOllama: true, // the row click handler checks this to fill the Download input instead of expanding
+        _olName: m.name,
+        _olSize: sz,
+        _description: m.description || '', // matched by the client-side search alongside name
+      });
+    }
+  }
+  return out;
+}
+
 export async function _hwfitFetch(fresh = false) {
  const _tk = ++_hwfitFetchToken;
  const useCase = document.getElementById('hwfit-usecase')?.value || '';
@@ -475,7 +557,12 @@ export async function _hwfitFetch(fresh = false) {
    _setLastCacheHost(remoteKey);
    const _cacheSrv = _serverByVal(_envState.remoteServerKey || remoteHost);
    const _cachePort = _cacheSrv?.port || '';
-    const _cacheParams = new URLSearchParams({ host: remoteHost }); if (_cachePort) _cacheParams.set('ssh_port', _cachePort); if (_cacheSrv?.platform) _cacheParams.set('platform', _cacheSrv.platform);
+    const _cacheParams = new URLSearchParams();
+    if (remoteHost) {
+      _cacheParams.set('host', remoteHost);
+      if (_cachePort) _cacheParams.set('ssh_port', _cachePort);
+      if (_cacheSrv?.platform) _cacheParams.set('platform', _cacheSrv.platform);
+    }
    fetch(`/api/model/cached?${_cacheParams}`, { credentials: 'same-origin' })
      .then(r => r.json())
      .then(d => {
@@ -543,7 +630,18 @@ export async function _hwfitFetch(fresh = false) {
    // A newer scan started while this one was in flight (user switched servers
    // mid-probe) — drop this stale response so it can't clobber the new one.
    if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
-    if (!res.ok) throw new Error(res.statusText);
+    if (!res.ok) {
+      const body = await res.text().catch(() => '');
+      let msg = '';
+      try {
+        const payload = JSON.parse(body);
+        msg = payload && (payload.detail || payload.error || payload.message);
+      } catch {
+        msg = body;
+      }
+      msg = typeof msg === 'string' ? msg.trim() : '';
+      throw new Error(`HTTP ${res.status} ${res.statusText}${msg ? `: ${msg}` : ''}`);
+    }
    let data = await res.json();
    if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
    if (!isImageMode && quantPref && !data.error && Array.isArray(data.models) && data.models.length === 0) {
@@ -583,6 +681,23 @@ export async function _hwfitFetch(fresh = false) {
      if (!_cached) { _hwfitShowError(list, remoteHost, data.error); if (hw) hw.innerHTML = ''; }
      return;
    }
+    // Merge Ollama library rows into the main list so they appear with the
+    // same Fit/Param/Quant/VRAM/Mode columns as HF results and respond to the
+    // Engine filter. Skipped in image-gen mode (Ollama doesn't serve diffusers).
+    if (!isImageMode) {
+      const _vramAvail = data.system?.gpu_vram_gb || 0;
+      const _ramAvail = data.system?.total_ram_gb || 0;
+      const _lib = await _ensureOllamaLib();
+      const _olRows = _ollamaToHwfitRows(_lib, _vramAvail, _ramAvail);
+      // Search filter on Ollama rows: HF API already filters by search; do the
+      // same client-side over Ollama name + description so the search box
+      // works consistently across both sources.
+      const _s = (search || '').trim().toLowerCase();
+      const _olFiltered = _s
+        ? _olRows.filter(r => r.name.toLowerCase().includes(_s) || (r._description || '').toLowerCase().includes(_s))
+        : _olRows;
+      data.models = (data.models || []).concat(_olFiltered);
+    }
    _hwfitCache = data;
    _hwfitRenderHw(hw, data.system);
    // Propagate local platform from hardware probe so _isWindows(task) works
@@ -964,14 +1079,36 @@ export function _hwfitRenderList(el, models) {
    html += `</div>`;
  }
  el.innerHTML = html;
-  // Click row → expand inline action panel
+  // Click row → expand inline action panel. Exception: Ollama rows skip the
+  // expand panel (no HF metadata to power it) and just fill the Download
+  // input with the `<name>:<size>` tag — one click → ready to pull.
  el.querySelectorAll('.hwfit-row:not(.hwfit-header)').forEach(row => {
    row.addEventListener('click', () => {
      const name = row.dataset.model;
      if (!name) return;
-      // Find model data from cache
      const modelData = (_hwfitCache?.models || []).find(m => m.name === name);
      if (!modelData) return;
+      if (modelData._isOllama) {
+        // Force-open the Download card if it's been collapsed — otherwise
+        // filling the (hidden) input silently swallows the click.
+        const dlBody = document.getElementById('cookbook-download-card-body');
+        const dlArrow = document.getElementById('cookbook-download-card-arrow');
+        if (dlBody && dlBody.style.display === 'none') {
+          dlBody.style.display = 'block';
+          if (dlArrow) dlArrow.style.transform = 'rotate(90deg)';
+        }
+        const dlInput = document.getElementById('cookbook-dl-repo');
+        if (dlInput) {
+          dlInput.value = modelData.name;
+          dlInput.focus();
+          // Briefly highlight so the user sees what got filled even when the
+          // download card sits far above the (long) hwfit list.
+          dlInput.classList.add('cookbook-dl-flash');
+          setTimeout(() => dlInput.classList.remove('cookbook-dl-flash'), 800);
+          dlInput.scrollIntoView({ behavior: 'smooth', block: 'center' });
+        }
+        return;
+      }
      _expandModelRow(row, modelData);
    });
  });
@@ -1297,7 +1434,7 @@ export function _hwfitInit() {
  if (sort) sort.addEventListener('change', () => _hwfitFetch());
  if (qpref) qpref.addEventListener('change', () => _hwfitFetch());
  // Engine filter is a pure client-side view filter over the already-fetched
-  // list, so just re-render from cache instead of re-probing hardware.
+  // list (HF + Ollama merged), so just re-render from cache.
  const engine = document.getElementById('hwfit-engine');
  if (engine) engine.addEventListener('change', () => {
    const list = document.getElementById('hwfit-list');
@@ -1694,6 +1831,15 @@ export function _hwfitInit() {
      saveBtn.addEventListener('click', () => {
        _syncServers();
        _rebuildServerSelect();
+        // Broadcast for anything outside the settings tab that depends on
+        // the server list (Serve dialog host picker, Running tasks, etc.).
+        // Without this the user had to hard-refresh to see the new entry
+        // in those other places.
+        try {
+          document.dispatchEvent(new CustomEvent('cookbook:servers-changed', {
+            detail: { servers: _envState.servers.slice() },
+          }));
+        } catch (_) {}
        saveBtn.classList.add('saved');
        saveBtn.innerHTML = '<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="2.6" stroke-linecap="round" stroke-linejoin="round" style="margin-right:4px;flex-shrink:0;"><polyline points="20 6 9 17 4 12"/></svg>Saved';
      });
@@ -1713,6 +1859,11 @@ export function _hwfitInit() {
      entry.remove();
      _syncServers();
      _rebuildServerSelect();
+      try {
+        document.dispatchEvent(new CustomEvent('cookbook:servers-changed', {
+          detail: { servers: _envState.servers.slice() },
+        }));
+      } catch (_) {}
      _hwfitCache = null;
      _hwfitFetch();
    });