Cookbook: scoring fixes, UI polish, false-finished + stale-state bug fixes

Backend (services/hwfit + routes): - rank_models picks visible set by REQUESTED column, not always score — sorting by Param now shows highest-param models PERIOD (incl. too_tight). - New fit_only param. Multi-GPU rigs filter GGUF Q*/IQ quants (vLLM/SGLang cannot serve them); default non-prequantized to BF16 on 2+ GPUs. - AWQ / GPTQ-8bit get a -1.0 quality penalty (was 0.0, tied with FP8), so FP8 wins when both fit. - Version-aware tiebreaker (parse Mn.n / Vn) — MiniMax-M2.7 ranks above M2.5 on equal composite score; >=100B integers not misread as versions. - /api/cookbook/hf-latest no longer drops models without an "NB" pattern in the repo id (MiniMax-M2.7, DeepSeek-V4-Pro etc. were silently filtered). - Cached-model scan: atexit flushes models JSON even if the script is killed mid-walk; each scan_dir wrapped in try/except; timeout 60s -> 180s. - KB granularity for sub-MB sizes (was "0 MB" for 12 KB shells). New "stalled" status for shells <1 MB with no .incomplete files. - /api/cookbook/state POST guard: rejects "done" download tasks lacking DOWNLOAD_OK / DOWNLOAD_FAILED / /snapshots/ when the last-mentioned shard is N<total — stops stale tabs from poisoning persisted state. - hf_models.json: add zai-org/GLM-5.1; flip zai-org/GLM-5 quantization Q4_K_M -> BF16 (it is the native base, not a quant). Frontend (static/js): - Scan/Download toolbar: quant defaults to All; ctx slider (8k/16k/32k/50k/128k/Max) ported from origin/main with sort=fit on drag, sort=score on Max. GPU toggle commits _activeCount to maxGpu on initial render. Fit column header tagged with active budget (RAM / GPU / N GPU). - Foldable Download admin-card: the Download h2 is the chevron trigger; state persists in localStorage. - Download card surfaces destination dir (Dir: <path>). Same dir on running task row, font/color matched to uptime (9px Fira Code muted, opacity .4). - Serve panel ctx text input always resets to model max on open. Sub-MB cached models show with red "download stalled" badge.
- Bulk-select Cancel + Delete reset the Select button label on exit. - Cookbook running: false-finished bug fixed — DOWNLOAD_OK or /snapshots/ required; bare "Download complete" no longer marks the task done after the first config file. Clear button now sends tmux kill-session too. True overall % for multi-shard downloads: ((N-1)+frac)/total instead of hf_transfer per-shard aggregate. - Diagnosis card simplified: removed fold toggle, copy button, dismiss X. Suggestion font matches message body (12px). - HF token field flashes green check + "Saved" on save. - Cached scan no longer counts stalled rows as downloaded in Scan/Download. CSS: - dep Install button width pinned to 76px to match Installed split. - task-sub row +1px; task-status badge gets margin-right 8px. - Ctx slider styled like gallery editor sliders (thin pill rail, red thumb). - Bulk-select cancel button top -3px -> -5px.
2026-06-15 17:25:26 -04:00 · 2026-06-03 16:32:20 +09:00
parent ab0a480f30
commit eb79b76432
15 changed files with 1175 additions and 198 deletions
@@ -169,6 +169,9 @@ export function _parseServePhase(snapshot) {
  if (flat.includes('Application startup complete')) {
    return { phase: 'ready', status: 'ready' };
  }
+  if (/Ollama API ready on port\s+\d+/i.test(flat)) {
+    return { phase: 'ready', status: 'ready' };
+  }
  // HTTP access logs (e.g. GET /v1/models 200 OK) mean the server is up
  if (/(?:GET|POST)\s+\/[^\s]*\s+HTTP\/[\d.]+"\s*\d{3}/.test(flat)) {
    return { phase: 'idle', status: 'ready' };
@@ -2295,15 +2298,24 @@ async function _reconnectTask(el, task) {
        if (task.type === 'serve' && !task._endpointAdded && !task._endpointAddInFlight && task._serveReady) {
          task._endpointAddInFlight = true;
          const rawHost = task.remoteHost || 'localhost';
-          const host = rawHost.includes('@') ? rawHost.split('@').pop() : rawHost;
+          let host = rawHost.includes('@') ? rawHost.split('@').pop() : rawHost;
          const portMatch = task.payload?._cmd?.match(/--port[=\s]+(\d+)/)
            || task.payload?._cmd?.match(/(?:^|\s)-p[=\s]+(\d+)/)
            || snapshot.match(/Uvicorn running on\D*?:(\d+)/i)
            || snapshot.match(/running on\D*?:(\d+)/i)
            || snapshot.match(/listening on\D*?:(\d+)/i)
            || snapshot.match(/port[:=\s]+(\d+)/i);
-          const port = portMatch ? portMatch[1] : '8000';
-          const baseUrl = `http://${host}:${port}/v1`;
+          let port = portMatch ? portMatch[1] : '8000';
+          let baseUrl = `http://${host}:${port}/v1`;
+          const ollamaUrlMatch = snapshot.match(/Ollama API ready on port\s+\d+:\s*(http:\/\/[^\s]+)/i);
+          if (ollamaUrlMatch) {
+            try {
+              const u = new URL(ollamaUrlMatch[1]);
+              host = u.hostname || host;
+              port = u.port || '11434';
+              baseUrl = `${u.origin}/v1`;
+            } catch {}
+          }
          fetch('/api/model-endpoints', { credentials: 'same-origin' })
            .then(r => r.json())
            .then(async (eps) => {
@@ -2642,10 +2654,21 @@ async function _pollBackgroundStatus() {
      if (localTask && localTask._endpointAdded) continue;

      const rawHost = localTask?.remoteHost || t.remote || 'localhost';
-      const host = rawHost.includes('@') ? rawHost.split('@').pop() : (rawHost === 'local' ? 'localhost' : rawHost);
-      const portMatch = localTask?.payload?._cmd?.match(/--port\s+(\d+)/);
-      const port = portMatch ? portMatch[1] : '8000';
-      const baseUrl = `http://${host}:${port}/v1`;
+      let host = rawHost.includes('@') ? rawHost.split('@').pop() : (rawHost === 'local' ? 'localhost' : rawHost);
+      const portMatch = localTask?.payload?._cmd?.match(/--port\s+(\d+)/)
+        || localTask?.payload?._cmd?.match(/OLLAMA_HOST=[^\s:]+:(\d+)/);
+      let port = portMatch ? portMatch[1] : '8000';
+      let baseUrl = `http://${host}:${port}/v1`;
+      const snapshot = t.output || localTask?.output || '';
+      const ollamaUrlMatch = snapshot.match(/Ollama API ready on port\s+\d+:\s*(http:\/\/[^\s]+)/i);
+      if (ollamaUrlMatch) {
+        try {
+          const u = new URL(ollamaUrlMatch[1]);
+          host = u.hostname || host;
+          port = u.port || '11434';
+          baseUrl = `${u.origin}/v1`;
+        } catch {}
+      }
      const _isDiffusion = localTask?.payload?._cmd?.includes('diffusion_server');

      _updateTask(t.session_id, { _serveReady: true, _endpointAdded: true });