fix(cookbook): only block model launch on real port collisions (#4760)

* Fix #4507: only block model launch on real port collisions Quick-run hardcoded port 8000 and never called _nextAvailablePort(), so every launch collided. Both pre-launch guards (serve panel + quick-run) were count-based and fired regardless of port. - quick-run now auto-assigns a free port (8080 for llama.cpp) - both guards parse the new port and only prompt on a real overlap, stopping only the colliding serve - dialog reports the actual port instead of a hardcoded 8000 * refactor(cookbook): share _taskPort for port parsing; auto-assign llama.cpp port Addresses review on #4760: - _taskPort regex now matches --port= as well as --port (space) - _nextAvailablePort and both launch guards reuse _taskPort instead of inline regex - quick-run llama.cpp no longer pins 8080, so two can run concurrently * fix(cookbook): _taskPort also parses -p; add port-parsing tests Addresses review on #4760: - _taskPort now matches -p <n> too, so it's the complete single reader (was missing the short flag that other readers already handle) - add tests/test_cookbook_port_parsing_js.py covering the port forms, shared-reader reuse, and llama.cpp auto-assign * test(cookbook): extract pure port helpers and test behavior Addresses review on #4760: the prior tests only asserted source strings. - extract portOf() and nextFreePort() into static/js/cookbookPorts.js - cookbookRunning.js imports them; _taskPort and _nextAvailablePort delegate - tests run the helpers via node and assert real behavior: all port forms (--port, --port=, -p, -p=), next-free-port skipping taken ports, and the same-port-clash / different-port-coexist outcome --------- Co-authored-by: samy <samy@odysseus.boukouro.com>
2026-06-26 14:45:33 -04:00 · 2026-06-24 13:44:09 -04:00
parent 22379fe736
commit 5d23495eb2
5 changed files with 130 additions and 56 deletions
@@ -31,7 +31,7 @@ import {
 } from './cookbook.js';
 import uiModule from './ui.js';
 import spinnerModule from './spinner.js';
-import { _loadTasks, _tmuxGracefulKill } from './cookbookRunning.js';
+import { _loadTasks, _tmuxGracefulKill, _nextAvailablePort, _taskPort } from './cookbookRunning.js';
 import { openCookbookDependencies } from './cookbook-diagnosis.js';

 // Map a serve-backend code (vllm / sglang / llamacpp) → the package name
@@ -1493,36 +1493,34 @@ export function _expandModelRow(row, modelData) {
        }
        return;
      }
+      // Detect backend and port now — the pre-launch guard below needs them.
+      const _qrBackendDetect = _detectBackend(modelData);
+      const _qrRunBackend = _qrBackendDetect.backend || 'vllm';
+      const _qrPort = _nextAvailablePort();

-      // ─── Pre-launch: stop the model already serving on this host ───────
-      // Two servers can't share port 8000. Without this, the new launch
-      // silently collided and the user saw no feedback. We surface the
-      // conflict and offer to kill the running one first as the default
-      // action (it's almost always what the user wants).
+      // ─── Pre-launch: stop colliding serves on the same port ───────
+      // Different ports coexist fine (e.g. vLLM on 8000 + Qwen VL on
+      // 8001). Only block when the new model's port genuinely collides
+      // with a running serve. (Issue #4507)
      try {
        const _qrHostStr = _envState.remoteHost || '';
-        const _activeServes = _loadTasks().filter(t =>
+        const _allServes = _loadTasks().filter(t =>
          t && t.type === 'serve'
          && (t.remoteHost || '') === _qrHostStr
          && (t.status === 'running' || t.status === 'ready' || t._serveReady)
        );
-        if (_activeServes.length) {
-          const _names = _activeServes.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
+        const _clashing = _allServes.filter(t => _taskPort(t) === _qrPort);
+        if (_clashing.length) {
+          const _names = _clashing.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
          const _ok = await window.styledConfirm?.(
-            `${_names.length} model${_names.length === 1 ? '' : 's'} already serving on ${_qrHostStr || 'local'} (${_names.join(', ')}). Port 8000 will collide. Stop the running model and launch this one?`,
+            `${_clashing.length} model${_clashing.length === 1 ? '' : 's'} on port ${_qrPort} (${_names.join(', ')}). Stop it and launch this one?`,
            { confirmText: 'Stop & launch', cancelText: 'Cancel' }
          );
          if (!_ok) return;
-          // Mark + kill each running serve, then wait briefly for the
-          // tmux session to actually go down before we kick off the new
-          // launch. Otherwise vLLM still races against the dying socket.
          quickRunBtn.disabled = true;
          quickRunBtn.textContent = 'Stopping…';
-          for (const t of _activeServes) {
+          for (const t of _clashing) {
            try {
-              // Use that task's own Stop button if it's rendered (handles
-              // endpoint cleanup, Ollama unload, fade-out). Falls back to
-              // a direct tmux kill if the Active tab isn't in the DOM yet.
              const _taskEl = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
              const _stopBtn = _taskEl?.querySelector('.cookbook-task-action-stop');
              if (_stopBtn) {
@@ -1537,11 +1535,12 @@ export function _expandModelRow(row, modelData) {
              }
            } catch (_killErr) { /* best-effort */ }
          }
-          // Give the OS a beat to release port 8000.
          await new Promise(r => setTimeout(r, 2500));
        }
      } catch (_e) { /* best-effort */ }

+      // -- Launch ───────────────────────────────────────────────────
+
      // ─── Pre-launch driver check ─────────────────────────────────────
      // vLLM/SGLang need a working CUDA/ROCm driver. nvidia-smi failures
      // surface as system.gpu_error from our hardware probe; "no GPU
@@ -1550,8 +1549,6 @@ export function _expandModelRow(row, modelData) {
      // user watches `pip install vllm` finish, then sees a cryptic CUDA
      // error 10 minutes later. (llama.cpp / Ollama have CPU fallbacks
      // so they skip this gate.)
-      const _qrBackendDetect = _detectBackend(modelData);
-      const _qrRunBackend = _qrBackendDetect.backend || 'vllm';
      if (_qrRunBackend === 'vllm' || _qrRunBackend === 'sglang') {
        const _sys = _hwfitCache?.system || {};
        if (_sys.gpu_error) {
@@ -1658,7 +1655,7 @@ export function _expandModelRow(row, modelData) {

      const host = _envState.remoteHost || '';
      const hostIp = host.includes('@') ? host.split('@').pop() : host;
-      const port = '8000';
+      const port = _qrPort;
      const detected = _detectBackend(modelData);
      const runBackend = detected.backend || 'vllm';

@@ -1673,7 +1670,7 @@ export function _expandModelRow(row, modelData) {
      } else if (runBackend === 'llamacpp') {
        const dir = `"$HOME/.cache/huggingface/hub/models--${modelData.name.replace(/\//g, '--')}/snapshots"`;
        const ggufPath = `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
-        cmd = `llama-server --model "${ggufPath}" --host 0.0.0.0 --port 8080 -ngl 99 -c ${maxCtx} --flash-attn auto`;
+        cmd = `llama-server --model "${ggufPath}" --host 0.0.0.0 --port ${port} -ngl 99 -c ${maxCtx} --flash-attn auto`;
      } else {
        cmd = `vllm serve ${modelData.name} --host 0.0.0.0 --port ${port}`;
        cmd += ` --tensor-parallel-size ${tp}`;
@@ -0,0 +1,19 @@
+// Pure port helpers extracted so they're unit-testable without the
+// browser-bound rest of cookbookRunning.js (issue #4507 follow-up).
+
+// Read the port out of a serve launch command. Handles --port 8000,
+// --port=8000, -p 8000, and -p=8000. Returns '' when none is present.
+export function portOf(cmd) {
+  const s = cmd || '';
+  const m = s.match(/--port[=\s]+(\d+)/) || s.match(/(?:^|\s)-p[=\s]+(\d+)/);
+  return m ? m[1] : '';
+}
+
+// Lowest free port >= start that isn't in usedPorts (array or Set of
+// numbers/strings). Returns a string to match the serve command format.
+export function nextFreePort(usedPorts, start = 8000) {
+  const used = new Set([...usedPorts].map(p => parseInt(p, 10)));
+  let port = start;
+  while (used.has(port)) port++;
+  return String(port);
+}
@@ -8,6 +8,7 @@ import uiModule from './ui.js';
 import { _diagnose, _showDiagnosis, _clearDiagnosis } from './cookbook-diagnosis.js';
 import { registerMenuDismiss } from './escMenuStack.js';
 import { computeProgressSignal } from './cookbookProgressSignal.js';
+import { portOf, nextFreePort } from './cookbookPorts.js';

 // Human-friendly badge label for a task's internal status. Avoids surfacing
 // the word "error" in the sidebar — a server the user stopped or one that
@@ -266,9 +267,7 @@ function _taskHostLabel(task) {
 }

 function _taskPort(task) {
-  const cmd = task?.payload?._cmd || '';
-  const match = cmd.match(/--port\s+(\d+)/);
-  return match ? match[1] : '';
+  return portOf(task?.payload?._cmd || '');
 }

 function _buildCrashReport(task, outputText) {
@@ -455,16 +454,14 @@ function _nextAvailablePort() {
  const usedPorts = new Set();
  tasks.forEach(t => {
    if (t.type === 'serve' && (t.status === 'running' || t.status === 'queued')) {
-      const m = t.payload?._cmd?.match(/--port\s+(\d+)/);
-      if (m) usedPorts.add(parseInt(m[1]));
+      const p = _taskPort(t);
+      if (p) usedPorts.add(parseInt(p));
    }
  });
  presets.forEach(p => {
    if (p.port) usedPorts.add(parseInt(p.port));
  });
-  let port = 8000;
-  while (usedPorts.has(port)) port++;
-  return String(port);
+  return nextFreePort(usedPorts);
 }

 // ── Endpoint cleanup ──
@@ -3987,4 +3984,4 @@ export function initRunning(shared) {
 }

 // Also export _retryDownload and _nextAvailablePort for use by other modules
-export { _retryDownload, _nextAvailablePort, _processQueue };
+export { _retryDownload, _nextAvailablePort, _processQueue, _taskPort };
@@ -2958,33 +2958,41 @@ function _rerenderCachedModels() {
            && ((t.remoteHost || '') === _hostStr || (t.remoteServerKey || '') === _serverKeyStr)
            && (t.status === 'running' || t.status === 'ready' || t._serveReady)
          );
+          // Only block when the new model's port genuinely collides with
+          // a running serve. Different ports coexist fine (issue #4507).
          if (_active.length) {
-            const _names = _active.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
-            const _ok = await window.styledConfirm(
-              `${_active.length} model${_active.length === 1 ? '' : 's'} already serving on ${_hostStr || 'local'} (${_names.join(', ')}). Port 8000 will collide. Stop the running model and launch this one?`,
-              { title: 'Server already running', confirmText: 'Stop & launch', cancelText: 'Cancel' },
-            );
-            if (!_ok) { _restoreLaunchBtn(); return; }
-            // Kill each active serve; prefer the rendered Stop button so
-            // endpoint cleanup + Ollama unload run normally. Fall back to
-            // a raw tmux kill when the Active tab isn't in the DOM.
-            for (const t of _active) {
-              try {
-                const _el = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
-                const _btn = _el?.querySelector('.cookbook-task-action-stop');
-                if (_btn) {
-                  _btn.click();
-                } else if (_runningMod._tmuxGracefulKill) {
-                  await fetch('/api/shell/exec', {
-                    method: 'POST', credentials: 'same-origin',
-                    headers: { 'Content-Type': 'application/json' },
-                    body: JSON.stringify({ command: _runningMod._tmuxGracefulKill(t) }),
-                  });
-                }
-              } catch (_killErr) { /* best-effort */ }
+            const _newPort = (launchCmd.match(/--port[=\s]+(\d+)/) || [])[1] || '';
+            const _clashing = _newPort
+              ? _active.filter(t => _runningMod._taskPort(t) === _newPort)
+              : _active;
+            if (_clashing.length) {
+              const _names = _clashing.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
+              const _portNote = _newPort ? ` on port ${_newPort}` : '';
+              const _ok = await window.styledConfirm(
+                `${_clashing.length} model${_clashing.length === 1 ? '' : 's'} already serving on ${_hostStr || 'local'} (${_names.join(', ')})${_portNote}. Stop it and launch this one?`,
+                { title: _newPort ? `Port ${_newPort} in use` : 'Server already running', confirmText: 'Stop & launch', cancelText: 'Cancel' },
+              );
+              if (!_ok) { _restoreLaunchBtn(); return; }
+              // Kill each clashing serve; prefer the rendered Stop button so
+              // endpoint cleanup + Ollama unload run normally. Fall back to
+              // a raw tmux kill when the Active tab isn't in the DOM.
+              for (const t of _clashing) {
+                try {
+                  const _el = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
+                  const _btn = _el?.querySelector('.cookbook-task-action-stop');
+                  if (_btn) {
+                    _btn.click();
+                  } else if (_runningMod._tmuxGracefulKill) {
+                    await fetch('/api/shell/exec', {
+                      method: 'POST', credentials: 'same-origin',
+                      headers: { 'Content-Type': 'application/json' },
+                      body: JSON.stringify({ command: _runningMod._tmuxGracefulKill(t) }),
+                    });
+                  }
+                } catch (_killErr) { /* best-effort */ }
+              }
+              await new Promise(r => setTimeout(r, 2500));
            }
-            // Give the OS a beat to release port 8000.
-            await new Promise(r => setTimeout(r, 2500));
          }
        } catch (_e) { /* best-effort */ }

@@ -0,0 +1,53 @@
+"""Behavioral tests for Cookbook port parsing / picking (#4507 follow-up).
+
+Driven through `node --input-type=module` (same approach as the other
+*_js.py tests); skips when `node` is not installed.
+"""
+import json
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+_REPO = Path(__file__).resolve().parent.parent
+_HELPER = _REPO / "static" / "js" / "cookbookPorts.js"
+_HAS_NODE = shutil.which("node") is not None
+
+
+def _run(expr):
+    js = (
+        f"import {{ portOf, nextFreePort }} from '{_HELPER.as_posix()}';"
+        f"console.log(JSON.stringify({expr}));"
+    )
+    proc = subprocess.run(
+        ["node", "--input-type=module"],
+        input=js, capture_output=True, text=True, cwd=str(_REPO), timeout=30,
+    )
+    assert proc.returncode == 0, proc.stderr
+    return json.loads(proc.stdout.strip())
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_port_of_handles_all_forms():
+    assert _run("portOf('vllm serve m --host 0.0.0.0 --port 8000')") == "8000"
+    assert _run("portOf('x --port=8001')") == "8001"
+    assert _run("portOf('llama-server -p 8002')") == "8002"
+    assert _run("portOf('llama-server -p=8003')") == "8003"
+    assert _run("portOf('serve with no port flag')") == ""
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_next_free_port_skips_taken_including_eq_and_short_flag():
+    # a --port= serve and a -p serve are both 'taken'; picker skips them
+    taken = "[portOf('a --port=8000'), portOf('b -p 8001')]"
+    assert _run(f"nextFreePort({taken})") == "8002"
+    assert _run("nextFreePort([])") == "8000"
+    assert _run("nextFreePort(['8000', '8002'])") == "8001"
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_clash_outcome_same_port_flagged_different_ignored():
+    # the guard's predicate is portOf(cmd) === target
+    assert _run("portOf('m --port 8000') === '8000'") is True
+    assert _run("portOf('m --port 8001') === '8000'") is False