fix(cookbook): only block model launch on real port collisions (#4760)

* Fix #4507: only block model launch on real port collisions

Quick-run hardcoded port 8000 and never called _nextAvailablePort(), so
every launch collided. Both pre-launch guards (serve panel + quick-run)
were count-based and fired regardless of port.

- quick-run now auto-assigns a free port (8080 for llama.cpp)
- both guards parse the new port and only prompt on a real overlap,
  stopping only the colliding serve
- dialog reports the actual port instead of a hardcoded 8000

* refactor(cookbook): share _taskPort for port parsing; auto-assign llama.cpp port

Addresses review on #4760:
- _taskPort regex now matches --port= as well as --port (space)
- _nextAvailablePort and both launch guards reuse _taskPort instead of inline regex
- quick-run llama.cpp no longer pins 8080, so two can run concurrently

* fix(cookbook): _taskPort also parses -p; add port-parsing tests

Addresses review on #4760:
- _taskPort now matches -p <n> too, so it's the complete single reader
  (was missing the short flag that other readers already handle)
- add tests/test_cookbook_port_parsing_js.py covering the port forms,
  shared-reader reuse, and llama.cpp auto-assign

* test(cookbook): extract pure port helpers and test behavior

Addresses review on #4760: the prior tests only asserted source strings.
- extract portOf() and nextFreePort() into static/js/cookbookPorts.js
- cookbookRunning.js imports them; _taskPort and _nextAvailablePort delegate
- tests run the helpers via node and assert real behavior: all port forms
  (--port, --port=, -p, -p=), next-free-port skipping taken ports, and the
  same-port-clash / different-port-coexist outcome

---------

Co-authored-by: samy <samy@odysseus.boukouro.com>
This commit is contained in:
Samy
2026-06-24 13:44:09 -04:00
committed by GitHub
parent 22379fe736
commit 5d23495eb2
5 changed files with 130 additions and 56 deletions
+19 -22
View File
@@ -31,7 +31,7 @@ import {
} from './cookbook.js';
import uiModule from './ui.js';
import spinnerModule from './spinner.js';
import { _loadTasks, _tmuxGracefulKill } from './cookbookRunning.js';
import { _loadTasks, _tmuxGracefulKill, _nextAvailablePort, _taskPort } from './cookbookRunning.js';
import { openCookbookDependencies } from './cookbook-diagnosis.js';
// Map a serve-backend code (vllm / sglang / llamacpp) → the package name
@@ -1493,36 +1493,34 @@ export function _expandModelRow(row, modelData) {
}
return;
}
// Detect backend and port now — the pre-launch guard below needs them.
const _qrBackendDetect = _detectBackend(modelData);
const _qrRunBackend = _qrBackendDetect.backend || 'vllm';
const _qrPort = _nextAvailablePort();
// ─── Pre-launch: stop the model already serving on this host ───────
// Two servers can't share port 8000. Without this, the new launch
// silently collided and the user saw no feedback. We surface the
// conflict and offer to kill the running one first as the default
// action (it's almost always what the user wants).
// ─── Pre-launch: stop colliding serves on the same port ───────
// Different ports coexist fine (e.g. vLLM on 8000 + Qwen VL on
// 8001). Only block when the new model's port genuinely collides
// with a running serve. (Issue #4507)
try {
const _qrHostStr = _envState.remoteHost || '';
const _activeServes = _loadTasks().filter(t =>
const _allServes = _loadTasks().filter(t =>
t && t.type === 'serve'
&& (t.remoteHost || '') === _qrHostStr
&& (t.status === 'running' || t.status === 'ready' || t._serveReady)
);
if (_activeServes.length) {
const _names = _activeServes.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
const _clashing = _allServes.filter(t => _taskPort(t) === _qrPort);
if (_clashing.length) {
const _names = _clashing.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
const _ok = await window.styledConfirm?.(
`${_names.length} model${_names.length === 1 ? '' : 's'} already serving on ${_qrHostStr || 'local'} (${_names.join(', ')}). Port 8000 will collide. Stop the running model and launch this one?`,
`${_clashing.length} model${_clashing.length === 1 ? '' : 's'} on port ${_qrPort} (${_names.join(', ')}). Stop it and launch this one?`,
{ confirmText: 'Stop & launch', cancelText: 'Cancel' }
);
if (!_ok) return;
// Mark + kill each running serve, then wait briefly for the
// tmux session to actually go down before we kick off the new
// launch. Otherwise vLLM still races against the dying socket.
quickRunBtn.disabled = true;
quickRunBtn.textContent = 'Stopping…';
for (const t of _activeServes) {
for (const t of _clashing) {
try {
// Use that task's own Stop button if it's rendered (handles
// endpoint cleanup, Ollama unload, fade-out). Falls back to
// a direct tmux kill if the Active tab isn't in the DOM yet.
const _taskEl = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
const _stopBtn = _taskEl?.querySelector('.cookbook-task-action-stop');
if (_stopBtn) {
@@ -1537,11 +1535,12 @@ export function _expandModelRow(row, modelData) {
}
} catch (_killErr) { /* best-effort */ }
}
// Give the OS a beat to release port 8000.
await new Promise(r => setTimeout(r, 2500));
}
} catch (_e) { /* best-effort */ }
// -- Launch ───────────────────────────────────────────────────
// ─── Pre-launch driver check ─────────────────────────────────────
// vLLM/SGLang need a working CUDA/ROCm driver. nvidia-smi failures
// surface as system.gpu_error from our hardware probe; "no GPU
@@ -1550,8 +1549,6 @@ export function _expandModelRow(row, modelData) {
// user watches `pip install vllm` finish, then sees a cryptic CUDA
// error 10 minutes later. (llama.cpp / Ollama have CPU fallbacks
// so they skip this gate.)
const _qrBackendDetect = _detectBackend(modelData);
const _qrRunBackend = _qrBackendDetect.backend || 'vllm';
if (_qrRunBackend === 'vllm' || _qrRunBackend === 'sglang') {
const _sys = _hwfitCache?.system || {};
if (_sys.gpu_error) {
@@ -1658,7 +1655,7 @@ export function _expandModelRow(row, modelData) {
const host = _envState.remoteHost || '';
const hostIp = host.includes('@') ? host.split('@').pop() : host;
const port = '8000';
const port = _qrPort;
const detected = _detectBackend(modelData);
const runBackend = detected.backend || 'vllm';
@@ -1673,7 +1670,7 @@ export function _expandModelRow(row, modelData) {
} else if (runBackend === 'llamacpp') {
const dir = `"$HOME/.cache/huggingface/hub/models--${modelData.name.replace(/\//g, '--')}/snapshots"`;
const ggufPath = `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
cmd = `llama-server --model "${ggufPath}" --host 0.0.0.0 --port 8080 -ngl 99 -c ${maxCtx} --flash-attn auto`;
cmd = `llama-server --model "${ggufPath}" --host 0.0.0.0 --port ${port} -ngl 99 -c ${maxCtx} --flash-attn auto`;
} else {
cmd = `vllm serve ${modelData.name} --host 0.0.0.0 --port ${port}`;
cmd += ` --tensor-parallel-size ${tp}`;
+19
View File
@@ -0,0 +1,19 @@
// Pure port helpers extracted so they're unit-testable without the
// browser-bound rest of cookbookRunning.js (issue #4507 follow-up).
// Read the port out of a serve launch command. Handles --port 8000,
// --port=8000, -p 8000, and -p=8000. Returns '' when none is present.
export function portOf(cmd) {
const s = cmd || '';
const m = s.match(/--port[=\s]+(\d+)/) || s.match(/(?:^|\s)-p[=\s]+(\d+)/);
return m ? m[1] : '';
}
// Lowest free port >= start that isn't in usedPorts (array or Set of
// numbers/strings). Returns a string to match the serve command format.
export function nextFreePort(usedPorts, start = 8000) {
const used = new Set([...usedPorts].map(p => parseInt(p, 10)));
let port = start;
while (used.has(port)) port++;
return String(port);
}
+6 -9
View File
@@ -8,6 +8,7 @@ import uiModule from './ui.js';
import { _diagnose, _showDiagnosis, _clearDiagnosis } from './cookbook-diagnosis.js';
import { registerMenuDismiss } from './escMenuStack.js';
import { computeProgressSignal } from './cookbookProgressSignal.js';
import { portOf, nextFreePort } from './cookbookPorts.js';
// Human-friendly badge label for a task's internal status. Avoids surfacing
// the word "error" in the sidebar — a server the user stopped or one that
@@ -266,9 +267,7 @@ function _taskHostLabel(task) {
}
function _taskPort(task) {
const cmd = task?.payload?._cmd || '';
const match = cmd.match(/--port\s+(\d+)/);
return match ? match[1] : '';
return portOf(task?.payload?._cmd || '');
}
function _buildCrashReport(task, outputText) {
@@ -455,16 +454,14 @@ function _nextAvailablePort() {
const usedPorts = new Set();
tasks.forEach(t => {
if (t.type === 'serve' && (t.status === 'running' || t.status === 'queued')) {
const m = t.payload?._cmd?.match(/--port\s+(\d+)/);
if (m) usedPorts.add(parseInt(m[1]));
const p = _taskPort(t);
if (p) usedPorts.add(parseInt(p));
}
});
presets.forEach(p => {
if (p.port) usedPorts.add(parseInt(p.port));
});
let port = 8000;
while (usedPorts.has(port)) port++;
return String(port);
return nextFreePort(usedPorts);
}
// ── Endpoint cleanup ──
@@ -3987,4 +3984,4 @@ export function initRunning(shared) {
}
// Also export _retryDownload and _nextAvailablePort for use by other modules
export { _retryDownload, _nextAvailablePort, _processQueue };
export { _retryDownload, _nextAvailablePort, _processQueue, _taskPort };
+33 -25
View File
@@ -2958,33 +2958,41 @@ function _rerenderCachedModels() {
&& ((t.remoteHost || '') === _hostStr || (t.remoteServerKey || '') === _serverKeyStr)
&& (t.status === 'running' || t.status === 'ready' || t._serveReady)
);
// Only block when the new model's port genuinely collides with
// a running serve. Different ports coexist fine (issue #4507).
if (_active.length) {
const _names = _active.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
const _ok = await window.styledConfirm(
`${_active.length} model${_active.length === 1 ? '' : 's'} already serving on ${_hostStr || 'local'} (${_names.join(', ')}). Port 8000 will collide. Stop the running model and launch this one?`,
{ title: 'Server already running', confirmText: 'Stop & launch', cancelText: 'Cancel' },
);
if (!_ok) { _restoreLaunchBtn(); return; }
// Kill each active serve; prefer the rendered Stop button so
// endpoint cleanup + Ollama unload run normally. Fall back to
// a raw tmux kill when the Active tab isn't in the DOM.
for (const t of _active) {
try {
const _el = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
const _btn = _el?.querySelector('.cookbook-task-action-stop');
if (_btn) {
_btn.click();
} else if (_runningMod._tmuxGracefulKill) {
await fetch('/api/shell/exec', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ command: _runningMod._tmuxGracefulKill(t) }),
});
}
} catch (_killErr) { /* best-effort */ }
const _newPort = (launchCmd.match(/--port[=\s]+(\d+)/) || [])[1] || '';
const _clashing = _newPort
? _active.filter(t => _runningMod._taskPort(t) === _newPort)
: _active;
if (_clashing.length) {
const _names = _clashing.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
const _portNote = _newPort ? ` on port ${_newPort}` : '';
const _ok = await window.styledConfirm(
`${_clashing.length} model${_clashing.length === 1 ? '' : 's'} already serving on ${_hostStr || 'local'} (${_names.join(', ')})${_portNote}. Stop it and launch this one?`,
{ title: _newPort ? `Port ${_newPort} in use` : 'Server already running', confirmText: 'Stop & launch', cancelText: 'Cancel' },
);
if (!_ok) { _restoreLaunchBtn(); return; }
// Kill each clashing serve; prefer the rendered Stop button so
// endpoint cleanup + Ollama unload run normally. Fall back to
// a raw tmux kill when the Active tab isn't in the DOM.
for (const t of _clashing) {
try {
const _el = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
const _btn = _el?.querySelector('.cookbook-task-action-stop');
if (_btn) {
_btn.click();
} else if (_runningMod._tmuxGracefulKill) {
await fetch('/api/shell/exec', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ command: _runningMod._tmuxGracefulKill(t) }),
});
}
} catch (_killErr) { /* best-effort */ }
}
await new Promise(r => setTimeout(r, 2500));
}
// Give the OS a beat to release port 8000.
await new Promise(r => setTimeout(r, 2500));
}
} catch (_e) { /* best-effort */ }
+53
View File
@@ -0,0 +1,53 @@
"""Behavioral tests for Cookbook port parsing / picking (#4507 follow-up).
Driven through `node --input-type=module` (same approach as the other
*_js.py tests); skips when `node` is not installed.
"""
import json
import shutil
import subprocess
from pathlib import Path
import pytest
_REPO = Path(__file__).resolve().parent.parent
_HELPER = _REPO / "static" / "js" / "cookbookPorts.js"
_HAS_NODE = shutil.which("node") is not None
def _run(expr):
js = (
f"import {{ portOf, nextFreePort }} from '{_HELPER.as_posix()}';"
f"console.log(JSON.stringify({expr}));"
)
proc = subprocess.run(
["node", "--input-type=module"],
input=js, capture_output=True, text=True, cwd=str(_REPO), timeout=30,
)
assert proc.returncode == 0, proc.stderr
return json.loads(proc.stdout.strip())
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_port_of_handles_all_forms():
assert _run("portOf('vllm serve m --host 0.0.0.0 --port 8000')") == "8000"
assert _run("portOf('x --port=8001')") == "8001"
assert _run("portOf('llama-server -p 8002')") == "8002"
assert _run("portOf('llama-server -p=8003')") == "8003"
assert _run("portOf('serve with no port flag')") == ""
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_next_free_port_skips_taken_including_eq_and_short_flag():
# a --port= serve and a -p serve are both 'taken'; picker skips them
taken = "[portOf('a --port=8000'), portOf('b -p 8001')]"
assert _run(f"nextFreePort({taken})") == "8002"
assert _run("nextFreePort([])") == "8000"
assert _run("nextFreePort(['8000', '8002'])") == "8001"
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_clash_outcome_same_port_flagged_different_ignored():
# the guard's predicate is portOf(cmd) === target
assert _run("portOf('m --port 8000') === '8000'") is True
assert _run("portOf('m --port 8001') === '8000'") is False