mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-26 14:45:33 -04:00
fix(cookbook): only block model launch on real port collisions (#4760)
* Fix #4507: only block model launch on real port collisions Quick-run hardcoded port 8000 and never called _nextAvailablePort(), so every launch collided. Both pre-launch guards (serve panel + quick-run) were count-based and fired regardless of port. - quick-run now auto-assigns a free port (8080 for llama.cpp) - both guards parse the new port and only prompt on a real overlap, stopping only the colliding serve - dialog reports the actual port instead of a hardcoded 8000 * refactor(cookbook): share _taskPort for port parsing; auto-assign llama.cpp port Addresses review on #4760: - _taskPort regex now matches --port= as well as --port (space) - _nextAvailablePort and both launch guards reuse _taskPort instead of inline regex - quick-run llama.cpp no longer pins 8080, so two can run concurrently * fix(cookbook): _taskPort also parses -p; add port-parsing tests Addresses review on #4760: - _taskPort now matches -p <n> too, so it's the complete single reader (was missing the short flag that other readers already handle) - add tests/test_cookbook_port_parsing_js.py covering the port forms, shared-reader reuse, and llama.cpp auto-assign * test(cookbook): extract pure port helpers and test behavior Addresses review on #4760: the prior tests only asserted source strings. - extract portOf() and nextFreePort() into static/js/cookbookPorts.js - cookbookRunning.js imports them; _taskPort and _nextAvailablePort delegate - tests run the helpers via node and assert real behavior: all port forms (--port, --port=, -p, -p=), next-free-port skipping taken ports, and the same-port-clash / different-port-coexist outcome --------- Co-authored-by: samy <samy@odysseus.boukouro.com>
This commit is contained in:
+19
-22
@@ -31,7 +31,7 @@ import {
|
||||
} from './cookbook.js';
|
||||
import uiModule from './ui.js';
|
||||
import spinnerModule from './spinner.js';
|
||||
import { _loadTasks, _tmuxGracefulKill } from './cookbookRunning.js';
|
||||
import { _loadTasks, _tmuxGracefulKill, _nextAvailablePort, _taskPort } from './cookbookRunning.js';
|
||||
import { openCookbookDependencies } from './cookbook-diagnosis.js';
|
||||
|
||||
// Map a serve-backend code (vllm / sglang / llamacpp) → the package name
|
||||
@@ -1493,36 +1493,34 @@ export function _expandModelRow(row, modelData) {
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Detect backend and port now — the pre-launch guard below needs them.
|
||||
const _qrBackendDetect = _detectBackend(modelData);
|
||||
const _qrRunBackend = _qrBackendDetect.backend || 'vllm';
|
||||
const _qrPort = _nextAvailablePort();
|
||||
|
||||
// ─── Pre-launch: stop the model already serving on this host ───────
|
||||
// Two servers can't share port 8000. Without this, the new launch
|
||||
// silently collided and the user saw no feedback. We surface the
|
||||
// conflict and offer to kill the running one first as the default
|
||||
// action (it's almost always what the user wants).
|
||||
// ─── Pre-launch: stop colliding serves on the same port ───────
|
||||
// Different ports coexist fine (e.g. vLLM on 8000 + Qwen VL on
|
||||
// 8001). Only block when the new model's port genuinely collides
|
||||
// with a running serve. (Issue #4507)
|
||||
try {
|
||||
const _qrHostStr = _envState.remoteHost || '';
|
||||
const _activeServes = _loadTasks().filter(t =>
|
||||
const _allServes = _loadTasks().filter(t =>
|
||||
t && t.type === 'serve'
|
||||
&& (t.remoteHost || '') === _qrHostStr
|
||||
&& (t.status === 'running' || t.status === 'ready' || t._serveReady)
|
||||
);
|
||||
if (_activeServes.length) {
|
||||
const _names = _activeServes.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
|
||||
const _clashing = _allServes.filter(t => _taskPort(t) === _qrPort);
|
||||
if (_clashing.length) {
|
||||
const _names = _clashing.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
|
||||
const _ok = await window.styledConfirm?.(
|
||||
`${_names.length} model${_names.length === 1 ? '' : 's'} already serving on ${_qrHostStr || 'local'} (${_names.join(', ')}). Port 8000 will collide. Stop the running model and launch this one?`,
|
||||
`${_clashing.length} model${_clashing.length === 1 ? '' : 's'} on port ${_qrPort} (${_names.join(', ')}). Stop it and launch this one?`,
|
||||
{ confirmText: 'Stop & launch', cancelText: 'Cancel' }
|
||||
);
|
||||
if (!_ok) return;
|
||||
// Mark + kill each running serve, then wait briefly for the
|
||||
// tmux session to actually go down before we kick off the new
|
||||
// launch. Otherwise vLLM still races against the dying socket.
|
||||
quickRunBtn.disabled = true;
|
||||
quickRunBtn.textContent = 'Stopping…';
|
||||
for (const t of _activeServes) {
|
||||
for (const t of _clashing) {
|
||||
try {
|
||||
// Use that task's own Stop button if it's rendered (handles
|
||||
// endpoint cleanup, Ollama unload, fade-out). Falls back to
|
||||
// a direct tmux kill if the Active tab isn't in the DOM yet.
|
||||
const _taskEl = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
|
||||
const _stopBtn = _taskEl?.querySelector('.cookbook-task-action-stop');
|
||||
if (_stopBtn) {
|
||||
@@ -1537,11 +1535,12 @@ export function _expandModelRow(row, modelData) {
|
||||
}
|
||||
} catch (_killErr) { /* best-effort */ }
|
||||
}
|
||||
// Give the OS a beat to release port 8000.
|
||||
await new Promise(r => setTimeout(r, 2500));
|
||||
}
|
||||
} catch (_e) { /* best-effort */ }
|
||||
|
||||
// -- Launch ───────────────────────────────────────────────────
|
||||
|
||||
// ─── Pre-launch driver check ─────────────────────────────────────
|
||||
// vLLM/SGLang need a working CUDA/ROCm driver. nvidia-smi failures
|
||||
// surface as system.gpu_error from our hardware probe; "no GPU
|
||||
@@ -1550,8 +1549,6 @@ export function _expandModelRow(row, modelData) {
|
||||
// user watches `pip install vllm` finish, then sees a cryptic CUDA
|
||||
// error 10 minutes later. (llama.cpp / Ollama have CPU fallbacks
|
||||
// so they skip this gate.)
|
||||
const _qrBackendDetect = _detectBackend(modelData);
|
||||
const _qrRunBackend = _qrBackendDetect.backend || 'vllm';
|
||||
if (_qrRunBackend === 'vllm' || _qrRunBackend === 'sglang') {
|
||||
const _sys = _hwfitCache?.system || {};
|
||||
if (_sys.gpu_error) {
|
||||
@@ -1658,7 +1655,7 @@ export function _expandModelRow(row, modelData) {
|
||||
|
||||
const host = _envState.remoteHost || '';
|
||||
const hostIp = host.includes('@') ? host.split('@').pop() : host;
|
||||
const port = '8000';
|
||||
const port = _qrPort;
|
||||
const detected = _detectBackend(modelData);
|
||||
const runBackend = detected.backend || 'vllm';
|
||||
|
||||
@@ -1673,7 +1670,7 @@ export function _expandModelRow(row, modelData) {
|
||||
} else if (runBackend === 'llamacpp') {
|
||||
const dir = `"$HOME/.cache/huggingface/hub/models--${modelData.name.replace(/\//g, '--')}/snapshots"`;
|
||||
const ggufPath = `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
|
||||
cmd = `llama-server --model "${ggufPath}" --host 0.0.0.0 --port 8080 -ngl 99 -c ${maxCtx} --flash-attn auto`;
|
||||
cmd = `llama-server --model "${ggufPath}" --host 0.0.0.0 --port ${port} -ngl 99 -c ${maxCtx} --flash-attn auto`;
|
||||
} else {
|
||||
cmd = `vllm serve ${modelData.name} --host 0.0.0.0 --port ${port}`;
|
||||
cmd += ` --tensor-parallel-size ${tp}`;
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
// Pure port helpers extracted so they're unit-testable without the
|
||||
// browser-bound rest of cookbookRunning.js (issue #4507 follow-up).
|
||||
|
||||
// Read the port out of a serve launch command. Handles --port 8000,
|
||||
// --port=8000, -p 8000, and -p=8000. Returns '' when none is present.
|
||||
export function portOf(cmd) {
|
||||
const s = cmd || '';
|
||||
const m = s.match(/--port[=\s]+(\d+)/) || s.match(/(?:^|\s)-p[=\s]+(\d+)/);
|
||||
return m ? m[1] : '';
|
||||
}
|
||||
|
||||
// Lowest free port >= start that isn't in usedPorts (array or Set of
|
||||
// numbers/strings). Returns a string to match the serve command format.
|
||||
export function nextFreePort(usedPorts, start = 8000) {
|
||||
const used = new Set([...usedPorts].map(p => parseInt(p, 10)));
|
||||
let port = start;
|
||||
while (used.has(port)) port++;
|
||||
return String(port);
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import uiModule from './ui.js';
|
||||
import { _diagnose, _showDiagnosis, _clearDiagnosis } from './cookbook-diagnosis.js';
|
||||
import { registerMenuDismiss } from './escMenuStack.js';
|
||||
import { computeProgressSignal } from './cookbookProgressSignal.js';
|
||||
import { portOf, nextFreePort } from './cookbookPorts.js';
|
||||
|
||||
// Human-friendly badge label for a task's internal status. Avoids surfacing
|
||||
// the word "error" in the sidebar — a server the user stopped or one that
|
||||
@@ -266,9 +267,7 @@ function _taskHostLabel(task) {
|
||||
}
|
||||
|
||||
function _taskPort(task) {
|
||||
const cmd = task?.payload?._cmd || '';
|
||||
const match = cmd.match(/--port\s+(\d+)/);
|
||||
return match ? match[1] : '';
|
||||
return portOf(task?.payload?._cmd || '');
|
||||
}
|
||||
|
||||
function _buildCrashReport(task, outputText) {
|
||||
@@ -455,16 +454,14 @@ function _nextAvailablePort() {
|
||||
const usedPorts = new Set();
|
||||
tasks.forEach(t => {
|
||||
if (t.type === 'serve' && (t.status === 'running' || t.status === 'queued')) {
|
||||
const m = t.payload?._cmd?.match(/--port\s+(\d+)/);
|
||||
if (m) usedPorts.add(parseInt(m[1]));
|
||||
const p = _taskPort(t);
|
||||
if (p) usedPorts.add(parseInt(p));
|
||||
}
|
||||
});
|
||||
presets.forEach(p => {
|
||||
if (p.port) usedPorts.add(parseInt(p.port));
|
||||
});
|
||||
let port = 8000;
|
||||
while (usedPorts.has(port)) port++;
|
||||
return String(port);
|
||||
return nextFreePort(usedPorts);
|
||||
}
|
||||
|
||||
// ── Endpoint cleanup ──
|
||||
@@ -3987,4 +3984,4 @@ export function initRunning(shared) {
|
||||
}
|
||||
|
||||
// Also export _retryDownload and _nextAvailablePort for use by other modules
|
||||
export { _retryDownload, _nextAvailablePort, _processQueue };
|
||||
export { _retryDownload, _nextAvailablePort, _processQueue, _taskPort };
|
||||
|
||||
+33
-25
@@ -2958,33 +2958,41 @@ function _rerenderCachedModels() {
|
||||
&& ((t.remoteHost || '') === _hostStr || (t.remoteServerKey || '') === _serverKeyStr)
|
||||
&& (t.status === 'running' || t.status === 'ready' || t._serveReady)
|
||||
);
|
||||
// Only block when the new model's port genuinely collides with
|
||||
// a running serve. Different ports coexist fine (issue #4507).
|
||||
if (_active.length) {
|
||||
const _names = _active.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
|
||||
const _ok = await window.styledConfirm(
|
||||
`${_active.length} model${_active.length === 1 ? '' : 's'} already serving on ${_hostStr || 'local'} (${_names.join(', ')}). Port 8000 will collide. Stop the running model and launch this one?`,
|
||||
{ title: 'Server already running', confirmText: 'Stop & launch', cancelText: 'Cancel' },
|
||||
);
|
||||
if (!_ok) { _restoreLaunchBtn(); return; }
|
||||
// Kill each active serve; prefer the rendered Stop button so
|
||||
// endpoint cleanup + Ollama unload run normally. Fall back to
|
||||
// a raw tmux kill when the Active tab isn't in the DOM.
|
||||
for (const t of _active) {
|
||||
try {
|
||||
const _el = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
|
||||
const _btn = _el?.querySelector('.cookbook-task-action-stop');
|
||||
if (_btn) {
|
||||
_btn.click();
|
||||
} else if (_runningMod._tmuxGracefulKill) {
|
||||
await fetch('/api/shell/exec', {
|
||||
method: 'POST', credentials: 'same-origin',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ command: _runningMod._tmuxGracefulKill(t) }),
|
||||
});
|
||||
}
|
||||
} catch (_killErr) { /* best-effort */ }
|
||||
const _newPort = (launchCmd.match(/--port[=\s]+(\d+)/) || [])[1] || '';
|
||||
const _clashing = _newPort
|
||||
? _active.filter(t => _runningMod._taskPort(t) === _newPort)
|
||||
: _active;
|
||||
if (_clashing.length) {
|
||||
const _names = _clashing.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
|
||||
const _portNote = _newPort ? ` on port ${_newPort}` : '';
|
||||
const _ok = await window.styledConfirm(
|
||||
`${_clashing.length} model${_clashing.length === 1 ? '' : 's'} already serving on ${_hostStr || 'local'} (${_names.join(', ')})${_portNote}. Stop it and launch this one?`,
|
||||
{ title: _newPort ? `Port ${_newPort} in use` : 'Server already running', confirmText: 'Stop & launch', cancelText: 'Cancel' },
|
||||
);
|
||||
if (!_ok) { _restoreLaunchBtn(); return; }
|
||||
// Kill each clashing serve; prefer the rendered Stop button so
|
||||
// endpoint cleanup + Ollama unload run normally. Fall back to
|
||||
// a raw tmux kill when the Active tab isn't in the DOM.
|
||||
for (const t of _clashing) {
|
||||
try {
|
||||
const _el = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
|
||||
const _btn = _el?.querySelector('.cookbook-task-action-stop');
|
||||
if (_btn) {
|
||||
_btn.click();
|
||||
} else if (_runningMod._tmuxGracefulKill) {
|
||||
await fetch('/api/shell/exec', {
|
||||
method: 'POST', credentials: 'same-origin',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ command: _runningMod._tmuxGracefulKill(t) }),
|
||||
});
|
||||
}
|
||||
} catch (_killErr) { /* best-effort */ }
|
||||
}
|
||||
await new Promise(r => setTimeout(r, 2500));
|
||||
}
|
||||
// Give the OS a beat to release port 8000.
|
||||
await new Promise(r => setTimeout(r, 2500));
|
||||
}
|
||||
} catch (_e) { /* best-effort */ }
|
||||
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
"""Behavioral tests for Cookbook port parsing / picking (#4507 follow-up).
|
||||
|
||||
Driven through `node --input-type=module` (same approach as the other
|
||||
*_js.py tests); skips when `node` is not installed.
|
||||
"""
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
_REPO = Path(__file__).resolve().parent.parent
|
||||
_HELPER = _REPO / "static" / "js" / "cookbookPorts.js"
|
||||
_HAS_NODE = shutil.which("node") is not None
|
||||
|
||||
|
||||
def _run(expr):
|
||||
js = (
|
||||
f"import {{ portOf, nextFreePort }} from '{_HELPER.as_posix()}';"
|
||||
f"console.log(JSON.stringify({expr}));"
|
||||
)
|
||||
proc = subprocess.run(
|
||||
["node", "--input-type=module"],
|
||||
input=js, capture_output=True, text=True, cwd=str(_REPO), timeout=30,
|
||||
)
|
||||
assert proc.returncode == 0, proc.stderr
|
||||
return json.loads(proc.stdout.strip())
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
|
||||
def test_port_of_handles_all_forms():
|
||||
assert _run("portOf('vllm serve m --host 0.0.0.0 --port 8000')") == "8000"
|
||||
assert _run("portOf('x --port=8001')") == "8001"
|
||||
assert _run("portOf('llama-server -p 8002')") == "8002"
|
||||
assert _run("portOf('llama-server -p=8003')") == "8003"
|
||||
assert _run("portOf('serve with no port flag')") == ""
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
|
||||
def test_next_free_port_skips_taken_including_eq_and_short_flag():
|
||||
# a --port= serve and a -p serve are both 'taken'; picker skips them
|
||||
taken = "[portOf('a --port=8000'), portOf('b -p 8001')]"
|
||||
assert _run(f"nextFreePort({taken})") == "8002"
|
||||
assert _run("nextFreePort([])") == "8000"
|
||||
assert _run("nextFreePort(['8000', '8002'])") == "8001"
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
|
||||
def test_clash_outcome_same_port_flagged_different_ignored():
|
||||
# the guard's predicate is portOf(cmd) === target
|
||||
assert _run("portOf('m --port 8000') === '8000'") is True
|
||||
assert _run("portOf('m --port 8001') === '8000'") is False
|
||||
Reference in New Issue
Block a user