diff --git a/static/js/cookbookServe.js b/static/js/cookbookServe.js
index feff5184d..ac2a33c6e 100644
--- a/static/js/cookbookServe.js
+++ b/static/js/cookbookServe.js
@@ -573,11 +573,16 @@ function _rerenderCachedModels() {
? `${_modelPresets.length} saved launch config${_modelPresets.length === 1 ? '' : 's'} for ${_repoShort} — click ▾ to load or delete`
: `No saved launch configs for ${_repoShort} yet — click Save to add one`;
let _slotsHtml = `
`
- + ``
+ + ``
+ ``
+ `
`;
let panelHtml = `
`;
+ // Runtime-readiness note pinned at the top of the serve area so the
+ // user sees "vLLM ready on …" before scrolling into the configure
+ // form. Hidden only until the readiness probe starts (it then shows
+ // "Checking runtime…" while the probe runs). The × button dismisses
+ // it for this panel only (re-shows on re-expand).
+ panelHtml += `
`;
// Warn when serving a model whose download hasn't fully completed —
// the user CAN still hit Launch (vLLM/llama-server will start, then
// crash trying to read missing shards), but they should know.
@@ -596,7 +601,13 @@ function _rerenderCachedModels() {
? [['llamacpp','llama.cpp'],['ollama','Ollama']]
: [['vllm','vLLM'],['sglang','SGLang'],['llamacpp','llama.cpp'],['ollama','Ollama'],['diffusers','Diffusers']];
const backendOpts = _backendChoices.map(([v,l]) => ``).join('');
- panelHtml += ``;
+ // Custom Backend picker — native
`;
- panelHtml += ``;
+ // (hwfit-serve-runtime-note moved to the top of the panel — see above.)
if (_ggufChoices.length > 1) {
// Show the GGUF File dropdown for BOTH llama.cpp and Ollama — Ollama
// also needs to know which exact .gguf to import via the new
@@ -631,7 +642,11 @@ function _rerenderCachedModels() {
// TP / Context / GPU / GPU Mem / Max Seqs / Dtype. Everything else
// (Swap, KV Cache, Attention backend, Env vars, llama.cpp batch/ubatch)
// moved to the Advanced fold below to keep this row scannable.
- panelHtml += `
`;
+ panelHtml += `
`;
+ // Order: Dtype → TP → Context → GPU → GPU Mem → Max Seqs.
+ // Dtype moved left of TP at user's request — it's the first knob
+ // people typically check when matching the model to the box.
+ panelHtml += ``;
panelHtml += ``;
// ctx resets to the model's max on every panel open (the real ctx slider
// lives in the Scan/Download toolbar — see cookbook.js .hwfit-ctx-control).
@@ -639,7 +654,6 @@ function _rerenderCachedModels() {
panelHtml += ``;
panelHtml += ``;
panelHtml += ``;
- panelHtml += ``;
panelHtml += `
`;
// ── Advanced (collapsed by default) ──
// Everything below the fold is tuning users only touch occasionally:
@@ -958,13 +972,107 @@ function _rerenderCachedModels() {
if (ok === false) clearInterval(_vramTimer);
}, 4000);
- // Show/hide backend-specific sections
+ // Backend icons, keyed by backend value — accent color, rendered via
+ // currentColor. vLLM gets a stylized double-V mark, the others use a
+ // recognizable glyph for the engine family. Shown beside each option in the
+ // custom picker so the dropdown lists "[V] vLLM", "[⚡] SGLang", etc.
+ const _BACKEND_GLYPHS = {
+ vllm: '', // also the fallback glyph when a backend value has no entry of its own
+ sglang: '',
+ llamacpp: '',
+ ollama: '',
+ diffusers: '', // NOTE(review): these strings are assigned via innerHTML, so they must stay static, trusted markup
+ };
+
+ // ── Custom Backend picker wiring ────────────────────────────────
+ // Reads the option list from the hidden <select> (`.hwfit-backend-source`)
+ // so the canonical (value, label) pairs come from one place.
+ const _backendPicker = panel.querySelector('[data-backend-picker]');
+ const _backendSource = panel.querySelector('.hwfit-backend-source');
+ const _backendBtn = panel.querySelector('[data-backend-btn]');
+ const _backendMenu = panel.querySelector('[data-backend-menu]');
+ const _backendBtnLabel = panel.querySelector('[data-backend-label]');
+ const _backendBtnIconSlot = _backendBtn?.querySelector('[data-backend-icon-slot]');
+
+ // Paint the picker button for backend value `v`: label text comes from the
+ // matching option in the source control (falling back to the raw value),
+ // icon comes from _BACKEND_GLYPHS (falling back to the vLLM glyph).
+ function _setBackendBtnState(v) {
+   if (_backendBtn) {
+     const matchingOption = _backendSource?.querySelector(`option[value="${CSS.escape(v)}"]`);
+     if (_backendBtnLabel) {
+       _backendBtnLabel.textContent = matchingOption ? matchingOption.textContent : v;
+     }
+     if (_backendBtnIconSlot) {
+       const glyphMarkup = _BACKEND_GLYPHS[v] || _BACKEND_GLYPHS.vllm;
+       _backendBtnIconSlot.innerHTML = glyphMarkup;
+     }
+   }
+ }
+
+ function _renderBackendMenu() { // (Re)builds the menu items from the source control's options and wires their hover/click handlers.
+ if (!_backendMenu || !_backendSource) return;
+ const items = Array.from(_backendSource.options).map(o => ({ value: o.value, label: o.textContent }));
+ _backendMenu.innerHTML = items.map(it => `
+
+ `).join('');
+ // Hover styling is applied inline per item (no global CSS rule — keep it self-contained).
+ _backendMenu.querySelectorAll('.hwfit-backend-item').forEach(btn => {
+ btn.addEventListener('mouseenter', () => { btn.style.background = 'color-mix(in srgb, var(--fg) 8%, transparent)'; });
+ btn.addEventListener('mouseleave', () => { btn.style.background = ''; });
+ btn.addEventListener('click', (ev) => {
+ ev.preventDefault();
+ ev.stopPropagation();
+ const v = btn.dataset.value;
+ if (_backendSource && _backendSource.value !== v) { // only write + announce when the selection actually changed
+ _backendSource.value = v;
+ _backendSource.dispatchEvent(new Event('change', { bubbles: true })); // setting .value programmatically fires no event, so emit a bubbling 'change' by hand
+ }
+ _setBackendBtnState(v);
+ _closeBackendMenu();
+ });
+ });
+ }
+
+ // Open and close go through one toggle so the menu's `hidden` flag and the
+ // button's aria-expanded attribute are always written together.
+ function _toggleBackendMenu(shouldOpen) {
+   if (_backendMenu && _backendBtn) {
+     _backendMenu.hidden = !shouldOpen;
+     _backendBtn.setAttribute('aria-expanded', shouldOpen ? 'true' : 'false');
+   }
+ }
+ function _openBackendMenu() {
+   _toggleBackendMenu(true);
+ }
+ function _closeBackendMenu() {
+   _toggleBackendMenu(false);
+ }
+ // Button + document wiring for the custom backend picker.
+ if (_backendBtn) {
+   // Null-safe "is the menu showing?" — the menu element may be absent even
+   // when the button exists.
+   const _backendMenuIsOpen = () => Boolean(_backendMenu) && !_backendMenu.hidden;
+
+   _backendBtn.addEventListener('click', (ev) => {
+     ev.preventDefault();
+     ev.stopPropagation();
+     if (_backendMenuIsOpen()) _closeBackendMenu();
+     else _openBackendMenu();
+   });
+
+   // The two listeners below live on `document`, so they would outlive the
+   // panel and pile up (one pair per panel open) unless removed. Each one
+   // detaches both as soon as it notices the panel has left the DOM.
+   const _onBackendDocClick = (ev) => {
+     if (!panel.isConnected) { _detachBackendDocListeners(); return; }
+     // Click outside the picker closes the menu.
+     if (_backendMenuIsOpen() && !_backendPicker?.contains(ev.target)) _closeBackendMenu();
+   };
+   const _onBackendDocKeydown = (ev) => {
+     if (!panel.isConnected) { _detachBackendDocListeners(); return; }
+     if (ev.key === 'Escape' && _backendMenuIsOpen()) {
+       // stopImmediatePropagation, not stopPropagation: other capture-phase
+       // keydown listeners registered on `document` after this one (e.g. the
+       // panel's Esc-to-collapse handler) are on the SAME node, so plain
+       // stopPropagation would still let them run and collapse the whole
+       // panel when the user only meant to close this menu.
+       ev.stopImmediatePropagation();
+       _closeBackendMenu();
+     }
+   };
+   const _detachBackendDocListeners = () => {
+     document.removeEventListener('click', _onBackendDocClick);
+     document.removeEventListener('keydown', _onBackendDocKeydown, { capture: true });
+   };
+   document.addEventListener('click', _onBackendDocClick);
+   document.addEventListener('keydown', _onBackendDocKeydown, { capture: true });
+ }
+ _renderBackendMenu();
+ _setBackendBtnState(_backendSource?.value || defaultBackend);
+
function updateBackendVisibility() {
const b = panel.querySelector('[data-field="backend"]')?.value || 'vllm';
panel.querySelectorAll('[class*="hwfit-backend-"]').forEach(el => {
+ // Skip the entire backend-picker subtree — the picker's own
+ // classes (`hwfit-backend-picker`, `-btn`, `-menu`, `-item`,
+ // `-btn-icon`, `-btn-label`, `-item-icon`, `-item-label`) all
+ // match the wildcard and would get hidden as if they were
+ // "backend-specific form sections", which left the dropdown
+ // looking empty / collapsed.
+ if (el.closest('.hwfit-backend-picker')) return;
const show = el.classList.contains(`hwfit-backend-${b}`);
el.style.display = show ? '' : 'none';
});
+ _setBackendBtnState(b); // keep the picker button's label/icon in step with the backend value just read above
}
updateBackendVisibility();
@@ -974,51 +1082,58 @@ function _rerenderCachedModels() {
// Mirror the message into a small chip next to the model title at
// the top of the card, so the readiness state is visible without
// having to look down into the panel body.
+ // Clean up any title chip from previous versions — the readiness
+ // text now lives inside the panel at the top, not in the card title.
const card = panel.closest('.doclib-card, .memory-item');
const titleEl = card ? card.querySelector('.memory-item-title') : null;
- let titleChip = titleEl ? titleEl.querySelector('.hwfit-serve-runtime-chip') : null;
- const ensureChip = () => {
- if (!titleEl) return null;
- if (!titleChip) {
- titleChip = document.createElement('span');
- titleChip.className = 'hwfit-serve-runtime-chip';
- titleChip.style.cssText = 'margin-left:8px;font-size:10.5px;font-weight:400;opacity:0.7;white-space:normal;line-height:1.3;';
- titleEl.appendChild(titleChip);
- }
- return titleChip;
- };
+ const titleChip = titleEl ? titleEl.querySelector('.hwfit-serve-runtime-chip') : null;
+ if (titleChip) titleChip.remove();
const backend = panel.querySelector('[data-field="backend"]')?.value || 'vllm';
+ const noteText = note.querySelector('.hwfit-serve-runtime-text');
+ const _writeNote = (s) => { if (noteText) noteText.textContent = s; else note.textContent = s; };
if (!['vllm', 'sglang', 'llamacpp', 'diffusers'].includes(backend)) {
note.style.display = 'none';
- note.textContent = '';
- if (titleChip) titleChip.remove();
+ _writeNote('');
return;
}
+ // Wire dismiss once per note element.
+ const _closeBtn = note.querySelector('.hwfit-serve-runtime-close');
+ if (_closeBtn && !_closeBtn._wired) {
+ _closeBtn._wired = true;
+ _closeBtn.addEventListener('click', (ev) => {
+ ev.preventDefault();
+ ev.stopPropagation();
+ note.style.display = 'none';
+ panel._runtimeNoteDismissed = true;
+ });
+ }
+ // If the user dismissed it earlier on this panel, don't re-show.
+ if (panel._runtimeNoteDismissed) return;
const seq = (panel._runtimeReadinessSeq || 0) + 1;
panel._runtimeReadinessSeq = seq;
- // The in-panel note becomes a hidden source-of-truth; the visible
- // copy lives in the title chip.
- note.style.display = 'none';
- const chip = ensureChip();
- if (chip) chip.textContent = 'Checking runtime on selected server…';
+ note.style.display = '';
+ _writeNote('Checking runtime on selected server…');
+ // Reset every tint a previous probe may have applied. The success/failure
+ // branches below colour border, background AND text; clearing only two of
+ // the three would leave a stale green/red background framing the neutral
+ // "Checking…" / "unavailable" text. The background is restored to whatever
+ // inline value the note had before the first probe, remembered on the element.
+ note.style.borderColor = '';
+ if (note._baseBackground === undefined) note._baseBackground = note.style.background;
+ note.style.background = note._baseBackground;
+ note.style.color = 'var(--fg-muted)';
try {
const { pkg, target } = await _fetchServeRuntimePackage(panel, backend);
if (panel._runtimeReadinessSeq !== seq) return;
- const text = _runtimeNoteText(backend, pkg, target);
- note.textContent = text;
- if (chip) {
- chip.textContent = text;
- chip.style.color = pkg?.installed ? 'inherit' : 'var(--red)';
- chip.style.opacity = pkg?.installed ? '0.7' : '1';
+ _writeNote(_runtimeNoteText(backend, pkg, target));
+ if (!pkg?.installed) {
+ note.style.color = 'var(--red)';
+ note.style.borderColor = 'color-mix(in srgb, var(--red) 40%, transparent)';
+ note.style.background = 'color-mix(in srgb, var(--red) 8%, transparent)';
+ } else {
+ // Healthy / ready → green so the user reads "good to go" at a
+ // glance instead of scanning fg-muted for a state.
+ note.style.color = 'var(--green, #4caf50)';
+ note.style.borderColor = 'color-mix(in srgb, var(--green, #4caf50) 40%, transparent)';
+ note.style.background = 'color-mix(in srgb, var(--green, #4caf50) 8%, transparent)';
}
} catch (err) {
if (panel._runtimeReadinessSeq !== seq) return;
- const text = `Runtime readiness unavailable: ${err?.message || err}`;
- note.textContent = text;
- if (chip) {
- chip.textContent = text;
- chip.style.color = 'var(--fg-muted)';
- }
+ _writeNote(`Runtime readiness unavailable: ${err?.message || err}`);
+ note.style.color = 'var(--fg-muted)';
}
}
updateRuntimeReadinessNote();
@@ -1717,15 +1832,39 @@ function _rerenderCachedModels() {
// Cancel button — collapses the serve config panel (same effect as
// tapping the row to toggle it shut). Mobile users wanted an explicit
// "back out" affordance next to Launch.
- panel.querySelector('.hwfit-serve-cancel')?.addEventListener('click', (ev) => {
- ev.stopPropagation();
+ const _collapsePanel = () => { // Shared teardown (Cancel button and Esc): removes the panel and undoes the card/list expansion styling.
panel._cleanupRuntimeReadiness?.();
panel.remove();
item.classList.remove('doclib-card-expanded');
item.style.flexDirection = '';
item.style.alignItems = '';
if (list) { list.style.minHeight = ''; list.style.maxHeight = ''; }
+ };
+ panel.querySelector('.hwfit-serve-cancel')?.addEventListener('click', (ev) => {
+ ev.stopPropagation(); // keep the click from reaching ancestor handlers
+ _collapsePanel();
});
+ // Esc anywhere on the page closes the open serve panel. Skips when
+ // the user is typing in a field — they want Esc to deselect / blur
+ // those, not collapse the form they're configuring.
+ const _onEscClose = (ev) => {
+ if (ev.key !== 'Escape') return;
+ if (!panel.isConnected) { // panel already gone: this listener is stale, so unregister it lazily
+ document.removeEventListener('keydown', _onEscClose, true);
+ return;
+ }
+ const t = ev.target;
+ const inField = t && (
+ t.tagName === 'INPUT' || t.tagName === 'TEXTAREA' || t.tagName === 'SELECT' || t.isContentEditable
+ );
+ if (inField) return;
+ // NOTE(review): this listener is registered in the CAPTURE phase on
+ // document, so it runs before any element-level Esc handler, and a stopPropagation() call made by
+ // another document-capture listener does not skip it — a popover that must keep Esc to itself has to call stopImmediatePropagation() from a listener registered earlier.
+ ev.stopPropagation();
+ _collapsePanel();
+ };
+ document.addEventListener('keydown', _onEscClose, true);
// Launch button
panel.querySelector('.hwfit-serve-launch').addEventListener('click', async (ev) => {
@@ -1780,6 +1919,50 @@ function _rerenderCachedModels() {
else serveState[el.dataset.field] = el.value;
});
serveState.backend = serveState.backend || (_detectBackend(m).backend) || 'vllm';
+
+ // Pre-launch: check our own task list for a serve already running
+ // on this host. Offer to stop+launch as the default action — the
+ // SSH-based port probe below is more thorough but it can miss
+ // when SSH glitches or `ss` isn't installed. This catches the
+ // common case instantly without waiting for a network round-trip.
+ // The whole check is best-effort: any failure is logged and the
+ // launch continues.
+ try {
+   const _runningMod = await import('./cookbookRunning.js');
+   const _hostStr = _envState.remoteHost || '';
+   const _active = (_runningMod._loadTasks ? _runningMod._loadTasks() : []).filter(t =>
+     t && t.type === 'serve'
+     && (t.remoteHost || '') === _hostStr
+     && (t.status === 'running' || t.status === 'ready' || t._serveReady)
+   );
+   if (_active.length) {
+     // Every entry falls back to '?', so no post-filtering is needed.
+     const _names = _active.map(t => t.payload?.repo_id || t.repo || t.name || '?');
+     const _ok = await window.styledConfirm(
+       `${_active.length} model${_active.length === 1 ? '' : 's'} already serving on ${_hostStr || 'local'} (${_names.join(', ')}). Port 8000 will collide. Stop the running model and launch this one?`,
+       { title: 'Server already running', confirmText: 'Stop & launch', cancelText: 'Cancel' },
+     );
+     if (!_ok) { _restoreLaunchBtn(); return; }
+     // Kill each active serve; prefer the rendered Stop button so
+     // endpoint cleanup + Ollama unload run normally. Fall back to
+     // a raw tmux kill when the Active tab isn't in the DOM.
+     for (const t of _active) {
+       try {
+         // Escape the id before splicing it into the selector: a quote or
+         // backslash would make querySelector throw, and that serve would
+         // then be skipped entirely (neither Stop click nor tmux kill).
+         const _el = document.querySelector(`.cookbook-task[data-task-id="${CSS.escape(t.sessionId)}"]`);
+         const _btn = _el?.querySelector('.cookbook-task-action-stop');
+         if (_btn) {
+           _btn.click();
+         } else if (_runningMod._tmuxGracefulKill) {
+           const _resp = await fetch('/api/shell/exec', {
+             method: 'POST', credentials: 'same-origin',
+             headers: { 'Content-Type': 'application/json' },
+             body: JSON.stringify({ command: _runningMod._tmuxGracefulKill(t) }),
+           });
+           // fetch() only rejects on network failure; surface HTTP errors too.
+           if (!_resp.ok) console.warn('[cookbookServe] stop request for running serve returned HTTP', _resp.status, t.sessionId);
+         }
+       } catch (_killErr) {
+         // Best-effort: keep going, but leave a trace — a serve that was
+         // not stopped is the likeliest cause of a port collision next.
+         console.warn('[cookbookServe] failed to stop running serve', t.sessionId, _killErr);
+       }
+     }
+     // Give the OS a beat to release port 8000.
+     await new Promise(r => setTimeout(r, 2500));
+   }
+ } catch (_e) {
+   // Best-effort: never block the launch on this check, but log why it was skipped.
+   console.warn('[cookbookServe] pre-launch running-serve check skipped', _e);
+ }
+
const backendWarning = _serveBackendWarning(m, repo, serveState.backend, serveState);
if (backendWarning) {
_restoreLaunchBtn();