diff --git a/static/js/cookbookServe.js b/static/js/cookbookServe.js index feff5184d..ac2a33c6e 100644 --- a/static/js/cookbookServe.js +++ b/static/js/cookbookServe.js @@ -573,11 +573,16 @@ function _rerenderCachedModels() { ? `${_modelPresets.length} saved launch config${_modelPresets.length === 1 ? '' : 's'} for ${_repoShort} — click ▾ to load or delete` : `No saved launch configs for ${_repoShort} yet — click Save to add one`; let _slotsHtml = `
` - + `` + + `` + `` + `
`; let panelHtml = `
`; + // Runtime-readiness note pinned at the top of the serve area so the + // user sees "vLLM ready on …" before scrolling into the configure + // form. Hidden until the readiness probe returns. The × button + // dismisses it for this panel only (re-shows on re-expand). + panelHtml += ``; // Warn when serving a model whose download hasn't fully completed — // the user CAN still hit Launch (vLLM/llama-server will start, then // crash trying to read missing shards), but they should know. @@ -596,7 +601,13 @@ function _rerenderCachedModels() { ? [['llamacpp','llama.cpp'],['ollama','Ollama']] : [['vllm','vLLM'],['sglang','SGLang'],['llamacpp','llama.cpp'],['ollama','Ollama'],['diffusers','Diffusers']]; const backendOpts = _backendChoices.map(([v,l]) => ``).join(''); - panelHtml += ``; + // Custom Backend picker — native ${backendOpts}
`; panelHtml += ``; panelHtml += ``; const defaultPort = defaultBackend === 'ollama' ? '11434' : _nextAvailablePort(); @@ -614,7 +625,7 @@ function _rerenderCachedModels() { // so it shares the same baseline as the rest of the top controls. panelHtml += _slotsHtml; panelHtml += ``; - panelHtml += ``; + // (hwfit-serve-runtime-note moved to the top of the panel — see above.) if (_ggufChoices.length > 1) { // Show the GGUF File dropdown for BOTH llama.cpp and Ollama — Ollama // also needs to know which exact .gguf to import via the new @@ -631,7 +642,11 @@ function _rerenderCachedModels() { // TP / Context / GPU / GPU Mem / Max Seqs / Dtype. Everything else // (Swap, KV Cache, Attention backend, Env vars, llama.cpp batch/ubatch) // moved to the Advanced fold below to keep this row scannable. - panelHtml += `
`; + panelHtml += `
`; + // Order: Dtype → TP → Context → GPU → GPU Mem → Max Seqs. + // Dtype moved left of TP at user's request — it's the first knob + // people typically check when matching the model to the box. + panelHtml += ``; panelHtml += ``; // ctx resets to the model's max on every panel open (the real ctx slider // lives in the Scan/Download toolbar — see cookbook.js .hwfit-ctx-control). @@ -639,7 +654,6 @@ function _rerenderCachedModels() { panelHtml += ``; panelHtml += ``; panelHtml += ``; - panelHtml += ``; panelHtml += `
`; // ── Advanced (collapsed by default) ── // Everything below the fold is tuning users only touch occasionally: @@ -958,13 +972,107 @@ function _rerenderCachedModels() { if (ok === false) clearInterval(_vramTimer); }, 4000); - // Show/hide backend-specific sections + // Backend icons — accent color, rendered via currentColor. vLLM gets + // a stylized double-V mark, the others fall back to a recognizable + // glyph for the engine family. Shown beside each option in the + // custom picker so the dropdown lists "[V] vLLM", "[⚡] SGLang", etc. + const _BACKEND_GLYPHS = { + vllm: '', + sglang: '', + llamacpp: '', + ollama: '', + diffusers: '', + }; + + // ── Custom Backend picker wiring ──────────────────────────────── + // Reads the option list from the hidden + // so the canonical (value, label) pairs come from one place. + const _backendPicker = panel.querySelector('[data-backend-picker]'); + const _backendSource = panel.querySelector('.hwfit-backend-source'); + const _backendBtn = panel.querySelector('[data-backend-btn]'); + const _backendMenu = panel.querySelector('[data-backend-menu]'); + const _backendBtnLabel = panel.querySelector('[data-backend-label]'); + const _backendBtnIconSlot = _backendBtn?.querySelector('[data-backend-icon-slot]'); + + function _setBackendBtnState(v) { + if (!_backendBtn) return; + const opt = _backendSource?.querySelector(`option[value="${CSS.escape(v)}"]`); + const label = opt ? opt.textContent : v; + if (_backendBtnLabel) _backendBtnLabel.textContent = label; + if (_backendBtnIconSlot) _backendBtnIconSlot.innerHTML = _BACKEND_GLYPHS[v] || _BACKEND_GLYPHS.vllm; + } + + function _renderBackendMenu() { + if (!_backendMenu || !_backendSource) return; + const items = Array.from(_backendSource.options).map(o => ({ value: o.value, label: o.textContent })); + _backendMenu.innerHTML = items.map(it => ` + + `).join(''); + // Hover styling (no global CSS rule — keep it self-contained). + _backendMenu.querySelectorAll('.hwfit-backend-item').forEach(btn => { + btn.addEventListener('mouseenter', () => { btn.style.background = 'color-mix(in srgb, var(--fg) 8%, transparent)'; }); + btn.addEventListener('mouseleave', () => { btn.style.background = ''; }); + btn.addEventListener('click', (ev) => { + ev.preventDefault(); + ev.stopPropagation(); + const v = btn.dataset.value; + if (_backendSource && _backendSource.value !== v) { + _backendSource.value = v; + _backendSource.dispatchEvent(new Event('change', { bubbles: true })); + } + _setBackendBtnState(v); + _closeBackendMenu(); + }); + }); + } + + function _openBackendMenu() { + if (!_backendMenu || !_backendBtn) return; + _backendMenu.hidden = false; + _backendBtn.setAttribute('aria-expanded', 'true'); + } + function _closeBackendMenu() { + if (!_backendMenu || !_backendBtn) return; + _backendMenu.hidden = true; + _backendBtn.setAttribute('aria-expanded', 'false'); + } + if (_backendBtn) { + _backendBtn.addEventListener('click', (ev) => { + ev.preventDefault(); + ev.stopPropagation(); + if (_backendMenu.hidden) _openBackendMenu(); + else _closeBackendMenu(); + }); + document.addEventListener('click', (ev) => { + if (!_backendMenu.hidden && !_backendPicker?.contains(ev.target)) _closeBackendMenu(); + }); + document.addEventListener('keydown', (ev) => { + if (ev.key === 'Escape' && !_backendMenu.hidden) { + ev.stopPropagation(); + _closeBackendMenu(); + } + }, { capture: true }); + } + _renderBackendMenu(); + _setBackendBtnState(_backendSource?.value || defaultBackend); + function updateBackendVisibility() { const b = panel.querySelector('[data-field="backend"]')?.value || 'vllm'; panel.querySelectorAll('[class*="hwfit-backend-"]').forEach(el => { + // Skip the entire backend-picker subtree — the picker's own + // classes (`hwfit-backend-picker`, `-btn`, `-menu`, `-item`, + // `-btn-icon`, `-btn-label`, `-item-icon`, `-item-label`) all + // match the wildcard and would get hidden as if they were + // "backend-specific form sections", which left the dropdown + // looking empty / collapsed. + if (el.closest('.hwfit-backend-picker')) return; const show = el.classList.contains(`hwfit-backend-${b}`); el.style.display = show ? '' : 'none'; }); + _setBackendBtnState(b); } updateBackendVisibility(); @@ -974,51 +1082,58 @@ function _rerenderCachedModels() { // Mirror the message into a small chip next to the model title at // the top of the card, so the readiness state is visible without // having to look down into the panel body. + // Clean up any title chip from previous versions — the readiness + // text now lives inside the panel at the top, not in the card title. const card = panel.closest('.doclib-card, .memory-item'); const titleEl = card ? card.querySelector('.memory-item-title') : null; - let titleChip = titleEl ? titleEl.querySelector('.hwfit-serve-runtime-chip') : null; - const ensureChip = () => { - if (!titleEl) return null; - if (!titleChip) { - titleChip = document.createElement('span'); - titleChip.className = 'hwfit-serve-runtime-chip'; - titleChip.style.cssText = 'margin-left:8px;font-size:10.5px;font-weight:400;opacity:0.7;white-space:normal;line-height:1.3;'; - titleEl.appendChild(titleChip); - } - return titleChip; - }; + const titleChip = titleEl ? titleEl.querySelector('.hwfit-serve-runtime-chip') : null; + if (titleChip) titleChip.remove(); const backend = panel.querySelector('[data-field="backend"]')?.value || 'vllm'; + const noteText = note.querySelector('.hwfit-serve-runtime-text'); + const _writeNote = (s) => { if (noteText) noteText.textContent = s; else note.textContent = s; }; if (!['vllm', 'sglang', 'llamacpp', 'diffusers'].includes(backend)) { note.style.display = 'none'; - note.textContent = ''; - if (titleChip) titleChip.remove(); + _writeNote(''); return; } + // Wire dismiss once per note element. + const _closeBtn = note.querySelector('.hwfit-serve-runtime-close'); + if (_closeBtn && !_closeBtn._wired) { + _closeBtn._wired = true; + _closeBtn.addEventListener('click', (ev) => { + ev.preventDefault(); + ev.stopPropagation(); + note.style.display = 'none'; + panel._runtimeNoteDismissed = true; + }); + } + // If the user dismissed it earlier on this panel, don't re-show. + if (panel._runtimeNoteDismissed) return; const seq = (panel._runtimeReadinessSeq || 0) + 1; panel._runtimeReadinessSeq = seq; - // The in-panel note becomes a hidden source-of-truth; the visible - // copy lives in the title chip. - note.style.display = 'none'; - const chip = ensureChip(); - if (chip) chip.textContent = 'Checking runtime on selected server…'; + note.style.display = ''; + _writeNote('Checking runtime on selected server…'); + note.style.borderColor = ''; + note.style.color = 'var(--fg-muted)'; try { const { pkg, target } = await _fetchServeRuntimePackage(panel, backend); if (panel._runtimeReadinessSeq !== seq) return; - const text = _runtimeNoteText(backend, pkg, target); - note.textContent = text; - if (chip) { - chip.textContent = text; - chip.style.color = pkg?.installed ? 'inherit' : 'var(--red)'; - chip.style.opacity = pkg?.installed ? '0.7' : '1'; + _writeNote(_runtimeNoteText(backend, pkg, target)); + if (!pkg?.installed) { + note.style.color = 'var(--red)'; + note.style.borderColor = 'color-mix(in srgb, var(--red) 40%, transparent)'; + note.style.background = 'color-mix(in srgb, var(--red) 8%, transparent)'; + } else { + // Healthy / ready → green so the user reads "good to go" at a + // glance instead of scanning fg-muted for a state. + note.style.color = 'var(--green, #4caf50)'; + note.style.borderColor = 'color-mix(in srgb, var(--green, #4caf50) 40%, transparent)'; + note.style.background = 'color-mix(in srgb, var(--green, #4caf50) 8%, transparent)'; } } catch (err) { if (panel._runtimeReadinessSeq !== seq) return; - const text = `Runtime readiness unavailable: ${err?.message || err}`; - note.textContent = text; - if (chip) { - chip.textContent = text; - chip.style.color = 'var(--fg-muted)'; - } + _writeNote(`Runtime readiness unavailable: ${err?.message || err}`); + note.style.color = 'var(--fg-muted)'; } } updateRuntimeReadinessNote(); @@ -1717,15 +1832,39 @@ function _rerenderCachedModels() { // Cancel button — collapses the serve config panel (same effect as // tapping the row to toggle it shut). Mobile users wanted an explicit // "back out" affordance next to Launch. - panel.querySelector('.hwfit-serve-cancel')?.addEventListener('click', (ev) => { - ev.stopPropagation(); + const _collapsePanel = () => { panel._cleanupRuntimeReadiness?.(); panel.remove(); item.classList.remove('doclib-card-expanded'); item.style.flexDirection = ''; item.style.alignItems = ''; if (list) { list.style.minHeight = ''; list.style.maxHeight = ''; } + }; + panel.querySelector('.hwfit-serve-cancel')?.addEventListener('click', (ev) => { + ev.stopPropagation(); + _collapsePanel(); }); + // Esc anywhere on the page closes the open serve panel. Skips when + // the user is typing in a field — they want Esc to deselect / blur + // those, not collapse the form they're configuring. + const _onEscClose = (ev) => { + if (ev.key !== 'Escape') return; + if (!panel.isConnected) { + document.removeEventListener('keydown', _onEscClose, true); + return; + } + const t = ev.target; + const inField = t && ( + t.tagName === 'INPUT' || t.tagName === 'TEXTAREA' || t.tagName === 'SELECT' || t.isContentEditable + ); + if (inField) return; + // Skip when one of the dropdown/menu popovers is open — the + // popovers handle their own Esc and use stopPropagation, so any + // Esc that bubbles here means nothing else claimed it. + ev.stopPropagation(); + _collapsePanel(); + }; + document.addEventListener('keydown', _onEscClose, true); // Launch button panel.querySelector('.hwfit-serve-launch').addEventListener('click', async (ev) => { @@ -1780,6 +1919,50 @@ function _rerenderCachedModels() { else serveState[el.dataset.field] = el.value; }); serveState.backend = serveState.backend || (_detectBackend(m).backend) || 'vllm'; + + // Pre-launch: check our own task list for a serve already running + // on this host. Offer to stop+launch as the default action — the + // SSH-based port probe below is more thorough but it can miss + // when SSH glitches or `ss` isn't installed. This catches the + // common case instantly without waiting for a network round-trip. + try { + const _runningMod = await import('./cookbookRunning.js'); + const _hostStr = _envState.remoteHost || ''; + const _active = (_runningMod._loadTasks ? _runningMod._loadTasks() : []).filter(t => + t && t.type === 'serve' + && (t.remoteHost || '') === _hostStr + && (t.status === 'running' || t.status === 'ready' || t._serveReady) + ); + if (_active.length) { + const _names = _active.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean); + const _ok = await window.styledConfirm( + `${_active.length} model${_active.length === 1 ? '' : 's'} already serving on ${_hostStr || 'local'} (${_names.join(', ')}). Port 8000 will collide. Stop the running model and launch this one?`, + { title: 'Server already running', confirmText: 'Stop & launch', cancelText: 'Cancel' }, + ); + if (!_ok) { _restoreLaunchBtn(); return; } + // Kill each active serve; prefer the rendered Stop button so + // endpoint cleanup + Ollama unload run normally. Fall back to + // a raw tmux kill when the Active tab isn't in the DOM. + for (const t of _active) { + try { + const _el = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`); + const _btn = _el?.querySelector('.cookbook-task-action-stop'); + if (_btn) { + _btn.click(); + } else if (_runningMod._tmuxGracefulKill) { + await fetch('/api/shell/exec', { + method: 'POST', credentials: 'same-origin', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ command: _runningMod._tmuxGracefulKill(t) }), + }); + } + } catch (_killErr) { /* best-effort */ } + } + // Give the OS a beat to release port 8000. + await new Promise(r => setTimeout(r, 2500)); + } + } catch (_e) { /* best-effort */ } + const backendWarning = _serveBackendWarning(m, repo, serveState.backend, serveState); if (backendWarning) { _restoreLaunchBtn();