// ============================================
// COOKBOOK SERVE SUB-MODULE
// Serve tab: cached model list, serve panel building,
// command building, preset slots, launch logic
// ============================================

import uiModule from './ui.js';
import spinnerModule from './spinner.js';
import { providerLogo } from './providers.js';
import { modelColor } from './chatRenderer.js';
import { bindMenuDismiss, dismissOrRemove } from './escMenuStack.js';
import { openCookbookDependencies } from './cookbook-diagnosis.js';

// Shared state/functions injected by init()
let _envState;
let _sshCmd;
let _getPort;
let _sshPrefix;
let _serverByVal;
let _getPlatform;
let _isWindows;
let _isMetal;
let _buildEnvPrefix;
let _buildServeCmd;
let _shellQuote;
let _psQuote;
let _detectBackend;
let _detectToolParser;
let _detectModelOptimizations;
let _loadPresets;
let _savePresets;
let _copyText;
let _persistEnvState;
let _getGpuToggleTotal;
let modelLogo;
let esc;
let _launchServeTask;
let _retryDownload;
let _nextAvailablePort;

// Storage keys
const SERVE_STATE_KEY = 'cookbook-serve-state';

let _cachedAllModels = [];

// Heuristic: true when the model's quant field starts with AWQ/GPTQ or equals
// FP8, or when "awq" / "gptq" / "fp8" appears as a whole word in the repo
// argument or in the model's repo_id / name / path.
function _repoLooksAwqLike(model, repo) {
  const quant = String(model?.quant || '').toUpperCase();
  const haystack = [repo, model?.repo_id, model?.name, model?.path]
    .map(part => part || '')
    .join(' ')
    .toLowerCase();
  if (/^AWQ|^GPTQ/.test(quant) || quant === 'FP8') return true;
  return /\b(awq|gptq|fp8)\b/i.test(haystack);
}

// Heuristic: true when the model is flagged is_gguf, its quant field looks like
// a GGUF quant (Q2..Q8 prefix, IQ prefix, or literally "GGUF"), or "gguf"
// appears anywhere in the repo argument / repo_id / name / path.
function _repoLooksGgufLike(model, repo) {
  const quant = String(model?.quant || '').toUpperCase();
  const haystack = [repo, model?.repo_id, model?.name, model?.path]
    .map(part => part || '')
    .join(' ')
    .toLowerCase();
  if (model?.is_gguf) return true;
  if (/^Q[2-8]/.test(quant) || /^IQ/.test(quant) || quant === 'GGUF') return true;
  return haystack.includes('gguf');
}

// Returns a { title, body } warning when the chosen backend cannot serve the
// detected model format, or null when the combination looks fine.
//   model, repo - forwarded to the two format heuristics above
//   backend     - 'vllm' | 'sglang' | 'llamacpp' | 'ollama' | ...
//   fields      - serve-panel values; only `unified_mem` is read here
function _serveBackendWarning(model, repo, backend, fields = {}) {
  const awqLike = _repoLooksAwqLike(model, repo);
  const ggufLike = _repoLooksGgufLike(model, repo);
  const wantsGgufRuntime = backend === 'llamacpp' || backend === 'ollama';
  const wantsTensorRuntime = backend === 'vllm' || backend === 'sglang';

  if (awqLike) {
    if (wantsGgufRuntime) {
      return {
        title: 'AWQ needs vLLM or SGLang',
        body: 'This model looks like AWQ/GPTQ/FP8 safetensors. llama.cpp and Ollama need GGUF files, so this backend cannot serve it. Choose vLLM/SGLang on a CUDA/ROCm GPU server, or download a GGUF version for llama.cpp/Ollama.',
      };
    }
    // _isMetal() is evaluated before the backend test, as in the original check.
    if (_isMetal() && wantsTensorRuntime) {
      return {
        title: 'AWQ is not a unified-memory path',
        body: 'This model looks like AWQ/GPTQ/FP8 safetensors. AWQ is for vLLM/SGLang on CUDA/ROCm-style GPU servers, not local unified-memory llama.cpp/Ollama serving. For unified memory, download a GGUF model and use llama.cpp/Ollama.',
      };
    }
    if (fields.unified_mem) {
      return {
        title: 'AWQ is not a unified-memory path',
        body: 'This model looks like AWQ/GPTQ/FP8 safetensors, but unified-memory local serving expects GGUF. Use vLLM/SGLang on a compatible GPU server, or download a GGUF version for llama.cpp/Ollama.',
      };
    }
  }

  if (ggufLike && wantsTensorRuntime) {
    return {
      title: 'GGUF needs llama.cpp or Ollama',
      body: 'This model looks like GGUF. vLLM/SGLang expect HuggingFace safetensors-style repos. Choose llama.cpp/Ollama for GGUF, or download a safetensors model for vLLM/SGLang.',
    };
  }
  return null;
}

// Own-property test that tolerates a null/undefined object.
function _hasOwn(obj, key) {
  return Object.prototype.hasOwnProperty.call(obj || {}, key);
}

// "0,1,...,count-1" for a positive GPU count; '' for anything that is not a
// positive finite number (fractions are floored).
function _allGpuIds(count) {
  const n = Number(count || 0);
  if (!Number.isFinite(n) || n <= 0) return '';
  return Array.from({ length: Math.floor(n) }, (_, i) => String(i)).join(',');
}

// Works out which server a serve action should target and returns
// { host, port, venv, platform, label }. host is '' for the local machine.
//   panel - optional serve panel element; its [data-field="venv"] input, when
//           filled in, wins over the server / global env path.
function _selectedServeTarget(panel) {
  const select = document.getElementById('hwfit-server-select')
    || document.getElementById('hwfit-dl-server');
  const servers = Array.isArray(_envState.servers) ? _envState.servers : [];

  // Start from the remote host recorded in the env state...
  let host = _envState.remoteHost || '';
  let server = host
    ? (_serverByVal?.(_envState.remoteServerKey || host) || servers.find(s => s.host === host))
    : null;

  // ...then let the server <select>, when one is on the page, take precedence.
  if (select && select.value != null) {
    if (select.value === 'local') {
      host = '';
      server = servers.find(s => !s.host || s.host === 'local') || null;
    } else {
      // An all-digits value is also tried as an index into the servers array.
      const idx = /^\d+$/.test(String(select.value)) ? parseInt(select.value, 10) : -1;
      server = _serverByVal?.(select.value) || (idx >= 0 ? servers[idx] : null) || null;
      host = server?.host || '';
    }
  }

  const venv = panel?.querySelector('[data-field="venv"]')?.value?.trim()
    || server?.envPath
    || _envState.envPath
    || '';
  const label = host
    ? (server?.name ? `${server.name} (${host})` : host)
    : (server?.name || 'local server');

  return {
    host,
    port: host ? (_getPort(host) || server?.port || '') : '',
    venv,
    platform: server?.platform || _envState.platform || '',
    label,
  };
}

// Fetches /api/cookbook/packages for the selected target and picks out the
// package entry backing `backend`.
// Resolves to null for a backend with no known package, otherwise to
// { pkg, target } (pkg is undefined when the response does not list it).
// Throws Error('HTTP <status>') on a non-OK response.
async function _fetchServeRuntimePackage(panel, backend) {
  const packageByBackend = {
    vllm: 'vllm',
    sglang: 'sglang',
    llamacpp: 'llama_cpp',
    diffusers: 'diffusers',
  };
  const packageName = packageByBackend[backend];
  if (!packageName) return null;

  const target = _selectedServeTarget(panel);
  const params = new URLSearchParams();
  // Query parameters are only sent for a remote target (non-empty host).
  if (target.host) {
    params.set('host', target.host);
    if (target.port) params.set('ssh_port', target.port);
    if (target.venv) params.set('venv', target.venv);
  }
  const query = params.toString();

  const res = await fetch('/api/cookbook/packages' + (query ? '?' + query : ''), { credentials: 'same-origin' });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  const data = await res.json();
  const pkg = (data.packages || []).find(p => p.name === packageName);
  return { pkg, target };
}

// One-line readiness message for a backend on a target, built from the package
// entry's installed flag and its status_note / update_note (first non-empty).
function _runtimeNoteText(backend, pkg, target) {
  const labels = { vllm: 'vLLM', sglang: 'SGLang', llamacpp: 'llama.cpp', diffusers: 'Diffusers' };
  const label = labels[backend] || backend;
  if (!pkg) return `${label} readiness unavailable for ${target.label}.`;

  const note = pkg.status_note || pkg.update_note || '';
  const state = pkg.installed ? 'ready' : 'missing';
  return note
    ? `${label} ${state} on ${target.label}: ${note}`
    : `${label} ${state} on ${target.label}.`;
}

// ── Filter/sort cached model list ──
// Shows or hides the already-rendered cards in #hwfit-cached-list according to
// the active tag chip and the search box. A chip value prefixed with "fam:" is
// compared against the card's data-family, any other against its data-tag.
function _filterCachedList() {
  const list = document.getElementById('hwfit-cached-list');
  const tagContainer = document.getElementById('serve-tags');
  if (!list) return;

  const activeTag = tagContainer?.querySelector('.memory-cat-chip.active')?.dataset.serveTag || '';
  const searchVal = (document.getElementById('serve-search')?.value || '').toLowerCase().trim();
  const isFamily = activeTag.startsWith('fam:');
  const familyVal = isFamily ? activeTag.slice(4) : '';

  list.querySelectorAll('.memory-item[data-repo]').forEach(item => {
    const repo = (item.dataset.repo || '').toLowerCase();
    const tag = item.dataset.tag || '';
    const family = item.dataset.family || '';
    const tagMatch = !activeTag || (isFamily ? family === familyVal : tag === activeTag);
    const searchMatch = !searchVal || repo.includes(searchVal);
    item.style.display = (tagMatch && searchMatch) ? '' : 'none';
  });
}

// Is there a live download task for this repo in the Running tab? The cache
// reports any incomplete download dir as "downloading", but if nothing is
// actively pulling it, it's really a stalled/partial download — so we label it
// accordingly. Reads the running-tab tasks straight from localStorage (same
// key the running module writes) to avoid a cross-module import cycle.
// Never throws: unreadable or malformed stored data counts as "not downloading".
function _isActivelyDownloading(repoId) {
  // Without an id there is nothing to match against.
  if (!repoId) return false;
  try {
    const tasks = JSON.parse(localStorage.getItem('cookbook-tasks'));
    if (!Array.isArray(tasks)) return false;
    const short = String(repoId).split('/').pop();
    return tasks.some(t => {
      // Skip null/garbage entries instead of letting them abort the whole scan.
      if (!t || t.type !== 'download' || t.status !== 'running') return false;
      const taskRepo = t.payload?.repo_id;
      if (taskRepo === repoId || t.name === repoId) return true;
      // An empty short name (id ending in "/") would equal the empty short
      // name of every task that has no repo id, so it must not be compared.
      if (!short) return false;
      return t.name === short || String(taskRepo || '').split('/').pop() === short;
    });
  } catch {
    return false;
  }
}

// Same idea for serve: is there a live serve task for this repo?
Used to // surface a "running" pill on the Serve tab card. function _isActivelyServing(repoId) { try { const tasks = JSON.parse(localStorage.getItem('cookbook-tasks')) || []; const short = (repoId || '').split('/').pop(); return tasks.some(t => t.type === 'serve' && t.status === 'running' && (t.payload?.repo_id === repoId || t.name === repoId || t.name === short || (t.payload?.repo_id || '').split('/').pop() === short)); } catch { return false; } } function _formatGgufSize(bytes) { const n = Number(bytes || 0); if (!Number.isFinite(n) || n <= 0) return ''; if (n >= 1024 ** 3) return `${(n / (1024 ** 3)).toFixed(1)} GB`; if (n >= 1024 ** 2) return `${Math.round(n / (1024 ** 2))} MB`; return `${Math.max(1, Math.round(n / 1024))} KB`; } function _ggufFilesForModel(model) { return Array.isArray(model?.gguf_files) ? model.gguf_files.filter(f => f && typeof f.rel_path === 'string' && f.rel_path) : []; } function _runnableGgufFiles(model) { const files = _ggufFilesForModel(model); const primary = files.filter(f => (f.role || 'model') === 'model'); return primary.length ? primary : files; } function _ggufFileLabel(file) { const base = (file.name || file.rel_path || '').split('/').pop(); const size = _formatGgufSize(file.size_bytes); const quant = file.quant ? `${file.quant} ` : ''; const parts = Number(file.parts || 0); const split = parts > 1 ? `, ${parts} parts` : ''; const role = file.role && file.role !== 'model' ? ` ${file.role}` : ''; return `${quant}${base}${size || split ? 
` (${[size, split.replace(/^, /, '')].filter(Boolean).join(', ')})` : ''}${role}`; } function _shellPathExpr(path) { const s = String(path || ''); if (s === '~') return '${HOME}'; if (s.startsWith('~/')) return '${HOME}' + _shellQuote(s.slice(1)); return _shellQuote(s); } function _selectedGgufExpr(model, repo, relPath) { const rel = String(relPath || '').replace(/^\/+/, ''); if (!rel) return ''; if (model.is_local_dir && model.path) { const base = String(model.path || '').replace(/\/+$/, ''); return `$(printf %s ${_shellPathExpr(`${base}/${repo}/${rel}`)})`; } if (model.path) { const base = String(model.path || '').replace(/\/+$/, ''); return `$(printf %s ${_shellPathExpr(`${base}/models--${repo.replace(/\//g, '--')}/snapshots/${rel}`)})`; } const cacheRepo = repo.replace(/\//g, '--'); return `$(printf %s \${HOME}${_shellQuote(`/.cache/huggingface/hub/models--${cacheRepo}/snapshots/${rel}`)})`; } function _ggufSearchDirExpr(model, repo) { if (model.is_local_dir && model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/${repo}`); if (model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/models--${repo.replace(/\//g, '--')}/snapshots`); return `"$HOME/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}/snapshots"`; } function _rerenderCachedModels() { const list = document.getElementById('hwfit-cached-list'); const tagContainer = document.getElementById('serve-tags'); if (!list || !_cachedAllModels.length) return; const allModels = _cachedAllModels; const _h = (text) => `?`; const activeTag = tagContainer?.querySelector('.memory-cat-chip.active')?.dataset.serveTag || ''; const searchVal = (document.getElementById('serve-search')?.value || '').toLowerCase().trim(); const sortVal = document.getElementById('serve-sort')?.value || 'name'; const _parseSize = (s) => { const m = (s || '').match(/([\d.]+)\s*(GB|MB|KB)/i); if (!m) return 0; const n = parseFloat(m[1]); if (m[2] === 'GB') return n * 1024; if (m[2] === 
'MB') return n; return n / 1024; }; if (sortVal === 'name') allModels.sort((a, b) => (a.repo_id || '').localeCompare(b.repo_id || '')); else if (sortVal === 'size-desc') allModels.sort((a, b) => _parseSize(b.size) - _parseSize(a.size)); else if (sortVal === 'size-asc') allModels.sort((a, b) => _parseSize(a.size) - _parseSize(b.size)); else if (sortVal === 'recent') allModels.sort((a, b) => (b.mtime || 0) - (a.mtime || 0)); let html = ''; let visibleCount = 0; for (const m of allModels) { if (activeTag && m._tag !== activeTag) continue; if (searchVal && !(m.repo_id || '').toLowerCase().includes(searchVal)) continue; visibleCount++; const shortName = m.repo_id.split('/').pop() || m.repo_id; const hfLink = m.repo_id.includes('/') ? `https://huggingface.co/${m.repo_id}` : ''; const metaParts = []; if (m.repo_id.includes('/')) metaParts.push(m.repo_id.split('/')[0]); metaParts.push(m.size); if (m.path) { metaParts.push(`${esc(m.path)}`); } const ggufCount = _runnableGgufFiles(m).length; if (ggufCount > 1) metaParts.push(`${ggufCount} GGUFs`); // "downloading" status now renders as a title-row pill instead of // a meta-row text label, matching the "running" pill style and // living on the same line as the model name. const _isDownloading = m.status === 'downloading'; const _isDlActive = _isDownloading ? _isActivelyDownloading(m.repo_id) : false; const isSelectMode = document.getElementById('hwfit-cache-select')?.classList.contains('active'); html += `
`; html += ``; html += `
`; const _mc = modelColor(m.repo_id) || ''; const _runningPill = _isActivelyServing(m.repo_id) ? ` running` : ''; const _downloadingPill = _isDownloading ? ` ${_isDlActive ? 'downloading' : 'stalled'}` : ''; html += `
${modelLogo(m.repo_id)}${esc(shortName)}${hfLink ? ` HF ↗` : ''}${_runningPill}${_downloadingPill}
`; html += `
${metaParts.join(' \u00b7 ')}
`; html += `
`; const _bk = _detectBackend(m).backend; const _bkIco = _bk === 'llamacpp' ? '' : _bk === 'diffusers' ? '' : ''; html += `${_bkIco}`; html += `
`; html += `
`; } if (!visibleCount) html += '
No matching models
'; list.innerHTML = html; // Wire tag chips if (tagContainer) { tagContainer.querySelectorAll('.memory-cat-chip').forEach(chip => { chip.addEventListener('click', () => { tagContainer.querySelectorAll('.memory-cat-chip').forEach(c => c.classList.remove('active')); chip.classList.add('active'); _filterCachedList(); }); }); } // Long-press anywhere on a cached model card → click its ⋮ menu, so // mobile users don't have to hit the small 3-dot target precisely. list.querySelectorAll('.memory-item').forEach(item => { const menuBtn = item.querySelector('.hwfit-cached-menu-btn'); if (!menuBtn || item.dataset.lpWired === '1') return; item.dataset.lpWired = '1'; let _t = null; let _y = 0; const _cancel = () => { if (_t) { clearTimeout(_t); _t = null; } }; item.addEventListener('touchstart', (e) => { if (e.target.closest('button, a, input, textarea, .hwfit-cached-dropdown')) return; _y = e.touches?.[0]?.clientY ?? 0; _t = setTimeout(() => { _t = null; try { menuBtn.click(); } catch {} }, 500); }, { passive: true }); item.addEventListener('touchmove', (e) => { const y = e.touches?.[0]?.clientY ?? 0; if (Math.abs(y - _y) > 8) _cancel(); }, { passive: true }); item.addEventListener('touchend', _cancel, { passive: true }); item.addEventListener('touchcancel', _cancel, { passive: true }); }); // Wire menu on each cached model list.querySelectorAll('.hwfit-cached-menu-btn').forEach(btn => { btn.addEventListener('click', (e) => { e.stopPropagation(); // Toggle: if a dropdown for THIS button is already open, close it // (through its own dismiss so the Escape-stack entry goes with it). const existing = document.querySelector('.hwfit-cached-dropdown'); if (existing && existing._anchor === btn) { if (typeof existing._dismiss === 'function') existing._dismiss(); else { existing.remove(); btn.classList.remove('cookbook-menu-active'); } return; } // Otherwise close any other open menu (and clear its anchor's active // state) before opening fresh. 
document.querySelectorAll('.hwfit-cached-dropdown').forEach(d => { if (d._anchor) d._anchor.classList.remove('cookbook-menu-active'); if (typeof d._dismiss === 'function') d._dismiss(); else d.remove(); }); const item = btn.closest('.memory-item'); const repo = item?.dataset.repo; if (!repo) return; const m = allModels.find(x => x.repo_id === repo); const dropdown = document.createElement('div'); dropdown.className = 'hwfit-cached-dropdown'; dropdown._anchor = btn; btn.classList.add('cookbook-menu-active'); // Shared close — used by every item, the mobile Cancel, outside-click, // and the Escape arbiter (reassigned to the registry-aware close below). let closeDropdown = () => { dropdown.remove(); btn.classList.remove('cookbook-menu-active'); }; const _di = (svg) => `${svg}`; const _serveIco = ''; const _retryIco = ''; const _deleteIco = ''; const _selectIco = ''; const _schedIco = ''; const items = []; if (m && m.status === 'ready') items.push({ label: 'Serve', icon: _serveIco, action: 'serve' }); if (m && m.status === 'downloading') items.push({ label: 'Retry', icon: _retryIco, action: 'retry' }); if (m && m.status === 'ready') items.push({ label: 'Schedule…', icon: _schedIco, action: 'schedule' }); items.push({ label: 'Select', icon: _selectIco, action: 'select' }); items.push({ label: 'Delete', icon: _deleteIco, action: 'delete', danger: true }); for (const opt of items) { const div = document.createElement('div'); div.className = 'dropdown-item-compact' + (opt.danger ? ' dropdown-item-danger' : ''); div.innerHTML = _di(opt.icon) + '' + opt.label + ''; div.addEventListener('click', () => { closeDropdown(); if (opt.action === 'serve') item.click(); else if (opt.action === 'delete') _deleteCachedModel(repo, item, false, m); else if (opt.action === 'retry') _retryCachedModel(repo, m); else if (opt.action === 'schedule') { // Same entry point as the ^ button next to Launch — let // cookbookSchedule.js handle it. 
Expand the panel first // so the form has somewhere to mount. if (!item.querySelector('.hwfit-serve-panel')) item.click(); setTimeout(() => { const arrow = item.querySelector('.hwfit-serve-schedule-arrow'); if (arrow) arrow.click(); }, 120); } else if (opt.action === 'select') { const selectBtn = document.getElementById('hwfit-cache-select'); const bulkBar = document.getElementById('serve-bulk-bar'); if (selectBtn) { selectBtn.classList.add('active'); selectBtn.textContent = 'Cancel'; } if (bulkBar) bulkBar.classList.remove('hidden'); document.querySelectorAll('.serve-select-cb').forEach(dot => { dot.style.display = 'inline-block'; }); const dot = item.querySelector('.serve-select-cb'); if (dot) dot.classList.add('selected'); const count = document.querySelectorAll('.serve-select-cb.selected').length; const countEl = document.getElementById('serve-bulk-count'); if (countEl) countEl.textContent = count + ' selected'; const all = document.getElementById('serve-select-all'); const dots = document.querySelectorAll('.serve-select-cb'); if (all) all.checked = dots.length > 0 && count === dots.length; } }); dropdown.appendChild(div); } // Mobile-only Cancel — gives an explicit close on touch devices where // outside-tap-to-close is fiddly. Hidden on desktop via CSS. 
const _cancelIco = ''; const cancelDiv = document.createElement('div'); cancelDiv.className = 'dropdown-item-compact dropdown-cancel-mobile'; cancelDiv.innerHTML = _di(_cancelIco) + 'Cancel'; cancelDiv.addEventListener('click', () => { closeDropdown(); }); dropdown.appendChild(cancelDiv); const rect = btn.getBoundingClientRect(); dropdown.style.cssText = `position:fixed;z-index:10001;visibility:hidden;top:0;right:${window.innerWidth-rect.right}px;background:var(--panel);border:1px solid var(--border);border-radius:8px;padding:4px;box-shadow:0 8px 24px rgba(0,0,0,0.3);font-size:12px;`; document.body.appendChild(dropdown); // Clamp into the VISIBLE area (visualViewport, not innerHeight — they differ // on mobile under the dynamic toolbar). Flip above the button if there's no // room below, else clamp to the visible bottom edge, so it never runs // off-screen / grows the page. { const vv = window.visualViewport; const viewTop = vv ? vv.offsetTop : 0; const viewBottom = vv ? vv.offsetTop + vv.height : window.innerHeight; const dh = dropdown.offsetHeight; const mm = 8; let top = rect.bottom + 2; if (top + dh > viewBottom - mm) { const above = rect.top - 2 - dh; top = above >= viewTop + mm ? 
above : Math.max(viewTop + mm, viewBottom - dh - mm); } dropdown.style.top = top + 'px'; dropdown.style.visibility = ''; } closeDropdown = bindMenuDismiss(dropdown, () => { dropdown.remove(); btn.classList.remove('cookbook-menu-active'); }, (ev) => !dropdown.contains(ev.target) && ev.target !== btn); }); }); // Wire click on card to expand serve panel list.querySelectorAll('.memory-item[data-repo]').forEach(item => { item.addEventListener('click', (e) => { if (e.target.closest('a, .hwfit-cached-menu-btn, .memory-item-btn, .hwfit-serve-panel')) return; if (document.getElementById('hwfit-cache-select')?.classList.contains('active')) return; const repo = item.dataset.repo; if (!repo) return; const m = allModels.find(x => x.repo_id === repo); if (!m || m.status !== 'ready') return; // Toggle — close if already open if (item.classList.contains('doclib-card-expanded')) { const existingPanel = item.querySelector('.hwfit-serve-panel'); existingPanel?._cleanupRuntimeReadiness?.(); existingPanel?.remove(); item.classList.remove('doclib-card-expanded'); item.style.flexDirection = ''; item.style.alignItems = ''; list.style.minHeight = ''; list.style.maxHeight = ''; return; } // Collapse any other expanded list.querySelectorAll('.doclib-card-expanded').forEach(c => { const openPanel = c.querySelector('.hwfit-serve-panel'); openPanel?._cleanupRuntimeReadiness?.(); openPanel?.remove(); c.classList.remove('doclib-card-expanded'); c.style.flexDirection = ''; c.style.alignItems = ''; }); const shortName = repo.split('/').pop(); const _es = _envState; // The venv set per-server in Settings (server.envPath). Used as the venv // field default when the global active env path isn't carrying it, so a // configured server venv shows up without re-typing it. const _selSrv = _serverByVal?.(_es.remoteServerKey || _es.remoteHost || '') || {}; const _srvVenv = _selSrv.envPath || ''; // Serve state schema: { _byRepo: { : {...} }, _lastUsed: {...} }. 
// Loading priority: this-repo's saved settings → last-used (from any // model) as sensible first-run defaults → fall through to code defaults. // Legacy flat state (pre-schema) is also accepted as a last-resort fallback. let _allSs = {}; try { _allSs = JSON.parse(localStorage.getItem(SERVE_STATE_KEY)) || {}; } catch {} const _byRepo = (_allSs && typeof _allSs === 'object' && _allSs._byRepo) || {}; const _lastUsed = (_allSs && typeof _allSs === 'object' && _allSs._lastUsed) || null; const _isLegacyFlat = _allSs && typeof _allSs === 'object' && !_allSs._byRepo && !_allSs._lastUsed; const ss = (_byRepo[repo] && typeof _byRepo[repo] === 'object') ? _byRepo[repo] : (_lastUsed || (_isLegacyFlat ? _allSs : {})); const detectedBackend = _detectBackend(m).backend; const _allowedBackends = new Set(_isWindows() ? ['llamacpp'] : (_isMetal() ? ['llamacpp', 'ollama'] : ['vllm', 'sglang', 'llamacpp', 'ollama', 'diffusers'])); const defaultBackend = (ss._forceBackend && ss.backend && _allowedBackends.has(ss.backend)) ? ss.backend : detectedBackend; const savedMatchesBackend = !!ss._forceBackend || (ss.backend || 'vllm') === detectedBackend; const sv = (k, def) => (ss[k] !== undefined && savedMatchesBackend) ? ss[k] : def; const defaultTp = defaultBackend === 'llamacpp' ? '1' : sv('tp', '1'); const detectedGpuIds = _allGpuIds(_getGpuToggleTotal?.()); const defaultGpus = defaultBackend === 'llamacpp' ? '0' : (savedMatchesBackend && _hasOwn(ss, 'gpus') && String(ss.gpus || '').trim() ? ss.gpus : (_es.gpus || detectedGpuIds)); const tpOpts = [1,2,4,8].map(n => `${n}`).join(''); const dtypeOpts = ['auto','float16','bfloat16'].map(d => ``).join(''); // KV cache default — most models are fine on auto, but a few // (e.g. DeepSeek-V3/V4/R1 MoE) need fp8 explicitly or the launch // OOMs. _detectModelOptimizations seeds opts.kvCacheDtype for // those families; honour it unless the user has a saved override. 
const _kvOptsCheck = _detectModelOptimizations(repo); const _kvAutoDefault = (_kvOptsCheck && _kvOptsCheck.kvCacheDtype) || 'auto'; const _kvSelected = sv('vllm_kv_cache_dtype', _kvAutoDefault); const vllmKvCacheOpts = ['auto','fp8'].map(d => ``).join(''); const _l = (name, tip) => `${name}?`; const _ggufChoices = _runnableGgufFiles(m); const _savedGguf = String(sv('gguf_file', '') || ''); const _defaultGguf = _ggufChoices.some(f => f.rel_path === _savedGguf) ? _savedGguf : (_ggufChoices[0]?.rel_path || ''); const _ggufOptions = _ggufChoices.map(f => `` ).join(''); // Build save slots const _allPresets = _loadPresets(); const _repoShort = repo.split('/').pop(); const _modelPresets = _presetsForModel(_allPresets, repo); // Saved configs live in a single dropdown (used to be a row of squeezed // chips). The toggle shows the count; the menu lists each config (click to // load, × to delete) plus a "Save current config" row — see _showSavedConfigMenu. // Split button: "Save" saves the current config directly; the arrow opens // the dropdown of saved configs (load / delete). Arrow shows the count. // The arrow button shows just the saved-config count next to a "▾". // Spell out what the number means in the tooltip so users don't have // to click it to find out the badge isn't a notification dot. const _arrowLabel = _modelPresets.length > 0 ? `${_modelPresets.length} ▾` : '▾'; const _arrowTitle = _modelPresets.length > 0 ? `${_modelPresets.length} saved launch config${_modelPresets.length === 1 ? '' : 's'} for ${_repoShort} — click ▾ to load or delete` : `No saved launch configs for ${_repoShort} yet — click Save to add one`; // Wrap the Save split in a