from
// the first existing button as the insertion target.
try {
const _existing = Array.from(panel.querySelectorAll('.cookbook-gpu-btn'));
const _grp = _existing[0] && _existing[0].parentElement;
if (_grp) {
const _have = new Set(_existing.map(b => parseInt(b.dataset.gpu, 10)));
const _activeStr = (panel.querySelector('[data-field="gpus"]')?.value || '').split(',').map(s => s.trim());
data.gpus.forEach(g => {
if (_have.has(g.index)) return;
const _b = document.createElement('button');
_b.type = 'button';
_b.className = 'cookbook-gpu-btn' + (_activeStr.includes(String(g.index)) ? ' active' : '');
_b.dataset.gpu = String(g.index);
_b.textContent = String(g.index);
_grp.appendChild(_b);
// Re-wire the click handler the same way the panel did
// on first render. Toggles active + rewrites the hidden
// gpus input from the live set of active buttons.
_b.addEventListener('click', () => {
_b.classList.toggle('active');
const activeBtns = [...panel.querySelectorAll('.cookbook-gpu-btn.active')];
const ids = activeBtns.map(x => x.dataset.gpu).sort((a, b) => +a - +b).join(',');
const hidden = panel.querySelector('[data-field="gpus"]');
if (hidden) { hidden.value = ids; hidden.dispatchEvent(new Event('change', { bubbles: true })); }
});
});
}
} catch (_) {}
panel.querySelectorAll('.cookbook-gpu-btn').forEach(b => {
const idx = parseInt(b.dataset.gpu);
const g = panel._gpuProbe.byIdx.get(idx);
b.classList.remove('gpu-free', 'gpu-busy', 'gpu-missing');
if (!g) {
// GPU doesn't exist on this server — hide it rather than show a
// dead button. The panel renders up to 8 before the count is known
// (e.g. a single-GPU box would otherwise show 0–7).
b.style.display = 'none';
b.classList.remove('active');
return;
}
b.style.display = '';
const freeGb = (g.free_mb / 1024).toFixed(1);
const totalGb = (g.total_mb / 1024).toFixed(1);
const procCount = (g.processes && g.processes.length) || 0;
const procLine = procCount
? `\n${procCount} process(es) — click to view/kill`
: '';
const backendLine = g.backend || data.backend ? `\nprobe: ${g.source || data.source || g.backend || data.backend}` : '';
b.title = `GPU ${idx} ${g.name}\n${freeGb} / ${totalGb} GB free · util ${g.util_pct}%${procLine}${backendLine}`;
// Treat any GPU with attached compute processes OR <85% free as busy.
const isBusy = procCount > 0 || g.busy;
b.classList.add(isBusy ? 'gpu-busy' : 'gpu-free');
});
if (!silent) {
if (data.gpus.length === 0) {
uiModule.showToast('No GPU memory probe data available', 4000);
} else {
const summary = data.gpus.map(g => {
const procs = (g.processes && g.processes.length) || 0;
return `GPU${g.index}: ${(g.free_mb/1024).toFixed(1)}G free` + (procs ? ` (${procs}p)` : '');
}).join(' · ');
uiModule.showToast(summary + ' · dbl-click a GPU button to view/kill processes', 7000);
}
}
return data;
};
_probeBtn.addEventListener('click', async () => {
try { await _withSpinner(_probeBtn, () => _runProbe(false)); }
catch (e) { uiModule.showToast('GPU probe error: ' + e.message, 6000); }
});
// Auto-probe (silent) on open so the GPU buttons reflect the real count
// — a single-GPU server should show just GPU 0, not the placeholder 0–7.
// Falls back to the full 0–7 set if the server is unreachable.
_runProbe(true).catch(() => {});
if (_clearBtn) {
_clearBtn.addEventListener('click', async () => {
try {
await _withSpinner(_clearBtn, async () => {
// Always probe first so we have fresh PID list
const data = await _runProbe();
if (!data) return;
const pids = [];
for (const g of data.gpus) {
for (const p of (g.processes || [])) pids.push({ pid: p.pid, name: p.name });
}
if (pids.length === 0) {
uiModule.showToast('No GPU processes to clear', 3000);
return;
}
const summary = pids.map(p => `${p.pid} (${p.name})`).join(', ');
if (!await window.styledConfirm(`Clear server GPU memory by sending SIGTERM to ${pids.length} process(es)?\n\n${summary}\n\nIf any survive, the next prompt can force-kill them with SIGKILL.`, { confirmText: 'SIGTERM', danger: true })) return;
// First pass: SIGTERM
const hostVal = panel._gpuProbe.host;
const results = await Promise.all(pids.map(p =>
fetch('/api/cookbook/kill-pid', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ pid: p.pid, signal: 'TERM', host: hostVal || null }),
}).then(r => r.json()).catch(e => ({ ok: false, error: e.message }))
));
const okCount = results.filter(r => r.ok).length;
uiModule.showToast(`SIGTERM → ${okCount}/${pids.length} processes`, 5000);
// Wait, then re-probe; if survivors, offer SIGKILL
await new Promise(r => setTimeout(r, 1500));
const after = await _runProbe();
if (!after) return;
const survivors = [];
for (const g of after.gpus) {
for (const p of (g.processes || [])) {
if (pids.some(orig => orig.pid === p.pid)) survivors.push(p);
}
}
if (survivors.length === 0) {
uiModule.showToast(`Cleared ${pids.length} GPU process(es)`, 4000);
return;
}
if (!await window.styledConfirm(`${survivors.length} process(es) survived SIGTERM:\n\n${survivors.map(p => p.pid + ' (' + p.name + ')').join(', ')}\n\nForce-kill with SIGKILL?`, { confirmText: 'SIGKILL', danger: true })) return;
const killResults = await Promise.all(survivors.map(p =>
fetch('/api/cookbook/kill-pid', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ pid: p.pid, signal: 'KILL', host: hostVal || null }),
}).then(r => r.json()).catch(e => ({ ok: false, error: e.message }))
));
const killOk = killResults.filter(r => r.ok).length;
uiModule.showToast(`SIGKILL → ${killOk}/${survivors.length} processes`, 5000);
await new Promise(r => setTimeout(r, 800));
await _runProbe();
});
} catch (e) {
uiModule.showToast('Clear Server error: ' + e.message, 6000);
}
});
}
// After a probe, right-clicking (contextmenu) or double-clicking a GPU button opens the kill popup; plain clicks are not intercepted here.
panel.querySelectorAll('.cookbook-gpu-btn').forEach(btn => {
btn.addEventListener('contextmenu', (ev) => {
if (!panel._gpuProbe.byIdx) return;
const g = panel._gpuProbe.byIdx.get(parseInt(btn.dataset.gpu));
if (!g) return;
ev.preventDefault();
_openProbePopup(btn, g, panel._gpuProbe.host);
});
btn.addEventListener('dblclick', (ev) => {
if (!panel._gpuProbe.byIdx) return;
const g = panel._gpuProbe.byIdx.get(parseInt(btn.dataset.gpu));
if (!g) return;
ev.preventDefault();
_openProbePopup(btn, g, panel._gpuProbe.host);
});
});
}
// Update preview on input change
panel.querySelectorAll('.hwfit-sf').forEach(el => {
el.addEventListener('input', updateCmd);
el.addEventListener('change', (e) => {
if (e.target.dataset.field === 'backend') {
const extraEl = panel.querySelector('[data-field="extra"]');
if (extraEl) extraEl.value = '';
updateBackendVisibility();
updateRuntimeReadinessNote();
}
if (e.target.dataset.field === 'venv') {
updateRuntimeReadinessNote();
}
updateCmd();
});
});
// Themed +/- buttons next to spec_tokens — step the adjacent number input.
panel.querySelectorAll('.hwfit-numstep-btn').forEach(btn => {
btn.addEventListener('click', (e) => {
e.preventDefault();
e.stopPropagation();
const input = btn.parentElement?.querySelector('input[type="number"]');
if (!input) return;
const step = parseInt(btn.dataset.step, 10) || 0;
const min = input.min !== '' ? Number(input.min) : -Infinity;
const max = input.max !== '' ? Number(input.max) : Infinity;
const next = Math.min(max, Math.max(min, (Number(input.value) || 0) + step));
input.value = String(next);
input.dispatchEvent(new Event('input', { bubbles: true }));
input.dispatchEvent(new Event('change', { bubbles: true }));
});
});
// Track manual edits
let _cmdManuallyEdited = false;
const _cmdTextarea = panel.querySelector('.hwfit-serve-cmd');
if (_cmdTextarea) _cmdTextarea.addEventListener('input', () => { _cmdManuallyEdited = true; });
// Cancel support — tears down the serve config panel and undoes the
// "expanded" presentation of its card (same effect as tapping the row to
// toggle it shut). Mobile users wanted an explicit "back out" affordance
// next to Launch.
const _collapsePanel = () => {
  // Optional hook: only invoked when the panel carries one.
  panel._cleanupRuntimeReadiness?.();
  panel.remove();
  item.classList.remove('doclib-card-expanded');
  // Clear the inline layout overrides on the card…
  for (const prop of ['flexDirection', 'alignItems']) item.style[prop] = '';
  // …and the height constraints on the list, when there is a list.
  if (!list) return;
  for (const prop of ['minHeight', 'maxHeight']) list.style[prop] = '';
};
panel.querySelector('.hwfit-serve-cancel')?.addEventListener('click', (ev) => {
ev.stopPropagation();
_collapsePanel();
});
// Page-wide Escape handler: collapses the open serve panel. Escape pressed
// while the event target is a form field or contenteditable element is left
// alone — there the user wants Esc to deselect / blur, not to throw away the
// form they are configuring.
const _onEscClose = (ev) => {
  if (ev.key !== 'Escape') return;

  // The panel is no longer in the document: this listener is stale, so
  // unregister it (same capture flag as below) instead of acting.
  if (!panel.isConnected) {
    document.removeEventListener('keydown', _onEscClose, true);
    return;
  }

  const origin = ev.target;
  const typing = Boolean(origin) && (
    ['INPUT', 'TEXTAREA', 'SELECT'].includes(origin.tagName) || origin.isContentEditable
  );
  if (typing) return;

  // There is no explicit "is a popover open?" check here; the handler relies
  // on dropdown/menu popovers consuming their own Esc via stopPropagation.
  // NOTE(review): this listener is (un)registered with capture=true on
  // document — confirm the popovers intercept Esc early enough to win.
  ev.stopPropagation();
  _collapsePanel();
};
document.addEventListener('keydown', _onEscClose, true);
// Launch button
panel.querySelector('.hwfit-serve-launch').addEventListener('click', async (ev) => {
const _launchBtn = ev.currentTarget;
// Immediate visual feedback. The GPU probe + backend-warning prompt
// below can take ~1-2s before the task UI shows up, leaving the
// button looking dead. Drop in the same whirlpool spinner the rest of
// the cookbook uses (Probe GPUs, dependency installs, etc.) right
// away; restored on any early-return / failure path below.
const _origBtnHtml = _launchBtn.innerHTML;
const _origBtnDisabled = _launchBtn.disabled;
let _launchingWp = null;
const _restoreLaunchBtn = () => {
try { _launchingWp?.destroy?.(); } catch {}
_launchingWp = null;
_launchBtn.innerHTML = _origBtnHtml;
_launchBtn.disabled = _origBtnDisabled;
};
_launchBtn.disabled = true;
_launchBtn.innerHTML = '';
const _launchingWrap = document.createElement('span');
_launchingWrap.className = 'hwfit-serve-launching';
_launchingWrap.style.cssText = 'display:inline-flex;align-items:center;gap:6px;';
_launchingWp = spinnerModule.createWhirlpool(18);
if (_launchingWp?.element) {
_launchingWp.element.style.margin = '0';
_launchingWp.element.style.transform = 'translateY(-2px)';
_launchingWrap.appendChild(_launchingWp.element);
}
const _launchingLabel = document.createElement('span');
_launchingLabel.textContent = 'Launching…';
_launchingWrap.appendChild(_launchingLabel);
_launchBtn.appendChild(_launchingWrap);
// Final safety net: never launch with ctx beyond the model's trained
// limit (or the absolute sanity ceiling when the limit is unknown). A
// stale preset or typo (e.g. 16000000) overflows and, with a quantized
// KV cache, can crash the GPU. Skip only if the user hand-edited the raw
// command (then we respect their literal text).
if (!_cmdManuallyEdited) _clampCtx(true);
if (!_cmdManuallyEdited) updateCmd();
// Pasted commands often carry hidden newlines / CRs / tabs from copies
// out of model cards or wrapped help text. The backend cmd allowlist
// rejects \n / \r outright (`Invalid characters in cmd`), so collapse
// all whitespace to single spaces before launch — same effect as the
// user manually re-flowing the textarea, no behavior change.
const _rawLaunchCmd = _cmdTextarea ? _cmdTextarea.value : panel._cmd;
const launchCmd = String(_rawLaunchCmd || '').replace(/\s+/g, ' ').trim();
if (_cmdTextarea && _cmdTextarea.value !== launchCmd) _cmdTextarea.value = launchCmd;
const serveState = {};
panel.querySelectorAll('.hwfit-sf').forEach(el => {
if (el.type === 'checkbox') serveState[el.dataset.field] = el.checked;
else serveState[el.dataset.field] = el.value;
});
serveState.backend = serveState.backend || (_detectBackend(m).backend) || 'vllm';
// Pre-launch: check our own task list for a serve already running
// on this host. Offer to stop+launch as the default action — the
// SSH-based port probe below is more thorough but it can miss
// when SSH glitches or `ss` isn't installed. This catches the
// common case instantly without waiting for a network round-trip.
try {
const _runningMod = await import('./cookbookRunning.js');
const _hostStr = _envState.remoteHost || '';
const _active = (_runningMod._loadTasks ? _runningMod._loadTasks() : []).filter(t =>
t && t.type === 'serve'
&& (t.remoteHost || '') === _hostStr
&& (t.status === 'running' || t.status === 'ready' || t._serveReady)
);
if (_active.length) {
const _names = _active.map(t => t.payload?.repo_id || t.repo || t.name || '?').filter(Boolean);
const _ok = await window.styledConfirm(
`${_active.length} model${_active.length === 1 ? '' : 's'} already serving on ${_hostStr || 'local'} (${_names.join(', ')}). Port 8000 will collide. Stop the running model and launch this one?`,
{ title: 'Server already running', confirmText: 'Stop & launch', cancelText: 'Cancel' },
);
if (!_ok) { _restoreLaunchBtn(); return; }
// Kill each active serve; prefer the rendered Stop button so
// endpoint cleanup + Ollama unload run normally. Fall back to
// a raw tmux kill when the Active tab isn't in the DOM.
for (const t of _active) {
try {
const _el = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
const _btn = _el?.querySelector('.cookbook-task-action-stop');
if (_btn) {
_btn.click();
} else if (_runningMod._tmuxGracefulKill) {
await fetch('/api/shell/exec', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ command: _runningMod._tmuxGracefulKill(t) }),
});
}
} catch (_killErr) { /* best-effort */ }
}
// Give the OS a beat to release port 8000.
await new Promise(r => setTimeout(r, 2500));
}
} catch (_e) { /* best-effort */ }
const backendWarning = _serveBackendWarning(m, repo, serveState.backend, serveState);
if (backendWarning) {
_restoreLaunchBtn();
await window.styledConfirm(backendWarning.body, {
title: backendWarning.title,
confirmText: 'Edit settings',
cancelText: 'Close',
});
return;
}
// Pre-launch GPU probe — common failure pattern: vLLM/SGLang launched
// on a host where no GPU is visible (driver missing, $CUDA_VISIBLE_DEVICES
// unset, container without --gpus). Catch it BEFORE the user spends
// minutes watching the task fail.
const _needsGpu = ['vllm', 'sglang'].includes(serveState.backend)
|| (serveState.backend === 'diffusers');
if (_needsGpu) {
try {
const _probeHost = (_envState.remoteHost || '').trim();
const _probeParams = new URLSearchParams();
if (_probeHost) {
_probeParams.set('host', _probeHost);
const _sp = (_serverByVal?.(_envState.remoteServerKey || _probeHost) || {}).port;
if (_sp) _probeParams.set('ssh_port', _sp);
}
const _probeRes = await fetch('/api/cookbook/gpus' + (_probeParams.toString() ? '?' + _probeParams : ''), { credentials: 'same-origin' });
const _probeData = await _probeRes.json();
const _probeGpus = Array.isArray(_probeData) ? _probeData : (_probeData.gpus || []);
if (!_probeGpus.length) {
const _proceed = await window.styledConfirm(
`No GPU detected on ${_probeHost ? _probeHost : 'this host'}. ${serveState.backend.toUpperCase()} needs a visible CUDA/ROCm accelerator to start — launching now will most likely crash early.\n\nLaunch anyway?`,
{ title: 'No GPU detected', confirmText: 'Launch anyway', cancelText: 'Cancel', danger: true },
);
if (!_proceed) { _restoreLaunchBtn(); return; }
}
} catch {
// Network / probe failure — don't block. Better to let the launch
// proceed than to silently refuse because the probe endpoint
// hiccuped (the user can read the real error in the task output).
}
}
// Pre-launch PORT probe — second most common failure pattern is
// collision with an already-running server (vllm crashing with
// "Address already in use" because Ollama owns 11434, or a
// previous vllm on the same port wasn't killed). The post-mortem
// "Suggested action: Kill existing vLLM" came AFTER the failed
// launch — user wants to know BEFORE clicking Launch. Parse the
// port out of the cmd, ssh-check who owns it on the target host,
// and offer to abort or proceed.
try {
const _portMatch = launchCmd.match(/(?:^|\s)(?:--port|-p|--host\s+\S+\s+--port)\s+(\d{2,5})\b/)
|| launchCmd.match(/(?:^|\s)--port=(\d{2,5})\b/)
|| launchCmd.match(/OLLAMA_HOST=[^:\s]+:(\d{2,5})\b/);
const _port = _portMatch ? _portMatch[1] : '';
if (_port) {
const _portHost = (_envState.remoteHost || '').trim();
const _checkInner = `ss -tlnp 2>/dev/null | awk '$4 ~ /:${_port}$/ {print; exit}' || netstat -tlnp 2>/dev/null | awk '$4 ~ /:${_port}$/ {print; exit}'`;
const _cmd = _portHost
? `ss h ${_portHost} <<<"" 2>/dev/null; ssh -o ConnectTimeout=4 -o StrictHostKeyChecking=no ${_portHost} ${JSON.stringify(_checkInner)}`
: _checkInner;
const _res = await fetch('/api/shell/exec', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ command: _cmd }),
});
const _data = await _res.json().catch(() => ({}));
const _stdout = (_data.stdout || '').trim();
if (_stdout) {
// Try to surface the process name from `users:(("name",pid=...,...))`.
const _procMatch = _stdout.match(/users:\(\("([^"]+)",pid=(\d+)/);
const _procDesc = _procMatch
? `${_procMatch[1]} (PID ${_procMatch[2]})`
: 'another process';
const _hostLabel = _portHost ? _portHost : 'this host';
const _proceed = await window.styledConfirm(
`Port ${_port} on ${_hostLabel} is already in use by ${_procDesc}. Launching ${serveState.backend.toUpperCase()} now will fail with "Address already in use".\n\nStop the existing process first, OR change the --port in the command above, OR launch anyway and watch it crash.`,
{
title: `Port ${_port} taken`,
confirmText: 'Launch anyway',
cancelText: 'Cancel',
danger: true,
},
);
if (!_proceed) { _restoreLaunchBtn(); return; }
}
}
} catch {
// Probe failure — don't block. If the port check can't run we'd
// rather let the launch try than silently refuse.
}
// Save in the { _byRepo, _lastUsed } schema — no legacy flat keys at
// the root so per-model state doesn't leak between models.
// Stamp `_forceBackend: true` so the next open of this model defaults
// to the launched configuration end-to-end, even when the detector
// would have picked a different backend. Without this flag, the
// `savedMatchesBackend` gate inside sv() throws away every saved
// value when the detected backend doesn't match — the user opens
// Serve again and the panel looks like a fresh form despite a
// known-good prior launch.
try {
let cur = {};
try { cur = JSON.parse(localStorage.getItem(SERVE_STATE_KEY)) || {}; } catch {}
const byRepo = (cur && cur._byRepo && typeof cur._byRepo === 'object') ? cur._byRepo : {};
const _saved = { ...serveState, _forceBackend: true };
byRepo[repo] = _saved;
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: _saved }));
} catch {}
const origEnv = _envState.env;
const origEnvPath = _envState.envPath;
const venvVal = panel.querySelector('[data-field="venv"]')?.value?.trim();
const gpusVal = panel.querySelector('[data-field="gpus"]')?.value?.trim();
const origGpus = _envState.gpus;
// Resolve the target host from the visible Server dropdown — the reliable
// source. Relying on _envState.remoteHost silently sent serves to Local
// when that value was stale/empty. Pass it explicitly to the launcher.
let serveHost = _envState.remoteHost || '';
let _srvEnv = '', _srvEnvPath = '';
const _ssEl = document.getElementById('hwfit-server-select') || document.getElementById('hwfit-dl-server');
if (_ssEl && _ssEl.value != null) {
if (_ssEl.value === 'local') serveHost = '';
else {
const _srv = _serverByVal?.(_ssEl.value) || _envState.servers[parseInt(_ssEl.value)];
if (_srv) {
serveHost = _srv.host;
_srvEnv = _srv.env || '';
_srvEnvPath = _srv.envPath || '';
}
}
}
// The venv field wins; otherwise fall back to the env configured for the
// selected server in Settings, so the activation isn't silently dropped
// when the field is left blank (the per-server venv wasn't being applied).
if (venvVal) { _envState.env = 'venv'; _envState.envPath = venvVal; }
else if (_srvEnvPath) { _envState.env = (_srvEnv === 'conda' ? 'conda' : 'venv'); _envState.envPath = _srvEnvPath; }
if (gpusVal) _envState.gpus = gpusVal;
try {
await _withSpinner(_launchBtn, async () => {
// Pass the exact form values so the running task can be re-opened
// in the Serve panel pre-filled with these settings (Edit button).
await _launchServeTask(shortName, repo, launchCmd, serveState, serveHost);
});
} finally {
_envState.env = origEnv;
_envState.envPath = origEnvPath;
_envState.gpus = origGpus;
}
});
// Copy button — now icon-only, so flash a green checkmark on success
// instead of swapping to text (which would also break the width).
panel.querySelector('.hwfit-serve-copy').addEventListener('click', (e) => {
// Without stopPropagation the click bubbles up to the
// .doclib-card click handler that toggles the expand state →
// copying collapses the whole serve panel mid-flight.
e.preventDefault();
e.stopPropagation();
const cmd = panel.querySelector('.hwfit-serve-cmd').value;
_copyText(cmd).then(() => {
const btn = panel.querySelector('.hwfit-serve-copy');
const origHtml = btn.innerHTML;
btn.innerHTML = '
';
btn.classList.add('copied');
setTimeout(() => { btn.innerHTML = origHtml; btn.classList.remove('copied'); }, 1500);
});
});
});
});
}
// ── Delete / retry cached model ──
// Work out which host the cached-model list was scanned from, mirroring
// _fetchCachedModels — so a delete targets the SAME machine the model
// actually lives on, not just the globally-selected serve host.
// Returns '' for the local machine; falls back to _envState.remoteHost when
// the cache-server dropdown is absent or its value matches no known server.
function _resolveCacheHost() {
  const defaultHost = _envState.remoteHost || '';
  const dropdown = document.getElementById('hwfit-cache-server');
  if (!dropdown) return defaultHost;

  const choice = dropdown.value;
  if (choice === 'local') return '';

  // Lookup order: shared resolver → numeric index into servers → server name.
  const byIndex = () => (
    /^\d+$/.test(String(choice)) ? _envState.servers[Number.parseInt(choice, 10)] : null
  );
  const server = _serverByVal?.(choice)
    || byIndex()
    || _envState.servers.find(entry => entry.name === choice);

  return server ? server.host : defaultHost;
}
/**
 * Delete a cached model's files on the host the cache list was scanned from,
 * then animate its row away and drop it from the in-memory model list.
 *
 * @param {string} repo - Repo id (or folder name for custom-dir models).
 * @param {?HTMLElement} itemEl - The model's row; when given it is covered by
 *   a spinner overlay while the delete runs and removed on success.
 * @param {boolean} [skipConfirm=false] - Skip the confirmation dialog.
 * @param {?Object} [model=null] - Scan entry for the model; looked up in
 *   `_cachedAllModels` by `repo_id` when omitted.
 * @returns {Promise<void>} Resolves when the delete attempt has finished;
 *   failures are reported through `uiModule.showError`, not thrown.
 */
async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = null) {
  // Safety net: `repo` is spliced into `rm -rf "<dir>/<repo>"` (or the
  // PowerShell equivalent). An empty id or a `.` / `..` segment would widen
  // the delete to the whole model directory or its parent, and `"`, backtick,
  // `$` or `\` would escape / expand inside the double-quoted argument — so
  // refuse such ids outright instead of building a command from them.
  const segments = typeof repo === 'string' ? repo.split('/') : [''];
  const unsafeRepo = segments.some(seg => seg === '' || seg === '.' || seg === '..')
    || /["`$\\\x00-\x1f]/.test(repo);
  if (unsafeRepo) {
    uiModule.showError(`Delete failed: invalid model id ${JSON.stringify(repo)}`);
    return;
  }

  if (!skipConfirm && !(await uiModule.styledConfirm(`Delete ${repo} from cache?`, { confirmText: 'Delete', danger: true }))) return;
  const m = model || _cachedAllModels.find(x => x.repo_id === repo);

  // Delete the EXACT on-disk path the scan reported. Models in a custom
  // model dir live at <path>/<repo>; HF-cache models at
  // <path>/models--<org>--<name>. The old code always rm'd the hardcoded
  // ~/.cache/huggingface/hub path, so models in a custom dir were never
  // removed and reappeared on the next scan. m.path is already absolute
  // (os.path.expanduser ran on the host); only the bare fallback uses ~.
  let target;
  if (m && m.is_local_dir && m.path) {
    target = `${m.path}/${repo}`;
  } else if (m && m.path) {
    target = `${m.path}/models--${repo.replace(/\//g, '--')}`;
  } else {
    target = `~/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}`;
  }

  const host = _resolveCacheHost();
  let cmd;
  if (_isWindows()) {
    const winTarget = target.startsWith('~')
      ? target.replace(/^~/, '$env:USERPROFILE').replace(/\//g, '\\')
      : target.replace(/\//g, '\\');
    cmd = `Remove-Item -Recurse -Force "${winTarget}" -ErrorAction SilentlyContinue`;
    if (host) {
      const pf = _sshPrefix(_getPort(host));
      cmd = `ssh ${pf}${host} "powershell -Command \\"${cmd}\\""`;
    }
  } else {
    // $HOME expands inside double quotes; ~ would not, so normalize the
    // fallback. Quoting also handles spaces in custom model-dir paths.
    const unixTarget = target.startsWith('~') ? target.replace(/^~/, '$HOME') : target;
    cmd = `rm -rf "${unixTarget}"`;
    if (host) cmd = _sshCmd(host, cmd, _getPort(host));
  }

  // Deleting a large model (tens/hundreds of GB) can take a while, especially
  // over SSH — show a whirlpool spinner on the row so it doesn't look frozen.
  let _wp = null, _prevPos = '';
  if (itemEl) {
    _wp = spinnerModule.createWhirlpool(18);
    const ov = document.createElement('div');
    ov.className = 'cookbook-delete-overlay';
    // Just the whirlpool, centered — no "Deleting…" text.
    ov.style.cssText = 'position:absolute;inset:0;display:flex;align-items:center;justify-content:center;background:color-mix(in srgb, var(--panel, var(--bg)) 82%, transparent);z-index:5;border-radius:inherit;';
    ov.appendChild(_wp.element);
    // Remember the inline position so the finally block can put it back.
    _prevPos = itemEl.style.position;
    if (getComputedStyle(itemEl).position === 'static') itemEl.style.position = 'relative';
    itemEl.style.pointerEvents = 'none';
    itemEl.appendChild(ov);
  }

  try {
    const res = await fetch('/api/shell/exec', {
      method: 'POST', credentials: 'same-origin',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ command: cmd }),
    });
    if (!res.ok) { uiModule.showError(`Delete failed (${res.status})`); return; }
    if (itemEl) {
      // Collapse-and-fade the row, then detach it once the transition is over.
      itemEl.querySelector('.cookbook-delete-overlay')?.remove();
      itemEl.style.transition = 'opacity 0.24s ease, transform 0.24s ease, max-height 0.28s ease, padding 0.28s ease, margin 0.28s ease';
      itemEl.style.maxHeight = `${Math.max(itemEl.getBoundingClientRect().height, itemEl.scrollHeight)}px`;
      itemEl.style.overflow = 'hidden';
      itemEl.style.opacity = '0';
      itemEl.style.transform = 'translateX(-10px) scale(0.985)';
      itemEl.style.paddingTop = '0';
      itemEl.style.paddingBottom = '0';
      itemEl.style.marginTop = '0';
      itemEl.style.marginBottom = '0';
      requestAnimationFrame(() => { itemEl.style.maxHeight = '0'; });
      await new Promise(resolve => setTimeout(resolve, 300));
      if (itemEl.parentElement) itemEl.remove();
    }
    // Drop from the in-memory list so a re-render/filter doesn't resurrect it.
    _cachedAllModels = _cachedAllModels.filter(x => x.repo_id !== repo);
  } catch (e) {
    uiModule.showError('Delete failed: ' + (e && e.message ? e.message : e));
  } finally {
    // Tear down the spinner. On success the row is already gone; on error the
    // row survives, so restore it (remove overlay, re-enable interaction).
    if (_wp) { try { _wp.destroy(); } catch {} }
    if (itemEl && itemEl.isConnected) {
      itemEl.querySelector('.cookbook-delete-overlay')?.remove();
      itemEl.style.pointerEvents = '';
      itemEl.style.position = _prevPos;
    }
  }
}
/**
 * Re-run the download for a cached model via `_retryDownload`.
 *
 * Builds the request from the repo id, the HF token (if set), the currently
 * selected serve target (host / ssh port / platform) and an environment
 * activation prefix derived from `_envState.env` + `_envState.envPath`.
 *
 * @param {string} repo - Repo id to download again.
 * @param {?Object} m - Model entry; its `name` (when present) supplies the
 *   label, otherwise the repo id does. Only the last `/` segment is used.
 */
function _retryCachedModel(repo, m) {
  const request = { repo_id: repo };
  if (_envState.hfToken) request.hf_token = _envState.hfToken;

  const scope = document.getElementById('cookbook-modal') || document;
  const { host, port, platform } = _selectedServeTarget(scope);
  if (host) {
    request.remote_host = host;
    if (port) request.ssh_port = port;
  }
  if (platform) request.platform = platform;

  // Append the activate-script suffix unless the path already ends with it.
  const ensureSuffix = (path, suffix) => (path.endsWith(suffix) ? path : path + suffix);

  const windows = _isWindows();
  const { env: envKind, envPath } = _envState;
  if (envPath && envKind === 'venv') {
    request.env_prefix = windows
      ? '& ' + _psQuote(ensureSuffix(envPath, '\\Scripts\\Activate.ps1'))
      : 'source ' + _shellQuote(ensureSuffix(envPath, '/bin/activate'));
  } else if (envPath && envKind === 'conda') {
    request.env_prefix = windows
      ? 'conda activate ' + _psQuote(envPath)
      : 'eval "$(conda shell.bash hook)" && conda activate ' + _shellQuote(envPath);
  }

  const label = (m?.name || repo).split('/').pop();
  _retryDownload(label, request);
}
// ── Open the Serve panel for a specific repo, pre-filled ──
//
/**
 * Open the Serve panel for `repo`, pre-filled with `fields`.
 *
 * Used by the running-task "Edit / relaunch" button. Writes the supplied
 * field values into the per-repo serve state so the panel's existing restore
 * logic fills the form exactly, switches to the Serve tab, then finds the
 * model's cached card and expands it.
 *
 * @param {string} repo - Repo id of the model whose card should be opened.
 * @param {?Object} fields - Serve form values to seed; skipped when not an object.
 * @returns {Promise<boolean>} true once the card was found and opened; false
 *   when `repo` is empty or no card appeared within the polling window.
 */
export async function openServePanelForRepo(repo, fields) {
  if (!repo) return false;

  const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));

  // Seed the per-repo serve state with the exact launch fields so the panel
  // restores them when it builds. Storage problems are deliberately ignored.
  if (fields && typeof fields === 'object') {
    try {
      let stored = {};
      try { stored = JSON.parse(localStorage.getItem(SERVE_STATE_KEY)) || {}; } catch {}
      const perRepo = (stored && stored._byRepo && typeof stored._byRepo === 'object')
        ? stored._byRepo
        : {};
      // Mirror the launch-time save: stamp _forceBackend so the panel's sv()
      // helper treats these seeded fields as authoritative, not as
      // overridable defaults.
      const seeded = { ...fields, _forceBackend: true };
      perRepo[repo] = seeded;
      localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: perRepo, _lastUsed: seeded }));
    } catch {}
  }

  // Clicking an inactive Serve tab triggers _fetchCachedModels through its
  // click handler; otherwise refresh the list here so the card is present.
  const serveTab = document.querySelector('.cookbook-tab[data-backend="Serve"]');
  const needsTabSwitch = Boolean(serveTab) && !serveTab.classList.contains('active');
  if (needsTabSwitch) {
    serveTab.click();
  } else {
    try { await _fetchCachedModels(); } catch {}
  }

  // A model downloaded to a CUSTOM dir is scanned by its folder name (the
  // short name), while the download task carries the full HF repo id — so a
  // card matches on the exact repo OR on the short (last-segment) name.
  const shortName = repo.split('/').pop();
  const escapeSel = (v) => (window.CSS && CSS.escape) ? CSS.escape(v) : v;
  const locateCard = () => {
    const exact = document.querySelector(`.memory-item[data-repo="${escapeSel(repo)}"]`);
    if (exact || !shortName || shortName === repo) return exact;
    return document.querySelector(`.memory-item[data-repo="${escapeSel(shortName)}"]`)
      || [...document.querySelectorAll('.memory-item[data-repo]')]
        .find(el => (el.dataset.repo || '').split('/').pop() === shortName);
  };

  // The cached-model fetch is async and the tab click gives no completion
  // hook, so poll: up to 50 attempts, 100 ms apart.
  let attemptsLeft = 50;
  while (attemptsLeft-- > 0) {
    const card = locateCard();
    if (card) {
      const alreadyOpen = card.classList.contains('doclib-card-expanded');
      if (!alreadyOpen) {
        card.click();
      } else if (fields) {
        // Already expanded with fields to restore: close and reopen so the
        // panel re-renders from the just-written _byRepo[repo] values
        // instead of keeping its stale form.
        card.click();
        await pause(40);
        card.click();
      }
      try { card.scrollIntoView({ behavior: 'smooth', block: 'center' }); } catch {}
      return true;
    }
    await pause(100);
  }

  uiModule.showToast('Model not found in cache — switch to the Serve tab manually');
  return false;
}
// ── Fetch cached models from server ──
export async function _fetchCachedModels() {
const list = document.getElementById('hwfit-cached-list');
if (!list) return;
list.innerHTML = '';
const _dlWp = spinnerModule.createWhirlpool(18);
const _dlWrap = document.createElement('div');
_dlWrap.className = 'hwfit-loading';
_dlWrap.style.cssText = 'flex-direction:column;gap:6px;';
_dlWrap.appendChild(_dlWp.element);
const _dlLabel = document.createElement('div');
_dlLabel.textContent = 'Scanning cached models…';
_dlLabel.style.cssText = 'opacity:0.5;font-size:11px;';
_dlWrap.appendChild(_dlLabel);
list.appendChild(_dlWrap);
try {
let host = _envState.remoteHost || '';
let selectedServer = null;
const _serverByCacheValue = (val) => {
if (val === 'local') return null;
return _serverByVal?.(val)
|| (/^\d+$/.test(String(val)) ? _envState.servers[parseInt(val)] : null)
|| _envState.servers.find(x => x.name === val)
|| null;
};
const cacheSrv = document.getElementById('hwfit-cache-server');
if (cacheSrv) {
const val = cacheSrv.value;
if (val === 'local') {
host = '';
selectedServer = _envState.servers.find(s => !s.host || s.host === 'local') || _envState.servers[0];
} else {
const s = _serverByCacheValue(val);
if (s) { host = s.host; selectedServer = s; }
}
} else {
selectedServer = _envState.servers.find(s => s.host === host) || _envState.servers[0];
}
// Read extra model dirs from the SELECTED server's modelDirs (canonical source)
const modelDirs = [];
if (selectedServer && Array.isArray(selectedServer.modelDirs)) {
for (const d of selectedServer.modelDirs) {
if (d && d !== '~/.cache/huggingface/hub') modelDirs.push(d);
}
}
// Sync the header dir pills to THIS server (the one whose models we're listing).
// They were rendered once from _es.remoteHost, which can differ from the
// cache-server dropdown — so the title showed only ~/.cache even while listing
// models from a custom model directory. Keep them in lock-step with the actual scan host.
const _dirsEl = document.querySelector('.cookbook-serve-dirs');
if (_dirsEl && selectedServer) {
const _allDirs = (Array.isArray(selectedServer.modelDirs) && selectedServer.modelDirs.length
? selectedServer.modelDirs
: [selectedServer.modelDir || '~/.cache/huggingface/hub'])
.map(d => (d || '').replaceAll('✕', '').replaceAll('✖', '').trim()).filter(Boolean);
_dirsEl.innerHTML = _allDirs.map(d => `${esc(d)}`).join('')
+ 'edit';
_dirsEl.querySelector('.cookbook-serve-dir-edit')?.addEventListener('click', () => {
document.querySelector('#cookbook-modal .cookbook-tab[data-backend="Settings"]')?.click();
});
}
const qp = new URLSearchParams();
if (host) { qp.set('host', host); const _sp4 = _getPort(host); if (_sp4) qp.set('ssh_port', _sp4); const _plat = _getPlatform(host); if (_plat) qp.set('platform', _plat); }
if (modelDirs.length) qp.set('model_dir', modelDirs.join(','));
const params = qp.toString() ? `?${qp}` : '';
const res = await fetch(`/api/model/cached${params}`);
if (!res.ok) {
const body = await res.text().catch(() => '');
let msg = '';
try {
const payload = JSON.parse(body);
msg = payload && (payload.detail || payload.error || payload.message);
} catch {
msg = body;
}
msg = typeof msg === 'string' ? msg.trim() : '';
throw new Error(`HTTP ${res.status} ${res.statusText}${msg ? `: ${msg}` : ''}`);
}
const data = await res.json();
_dlWp.destroy();
// CHANGELOG: 'ready' already excludes partial downloads;
// show every complete model regardless of size/backend.
const ready = data.models.filter(m => m.status === 'ready');
const downloading = data.models.filter(m => m.status === 'downloading');
const allModels = [...ready, ...downloading];
_cachedAllModels = allModels;
if (!allModels.length) {
if (!host) {
list.innerHTML = 'No cached models found
Docker Local uses Odysseus’s cache in data/huggingface. Download a model here, or copy an existing host HuggingFace cache into that folder once.
';
} else {
list.innerHTML = 'No cached models found
';
}
document.getElementById('serve-tags').innerHTML = '';
return;
}
// Auto-detect type + family tags
const _tagMap = {};
const _familyMap = {};
const _families = [
[/qwen/i, 'qwen'], [/llama/i, 'llama'], [/mistral|mixtral/i, 'mistral'],
[/deepseek/i, 'deepseek'], [/gemma/i, 'gemma'], [/phi/i, 'phi'],
[/minimax/i, 'minimax'], [/glm/i, 'glm'], [/flux/i, 'flux'],
[/stable.?diffusion|sdxl/i, 'sd'], [/z-image/i, 'z-image'],
[/whisper/i, 'whisper'], [/command|cohere/i, 'cohere'],
[/yi-/i, 'yi'], [/intern/i, 'intern'], [/falcon/i, 'falcon'],
];
for (const m of allModels) {
const n = (m.repo_id || '').toLowerCase();
let tag = 'other';
if (m.backend === 'ollama' || m.is_ollama) tag = 'llm';
else if (m.is_diffusion || /flux|sdxl|stable-diffusion|z-image|qwen-image|diffusion|dreamshar/i.test(n)) tag = 'image';
else if (/whisper|stt|asr/i.test(n)) tag = 'stt';
else if (/tts|cosyvoice|parler/i.test(n)) tag = 'tts';
else if (/embed|bge|minilm|e5-/i.test(n)) tag = 'embedding';
else if (/lora|adapter/i.test(n)) tag = 'lora';
else tag = 'llm';
m._tag = tag;
_tagMap[tag] = (_tagMap[tag] || 0) + 1;
m._family = '';
for (const [re, fam] of _families) {
if (re.test(n)) { m._family = fam; _familyMap[fam] = (_familyMap[fam] || 0) + 1; break; }
}
if ((m.backend === 'ollama' || m.is_ollama) && !m._family) {
m._family = 'ollama';
_familyMap.ollama = (_familyMap.ollama || 0) + 1;
}
}
// Render tag chips
const tagContainer = document.getElementById('serve-tags');
if (tagContainer) {
const tagOrder = ['llm', 'image', 'lora', 'embedding', 'tts', 'stt', 'other'];
let tagHtml = ``;
for (const t of tagOrder) {
if (!_tagMap[t]) continue;
tagHtml += ``;
}
const sortedFamilies = Object.entries(_familyMap).sort((a, b) => b[1] - a[1]);
if (sortedFamilies.length) {
for (const [fam, count] of sortedFamilies) {
const logo = providerLogo(fam);
const logoHtml = logo ? `${logo}` : '';
tagHtml += ``;
}
}
tagContainer.innerHTML = tagHtml;
}
_rerenderCachedModels();
} catch (e) {
_dlWp.destroy();
list.innerHTML = `Failed: ${esc(e.message)}
`;
}
}
/**
 * Filter presets matching a model repo.
 *
 * A preset matches when its `model` or `name` equals `repo` exactly, or when
 * the last path segment of its `model` (or its whole `name`) equals the last
 * path segment of `repo`.
 *
 * @param {Array<{model?: string, name?: string}>} presets - Candidate presets.
 * @param {string} repo - Repo identifier, e.g. `org/model`.
 * @returns {Array<object>} The matching presets in their original order;
 *   an empty array when `presets` is not an array or `repo` is not a
 *   non-empty string.
 */
function _presetsForModel(presets, repo) {
  if (!Array.isArray(presets) || typeof repo !== 'string' || repo === '') return [];
  const short = repo.split('/').pop();
  return presets.filter((preset) => {
    // Missing or non-string fields are treated as blank rather than throwing.
    const model = typeof preset?.model === 'string' ? preset.model : '';
    const name = typeof preset?.name === 'string' ? preset.name : '';
    if (model === repo || name === repo) return true;
    // An empty short name (repo ending in '/') must not match presets whose
    // model or name is simply blank.
    if (!short) return false;
    return model.split('/').pop() === short || name === short;
  });
}
// ── Init ──
/**
 * Bind this module's shared state and helper references.
 *
 * Copies each listed property of `shared` into the module-level binding of
 * the same name, in the order written below. Properties that `shared` does
 * not define leave the corresponding binding `undefined`.
 *
 * @param {object} shared - Object exposing the state and helpers listed below.
 * @returns {void}
 */
export function initServe(shared) {
  ({
    _envState,
    _sshCmd,
    _getPort,
    _sshPrefix,
    _serverByVal,
    _getPlatform,
    _isWindows,
    _isMetal,
    _buildEnvPrefix,
    _buildServeCmd,
    _shellQuote,
    _psQuote,
    _detectBackend,
    _detectToolParser,
    _detectModelOptimizations,
    _loadPresets,
    _savePresets,
    _copyText,
    _persistEnvState,
    _getGpuToggleTotal,
    modelLogo,
    esc,
    _launchServeTask,
    _retryDownload,
    _nextAvailablePort,
  } = shared);
}
// Module exports. `_cachedAllModels` is reassigned by `_fetchCachedModels`, so
// importers observe it as a live binding rather than a snapshot.
export { _cachedAllModels, _filterCachedList, _rerenderCachedModels, _deleteCachedModel };
/**
 * Jump from a serve-card's "running" pill to the matching task in the
 * Cookbook → Running tab.
 *
 * Clicks the Running tab inside `#cookbook-modal .cookbook-body`, then after a
 * short delay looks for the `.cookbook-task` card that belongs to `repo`,
 * scrolls it into view and flashes it briefly so it stands out in a long list.
 * Invoked from a delegated document-level click handler, so it keeps working
 * across every _rerenderCachedModels() pass.
 *
 * @param {string} repo - Repo identifier of the running model.
 * @returns {void}
 */
function _openRunningTabForRepo(repo) {
  const modalBody = document.querySelector('#cookbook-modal .cookbook-body');
  if (!modalBody) return;

  modalBody.querySelector('.cookbook-tab[data-backend="Running"]')?.click();

  // A card belongs to the repo when its dataset id matches, or when its title
  // equals the repo or the repo's last path segment.
  const belongsToRepo = (card) => {
    const taggedRepo = card.dataset?.modelId || card.dataset?.repoId || '';
    if (taggedRepo === repo) return true;
    const heading = card.querySelector('.cookbook-task-title, .memory-item-title')?.textContent?.trim() || '';
    return heading === repo || heading === (repo.split('/').pop() || '');
  };

  // The Running tab needs a tick to mount/render before its task cards exist.
  setTimeout(() => {
    const hit = Array.from(modalBody.querySelectorAll('.cookbook-task')).find(belongsToRepo);
    if (!hit) return;
    try {
      hit.scrollIntoView({ behavior: 'smooth', block: 'center' });
    } catch (_) {}
    hit.classList.add('cookbook-task-flash');
    setTimeout(() => hit.classList.remove('cookbook-task-flash'), 1600);
  }, 180);
}
// Delegated click handler for clickable "running" pills: a click on (or
// inside) one is consumed and forwarded to _openRunningTabForRepo with the
// pill's data-repo value. Pills without a repo are consumed but do nothing.
document.addEventListener('click', (event) => {
  const runningPill = event.target.closest?.('.cookbook-serve-running-pill.is-clickable');
  if (!runningPill) return;

  event.preventDefault();
  event.stopPropagation();

  const { repo = '' } = runningPill.dataset;
  if (repo) _openRunningTabForRepo(repo);
});