// ============================================
// COOKBOOK HWFIT SUB-MODULE
// "What Fits?" hardware model fitting UI
// ============================================
import {
_envState,
_persistEnvState,
esc,
modelLogo,
_detectBackend,
_runModelDownload,
_runPanelCmd,
_buildDownloadCmd,
_addTask,
_renderRunningTab,
_detectToolParser,
_lastCacheHost,
_setLastCacheHost,
_serverByVal,
_serverKey,
_currentServerValue,
_shellQuote,
_MODELDIR_CHECK_ON,
_MODELDIR_CHECK_OFF,
_serverEntryHtml,
_copyText,
// Import cookbook.js WITHOUT a ?v= query — the same plain specifier every other
// importer uses. A query mismatch loads cookbook.js twice as two separate modules
// (two _envState objects), which silently sent downloads to the wrong server.
} from './cookbook.js';
import uiModule from './ui.js';
import spinnerModule from './spinner.js';
// ── What Fits? (hardware model fitting) ──
// Most recent scan payload painted into the list (either a persisted cache hit
// or a fresh response); set to null whenever a control change invalidates it.
export let _hwfitCache = null;
// NOTE(review): not referenced in the visible part of this file — presumably a
// debounce timer handle for search/filter input; confirm against the rest of the module.
export let _hwfitDebounce = null;
export let _cachedModelIds = null; // repo IDs already downloaded
// Bumped on every _hwfitFetch; a slow scan (remote SSH probe can take ~10s)
// checks this before rendering so a stale response can't clobber a newer one
// after the user has switched servers.
let _hwfitFetchToken = 0;
// Hardware chip keys the user has toggled "off". _hwfitFetch reads the 'gpu',
// 'vram' and 'ram' keys to send the ignore_detected_* request parameters.
let _dismissedHwChips = new Set();
// Permanently removed (X-clicked) chips. Separate from _dismissedHwChips
// so the ranker treats "off" and "removed" the same (both ignore the
// hardware) but the UI keeps "off" chips visible to toggle back on,
// while "removed" ones don't render at all until next rescan.
let _removedHwChips = new Set();
export let _gpuToggleTotal = 0; // real GPU count from first scan, never overridden
// Return the first entry of model.gguf_sources that carries a truthy `repo`,
// or null when the list is missing, is not an array, or has no such entry.
function _firstGgufSource(model) {
  const candidates = model?.gguf_sources;
  if (!Array.isArray(candidates)) return null;
  for (const entry of candidates) {
    if (entry && entry.repo) return entry;
  }
  return null;
}
// Heuristic: true when the model is flagged is_gguf, or when any of its
// quant_repo / repo_id / path / name fields mentions "gguf" (case-insensitive).
function _looksLikeGgufRepo(model) {
  const fields = [model?.quant_repo, model?.repo_id, model?.path, model?.name];
  const haystack = fields.map((value) => value || '').join(' ').toLowerCase();
  if (model?.is_gguf) return true;
  return ['gguf', '.gguf'].some((needle) => haystack.includes(needle));
}
// Pick the repo to download for `model` on the given backend.
// For 'llamacpp' a GGUF source is preferred: first an explicit gguf_sources
// entry, then the model's own repo when it looks like a GGUF repo. Every other
// case falls back to quant_repo / name with an empty `kind`.
function _downloadSourceRepo(model, backend) {
  const fallback = () => ({ repo: model?.quant_repo || model?.name || '', kind: '' });
  if (backend !== 'llamacpp') return fallback();

  const explicitSource = _firstGgufSource(model);
  if (explicitSource) return { repo: explicitSource.repo, kind: 'GGUF' };

  if (!_looksLikeGgufRepo(model)) return fallback();
  const repo = model?.quant_repo || model?.repo_id || model?.name;
  return repo ? { repo, kind: 'GGUF' } : fallback();
}
// Reset GPU-toggle state so the next scan re-renders the RAM/GPU buttons for a
// (possibly different) server, WITHOUT clearing the markup now — clearing it made
// the buttons flicker out and back in. The old buttons stay visible until the
// fresh scan returns and swaps them in place. Lives here (not cookbook.js) because
// _gpuToggleTotal is a module-local binding that can't be reassigned by importers.
// Forget the GPU-toggle bookkeeping so the next scan rebuilds the buttons.
// The existing markup in #hwfit-gpu-toggles is intentionally left in place.
//   clearDismissed (default true): also reset the "off" and "removed" chip sets.
export function _resetGpuToggleState(clearDismissed = true) {
  if (clearDismissed) {
    _dismissedHwChips = new Set();
    _removedHwChips = new Set();
  }

  const toggles = document.getElementById('hwfit-gpu-toggles');
  if (toggles) {
    // Per-element state that _renderGpuToggles / _hwfitFetch keep on the node.
    Object.assign(toggles, {
      _originalSystem: null,
      _activeCount: undefined,
      _activeGroup: undefined,
      _groups: null,
      _builtGroup: undefined,
    });
    // Dropping the signature forces the next render to rebuild the buttons.
    delete toggles.dataset.rendered;
  }

  _gpuToggleTotal = 0;
}
// Trim vendor noise so a pool label reads "RTX 4090 D" not "NVIDIA GeForce RTX 4090 D".
// Strip a leading vendor prefix (NVIDIA [GeForce], AMD [Radeon]) from a GPU
// name. Falls back to 'GPU' when the name is empty or nothing is left.
function _shortGpuName(name) {
  const vendorPrefixes = [
    /^NVIDIA\s+GeForce\s+/i,
    /^NVIDIA\s+/i,
    /^AMD\s+(Radeon\s+)?/i,
  ];
  let label = String(name || 'GPU');
  for (const prefix of vendorPrefixes) {
    label = label.replace(prefix, '');
  }
  label = label.trim();
  return label || 'GPU';
}
// Powers of two up to the pool size, plus the exact pool size — these are the
// only safe vLLM --tensor-parallel-size values (TP must divide the GPU count and
// the model's attention heads). Never offer a count we can't actually serve.
// GPU counts offered for a pool: each of 1, 2, 4, 8, 16 that does not exceed
// poolSize, followed by poolSize itself when it is positive and not already listed.
function _validTpCounts(poolSize) {
  const counts = [];
  for (const candidate of [1, 2, 4, 8, 16]) {
    if (candidate <= poolSize) counts.push(candidate);
  }
  const needsExactSize = poolSize > 0 && !counts.includes(poolSize);
  if (needsExactSize) counts.push(poolSize);
  return counts;
}
// Render the GPU-count buttons (and, when more than one GPU pool is reported, a
// pool selector) into #hwfit-gpu-toggles for the scanned system object, then
// wire up the change/click handlers that trigger a re-fetch.
//
// State kept on the container element:
//   _groups       - the gpu_groups array from the last render
//   _activeGroup  - index of the selected pool (defaults to 0)
//   _activeCount  - selected GPU count; undefined until first render, null after
//                   the active button is clicked again, 0 for the RAM button
//   _builtGroup   - pool index the current buttons were built for
//   _gpuBound     - guards the one-time click-listener registration
//   dataset.rendered - hardware signature used to skip redundant rebuilds
//
// NOTE(review): the HTML template literals assigned to html in this function
// look truncated/empty in this copy of the file - verify the markup against the
// original before editing the body.
// NOTE(review): the first ramBtn lookup (inside the if that runs before
// container.innerHTML is assigned) is never used; only the lookup after the
// innerHTML assignment has an effect.
// NOTE(review): both parseInt calls omit the radix argument; consider passing 10.
export function _renderGpuToggles(system) {
const container = document.getElementById('hwfit-gpu-toggles');
if (!container) return;
// Pools of identical GPUs as reported by the scan; empty when the field is absent.
const groups = Array.isArray(system.gpu_groups) ? system.gpu_groups : [];
// Box-wide GPU total, stable across fetches. The route shrinks system.gpu_count
// to the *active pool* once we pin one, so derive the total from the (immutable)
// group list or the raw detection, never from the possibly-overridden count.
const total = system.detected_gpu_count
|| (groups.length ? groups.reduce((s, g) => s + (g.count || 0), 0) : (system.gpu_count || 0));
// No GPUs at all: clear the toggle area and forget the remembered total.
if (total <= 0 && !system.has_gpu) {
container.innerHTML = '';
container._groups = null;
_gpuToggleTotal = 0;
return;
}
// Remember the first non-zero total only; later scans do not overwrite it.
if (!_gpuToggleTotal) _gpuToggleTotal = total;
container._groups = groups;
if (container._activeGroup === undefined) container._activeGroup = 0; // auto = largest pool
const heterogeneous = groups.length > 1;
// Rebuild only when the hardware shape changes OR the chosen pool changes (the
// count buttons are pool-specific). Otherwise a re-scan would flicker them.
const sig = `${total}|${groups.map(g => g.count + ':' + g.vram_each).join(',')}`;
if (container.dataset.rendered === sig && container._builtGroup === container._activeGroup) return;
container.dataset.rendered = sig;
container._builtGroup = container._activeGroup;
// Selected pool, falling back to the first pool, then to a synthetic single
// pool built from the system totals when no groups were reported.
const grp = groups[container._activeGroup] || groups[0]
|| { count: total, vram_each: 0, name: system.gpu_name || 'GPU' };
const poolSize = grp.count || total;
let html = '';
if (heterogeneous) {
html += `';
}
const validCounts = _validTpCounts(poolSize);
const maxGpu = validCounts.length ? validCounts[validCounts.length - 1] : 0;
// Commit the data layer to maxGpu on initial render so it matches the
// visual highlight. Before this, _activeCount stayed undefined → no
// gpu_count param sent → backend's fallback could rank against RAM on
// mixed-resource boxes ("tightest" sorted by RAM instead of GPU).
if (container._activeCount === undefined && validCounts.length) {
container._activeCount = maxGpu;
}
html += '';
const hasExplicitCount = typeof container._activeCount === 'number';
for (const n of validCounts) {
const text = n === 1 ? 'GPU' : n + ' GPU';
const isActive = hasExplicitCount && n === container._activeCount;
html += ``;
}
// Also mark the RAM button active when the user explicitly chose RAM (0)
// — the loop above only handles GPU buttons.
if (container._activeCount === 0) {
const ramBtn = container.querySelector('.hwfit-gpu-btn[data-count="0"]');
// (we just set innerHTML so we re-mark below after assignment)
}
container.innerHTML = html;
if (container._activeCount === 0) {
const ramBtn = container.querySelector('.hwfit-gpu-btn[data-count="0"]');
if (ramBtn) ramBtn.classList.add('active');
}
// Pool dropdown: switch pools, reset the count to the new pool's max, rebuild.
const sel = container.querySelector('#hwfit-gpu-group');
if (sel) {
sel.addEventListener('change', () => {
container._activeGroup = parseInt(sel.value) || 0;
container._activeCount = undefined; // default to the new pool's max
delete container.dataset.rendered; // force a count-button rebuild
_renderGpuToggles(system);
_hwfitCache = null;
_hwfitFetch();
});
}
if (!container._gpuBound) {
container._gpuBound = true;
container.addEventListener('click', (e) => {
const btn = e.target.closest('.hwfit-gpu-btn');
if (!btn) return;
const count = parseInt(btn.dataset.count);
const wasActive = btn.classList.contains('active') && container._activeCount === count;
container.querySelectorAll('.hwfit-gpu-btn').forEach(b => b.classList.remove('active'));
if (wasActive) {
container._activeCount = null;
} else {
btn.classList.add('active');
container._activeCount = count;
// Auto-suggest a quant based on hardware selection — but ONLY when the
// user has already picked a specific quant. When they're on "All"
// (value === ""), leave them on All: toggling a GPU shouldn't silently
// yank them out of the All view they wanted to see.
const quantSel = document.getElementById('hwfit-quant');
if (quantSel && quantSel.value !== '') {
if (count <= 1) {
quantSel.value = 'Q4_K_M'; // RAM or 1 GPU -> Q4 sweet spot
} else if (String(system?.backend || '').toLowerCase() === 'rocm') {
quantSel.value = 'Q4_K_M'; // ROCm default stays GGUF/local-safe; AWQ is explicit only
} else {
quantSel.value = 'AWQ-4bit'; // Multi-GPU -> AWQ for vLLM
}
}
}
_hwfitCache = null;
_hwfitFetch();
});
}
}
// --- Scan persistence (survives page reloads) ----------------------------
// The backend caches hardware detection per host (~30 min) but that's lost on a
// service restart, and a reload still shows a spinner while it re-fetches. Cache
// the last successful /models result per param-signature in localStorage so a
// reload paints instantly, then we refresh in the background and swap.
// localStorage key for the persisted per-signature scan results
// (read by _readScanCache, written by _writeScanCache).
const _SCAN_CACHE_KEY = 'hwfit_scan_cache_v1';
// localStorage key holding the manual ("what if") hardware state as JSON.
const _MANUAL_HW_KEY = 'hwfit_manual_hardware_v1';
// localStorage key that _syncCtxControl reads to restore the context preset.
const _CTX_KEY = 'hwfit_target_context_v1';
// Token counts selectable via the context slider; the slider value is an index into this list.
const _CTX_PRESETS = [8192, 16384, 32768, 50000, 131072, 0]; // 0 = model max
const _SCAN_CACHE_MAX = 12; // keep the newest N signatures
const _SCAN_CACHE_TTL = 6 * 3600 * 1000; // 6 h — hardware rarely changes
// Ctx slider helpers (ported from origin/main). The slider picks an INDEX into
// _CTX_PRESETS; _ctxValue() resolves it to a token count (0 = "Max"). The label
// next to the slider re-renders to "8k" / "16k" / … / "Max".
// Human-readable label for a context size: 'Max' for 0 / non-numeric input,
// '<n>k' (rounded thousands) from 1000 upwards, otherwise the plain number.
function _ctxLabel(value) {
  const tokens = Number(value) || 0;
  if (tokens === 0) return 'Max';
  if (tokens >= 1000) return `${Math.round(tokens / 1000)}k`;
  return String(tokens);
}
// Resolve the #hwfit-context slider position to a token count from
// _CTX_PRESETS (0 = "Max"). A missing slider behaves like index 3; the index
// is clamped to the preset range.
function _ctxValue() {
  const slider = document.getElementById('hwfit-context');
  const requested = Number(slider?.value ?? 3) || 0;
  const lastIdx = _CTX_PRESETS.length - 1;
  const idx = Math.max(0, Math.min(lastIdx, requested));
  return _CTX_PRESETS[idx] || 0;
}
// Restore the context slider from localStorage and refresh its text label.
// The stored value is a token count; it is mapped back to its index in
// _CTX_PRESETS, defaulting to index 3 when nothing (or an unknown value) is stored.
function _syncCtxControl() {
  const DEFAULT_IDX = 3;
  const slider = document.getElementById('hwfit-context');
  const label = document.getElementById('hwfit-context-label');
  if (!slider) return;

  let idx = DEFAULT_IDX;
  const stored = localStorage.getItem(_CTX_KEY);
  if (stored != null) {
    const presetIdx = _CTX_PRESETS.indexOf(Number(stored));
    if (presetIdx >= 0) idx = presetIdx;
  }
  slider.value = String(idx);

  if (label) {
    label.textContent = _ctxLabel(_ctxValue());
  }
}
// Load the manual-hardware state from localStorage.
// Returns the parsed object only when its mode is 'gpu' or 'ram'; any other
// content, unreadable storage, or malformed JSON yields null.
function _manualHwState() {
  let parsed;
  try {
    parsed = JSON.parse(localStorage.getItem(_MANUAL_HW_KEY) || '{}');
    if (!parsed) return null;
    return ['gpu', 'ram'].includes(parsed.mode) ? parsed : null;
  } catch {
    // Storage unavailable or corrupt entry: behave as "no manual hardware".
    return null;
  }
}
// Persist the manual-hardware state, or clear the stored entry when `s` is
// empty / has no mode. Storage failures are ignored (best effort).
function _saveManualHwState(s) {
  try {
    const hasMode = Boolean(s && s.mode);
    if (hasMode) {
      localStorage.setItem(_MANUAL_HW_KEY, JSON.stringify(s));
    } else {
      localStorage.removeItem(_MANUAL_HW_KEY);
    }
  } catch {
    // Best effort: a full or unavailable store must not break the UI.
  }
}
// Translate the stored manual-hardware state into request parameters.
// Returns {} when no manual state is active. GPU-specific fields are empty
// strings in RAM mode; callers skip empty values when building the query.
function _manualHwParams() {
  const state = _manualHwState();
  if (!state) return {};

  const gpuMode = state.mode === 'gpu';
  const whenGpu = (text) => (gpuMode ? text : '');
  return {
    manual_mode: state.mode,
    manual_gpu_count: whenGpu(String(state.gpuCount || 1)),
    manual_vram_gb: whenGpu(String(state.vramGb || 8)),
    manual_ram_gb: state.ramGb ? String(state.ramGb) : '',
    manual_backend: whenGpu(state.backend || 'cuda'),
  };
}
// Extract the first number from free-form input (a single decimal comma is
// accepted). Returns `fallback` when no number is present or it is not > 0.
function _manualNumber(value, fallback) {
  const normalized = String(value || '').replace(',', '.');
  const found = /-?\d+(?:\.\d+)?/.exec(normalized);
  if (found === null) return fallback;
  const parsed = Number(found[0]);
  const usable = Number.isFinite(parsed) && parsed > 0;
  return usable ? parsed : fallback;
}
// Like _manualNumber but for optional fields: returns the first positive
// number found in the input (a single decimal comma is accepted), else null.
function _manualOptionalNumber(value) {
  const normalized = String(value || '').replace(',', '.');
  const found = /-?\d+(?:\.\d+)?/.exec(normalized);
  if (found === null) return null;
  const parsed = Number(found[0]);
  const usable = Number.isFinite(parsed) && parsed > 0;
  return usable ? parsed : null;
}
// Chip label for the manual ("what if") hardware state; '' when there is none.
// Manual values REPLACE the detected hardware, so amounts are shown as plain
// totals ("X GB") rather than additions ("+X GB").
function _manualHwLabel(s) {
  if (!s) return '';
  if (s.mode === 'ram') {
    return `Manual: ${s.ramGb || 0} GB RAM only`;
  }
  const gpuCount = s.gpuCount || 1;
  const plural = Number(gpuCount) === 1 ? '' : 's';
  const ramSuffix = s.ramGb ? ` · ${s.ramGb} GB RAM` : '';
  return `Manual: ${gpuCount} GPU${plural} · ${s.vramGb || 8} GB VRAM each${ramSuffix}`;
}
// Build the system object shown in the hardware chips when a manual
// ("what if") configuration is active. Always returns a shallow copy of `sys`;
// without `manual` the copy is returned unchanged.
//
// Manual values REPLACE the detected ones (mirroring the server-side
// behaviour) so the displayed totals match what is actually ranked against.
function _manualDisplaySystem(sys, manual) {
  const view = Object.assign({}, sys || {});
  if (!manual) return view;

  view.manual_hardware = true;
  if (manual.ramGb) {
    const ramTotal = Number(manual.ramGb);
    view.available_ram_gb = ramTotal;
    view.total_ram_gb = ramTotal;
  }

  if (manual.mode === 'ram') {
    // RAM-only simulation: blank out the GPU side so the chips agree with the
    // CPU/RAM-only ranking.
    view.has_gpu = false;
    view.gpu_name = null;
    view.gpu_vram_gb = 0;
    view.gpu_count = 0;
    return view;
  }

  // GPU simulation: synthesize a name and total VRAM from the per-card values.
  const gpuCount = Number(manual.gpuCount || 1);
  const vramEach = Number(manual.vramGb || 8);
  const backendId = manual.backend || 'cuda';
  const multiplier = gpuCount > 1 ? ` × ${gpuCount}` : '';
  view.gpu_name = `Simulated ${backendId.toUpperCase()} GPU` + multiplier;
  view.gpu_vram_gb = Math.round(vramEach * gpuCount * 10) / 10;
  view.gpu_count = gpuCount;
  view.backend = backendId;
  return view;
}
// Signature of everything that affects the result list, so we never paint a
// cached list under mismatched filters.
// Serialize every input that influences the result list (server, filters,
// sort, quant, context, GPU toggle state, manual hardware, dismissed chips)
// into one string. Used as the key of the persisted scan cache so a cached
// list is never painted under different filters.
function _scanSig() {
  const fieldValue = (id) => document.getElementById(id)?.value;
  const sortEl = document.getElementById('hwfit-sort');
  const toggles = document.getElementById('hwfit-gpu-toggles');

  const hasCount = Boolean(toggles) && typeof toggles._activeCount === 'number';
  const hasGroup = Boolean(toggles) && Boolean(toggles._activeGroup);

  const signature = {
    h: _envState.remoteHost || '',
    hk: _currentServerValue(),
    u: fieldValue('hwfit-usecase') || '',
    s: fieldValue('hwfit-search')?.trim() || '',
    o: sortEl?.value || 'score',
    r: sortEl?.dataset.reverse === '1' ? 1 : 0,
    q: fieldValue('hwfit-quant') || '',
    c: _ctxValue(),
    g: hasCount ? String(toggles._activeCount) : '',
    gg: hasGroup ? String(toggles._activeGroup) : '',
    m: _manualHwParams(),
    d: [..._dismissedHwChips].sort(),
  };
  return JSON.stringify(signature);
}
// Look up the persisted scan result for `sig`. Returns the stored data when
// the entry exists and is younger than _SCAN_CACHE_TTL; otherwise (missing,
// expired, unreadable storage, malformed JSON) returns null.
function _readScanCache(sig) {
  try {
    const store = JSON.parse(localStorage.getItem(_SCAN_CACHE_KEY) || '{}');
    const entry = store[sig];
    if (!entry) return null;
    const ageMs = Date.now() - entry.ts;
    return ageMs < _SCAN_CACHE_TTL ? entry.data : null;
  } catch {
    // Treat any storage/parse problem as a cache miss.
    return null;
  }
}
// Persist the `system` and `models` of a scan result under `sig`, stamping it
// with the current time. When more than _SCAN_CACHE_MAX signatures are stored,
// the oldest ones are dropped. Storage failures are ignored (best effort).
function _writeScanCache(sig, data) {
  try {
    const store = JSON.parse(localStorage.getItem(_SCAN_CACHE_KEY) || '{}');
    store[sig] = {
      ts: Date.now(),
      data: { system: data.system, models: data.models },
    };

    const signatures = Object.keys(store);
    const overflow = signatures.length - _SCAN_CACHE_MAX;
    if (overflow > 0) {
      // Oldest first, then evict just enough to get back under the cap.
      signatures.sort((a, b) => (store[a].ts || 0) - (store[b].ts || 0));
      signatures.slice(0, overflow).forEach((stale) => {
        delete store[stale];
      });
    }

    localStorage.setItem(_SCAN_CACHE_KEY, JSON.stringify(store));
  } catch {
    // Best effort: quota errors or corrupt entries must not break rendering.
  }
}
// Render a clear scan-failure card into the model list: which server failed, the
// underlying reason (small), and a Retry button that forces a fresh probe. Used
// for both the backend-reported error (SSH/probe failure) and network failures,
// instead of dumping a raw one-line message.
// Replace the contents of `list` with a scan-failure card.
//   list   - container element to render into; nothing happens when it is falsy
//   host   - server that failed; shown escaped, or "this machine" when empty
//   detail - optional underlying reason; rendered escaped only when truthy
// NOTE(review): the markup inside the template literals below looks stripped in
// this copy of the file (no element with id "hwfit-retry" is visible even though
// the code queries for one) - verify against the original before editing.
function _hwfitShowError(list, host, detail) {
if (!list) return;
const where = host ? esc(host) : 'this machine';
const div = document.createElement('div');
div.className = 'hwfit-loading';
// .hwfit-loading is laid out as a row elsewhere in this file; stack the card vertically.
div.style.cssText = 'flex-direction:column;gap:8px;text-align:center;';
div.innerHTML =
`
Couldn't scan ${where}
`
+ (detail ? `
${esc(detail)}
` : '')
+ ``;
list.innerHTML = '';
list.appendChild(div);
// Retry: reset the GPU toggle state and force a fresh (uncached) scan.
const rb = div.querySelector('#hwfit-retry');
if (rb) rb.addEventListener('click', () => { _resetGpuToggleState(); _hwfitFetch(true); });
}
// Client-side "Engine" filter (llama.cpp / vLLM / SGLang / Ollama). Empty =
// show all. Uses the same _detectBackend() the serve commands use, so what you
// filter to is exactly what would be launched. Pure view filter — no refetch
// needed. Ollama rows are merged into the main list (see _ensureOllamaLib +
// _ollamaToHwfitRows below) so the filter handles all engines uniformly.
// Apply the client-side "Engine" filter (#hwfit-engine) to a model list.
// With no engine selected, or when `models` is not an array, the input is
// returned as-is ([] for a falsy input). Models whose backend cannot be
// detected are kept rather than hidden.
function _applyEngineFilter(models) {
  const engine = document.getElementById('hwfit-engine')?.value || '';
  if (!engine || !Array.isArray(models)) return models || [];

  const matchesEngine = (model) => {
    try {
      return _detectBackend(model).backend === engine;
    } catch {
      // Detection failed: keep the row visible instead of silently dropping it.
      return true;
    }
  };
  return models.filter(matchesEngine);
}
// Ollama library cache (per-page). Filled lazily on first _hwfitFetch; the raw
// list is the same shape returned by /api/cookbook/ollama/library, then turned
// into per-tag hwfit rows so they slot into the main list grid alongside HF
// scan results.
let _ollamaLibCache = null;
// Time (ms since epoch) of the last failed library fetch; 0 = no recent failure.
let _ollamaLibFailedAt = 0;
// After a failure, wait this long before hitting the endpoint again so a dead
// endpoint is not re-requested on every search keystroke / re-scan.
const _OLLAMA_LIB_RETRY_MS = 30 * 1000;
// Return the Ollama library list, fetching it at most once per page on success.
//
// Only a successful response is cached for the page lifetime. A failure
// (network error, non-2xx status, invalid JSON) resolves to [] but is merely
// remembered for _OLLAMA_LIB_RETRY_MS — previously a single transient failure
// was cached as an empty list forever, hiding all Ollama rows until a reload.
//
// Returns: Promise<Array> — never rejects.
async function _ensureOllamaLib() {
  if (_ollamaLibCache) return _ollamaLibCache;
  const recentlyFailed = _ollamaLibFailedAt !== 0
    && (Date.now() - _ollamaLibFailedAt) < _OLLAMA_LIB_RETRY_MS;
  if (recentlyFailed) return [];
  try {
    const res = await fetch('/api/cookbook/ollama/library');
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const data = await res.json();
    _ollamaLibCache = Array.isArray(data?.models) ? data.models : [];
    _ollamaLibFailedAt = 0;
    return _ollamaLibCache;
  } catch {
    // Best effort: the main list still renders without Ollama rows.
    _ollamaLibFailedAt = Date.now();
    return [];
  }
}
// Convert an Ollama library entry's sizes into per-tag hwfit rows. Shape
// matches what _hwfitRenderList expects (fit_level, parameter_count,
// required_gb, score, …) so the rows render identically to HF results.
// Parse an Ollama size tag into billions of parameters.
//   "14b" -> 14, "1.5b" -> 1.5, "8x7b" -> 56 (experts x size, rough),
//   "135m" -> 0.135, anything else (e.g. "latest") -> null
function _olParseSize(s) {
  if (!s) return null;
  const tag = s.toLowerCase();

  const mixture = /^(\d+(?:\.\d+)?)x(\d+(?:\.\d+)?)b$/.exec(tag);
  if (mixture) return parseFloat(mixture[1]) * parseFloat(mixture[2]);

  const billions = /^(\d+(?:\.\d+)?)b$/.exec(tag);
  if (billions) return parseFloat(billions[1]);

  const millions = /^(\d+(?:\.\d+)?)m$/.exec(tag);
  return millions ? parseFloat(millions[1]) / 1000 : null;
}
// Expand Ollama library entries into one list row per size tag, shaped like
// the HF scan rows (fit_level, parameter_count, required_gb, score, ...).
//   libModels - array of { name, sizes?, description? }; non-arrays yield []
//   vramAvail - available VRAM in GB (falsy = no GPU)
//   ramAvail  - available RAM in GB (falsy = unknown)
function _ollamaToHwfitRows(libModels, vramAvail, ramAvail) {
  if (!Array.isArray(libModels)) return [];

  // Classify how well a model needing `needGb` fits the available memory.
  const fitLevelFor = (needGb) => {
    if (!needGb) return 'no_fit';
    const fitsInRam = Boolean(ramAvail) && needGb <= ramAvail;
    if (vramAvail) {
      if (needGb <= vramAvail * 0.6) return 'perfect';
      if (needGb <= vramAvail) return 'good';
      return fitsInRam ? 'marginal' : 'too_tight';
    }
    return fitsInRam ? 'marginal' : 'no_fit';
  };

  // "14B" / "1.5B" / "135M", or "?" when the size tag carries no number.
  const paramLabelFor = (params) => {
    if (!params) return '?';
    if (params < 1) return (params * 1000).toFixed(0) + 'M';
    return params.toFixed(params >= 10 ? 0 : 1) + 'B';
  };

  const rows = [];
  libModels.forEach((entry) => {
    const hasSizes = Array.isArray(entry.sizes) && entry.sizes.length;
    const sizeTags = hasSizes ? entry.sizes : ['latest'];
    sizeTags.forEach((sizeTag) => {
      const params = _olParseSize(sizeTag);
      // Ollama default GGUF is ~Q4_K_M. Rough VRAM estimate: 0.6 GB / B.
      const needGb = params ? params * 0.6 : 0;
      const tag = `${entry.name}:${sizeTag}`;
      rows.push({
        name: tag,
        repo_id: tag,
        quant: 'Q4_K_M',
        parameter_count: paramLabelFor(params),
        params_b: params || 0,
        required_gb: needGb,
        fit_level: fitLevelFor(needGb),
        // Modest score: larger models get a slightly higher base, capped so
        // Ollama rows stay below well-scored HF results in the default view.
        score: params ? Math.min(30 + params * 0.3, 60) : 25,
        speed_tps: 0,
        context: 0,
        is_gguf: true,
        backend: 'ollama',
        _isOllama: true,
        _olName: entry.name,
        _olSize: sizeTag,
        _description: entry.description || '',
      });
    });
  });
  return rows;
}
// Scan the selected server's hardware and (re)render the "What Fits?" list.
//
//   fresh - true forces a new hardware probe and skips the instant paint from
//           the persisted scan cache. Forced to true whenever manual hardware
//           or dismissed chips are active.
//
// Flow: paint instantly from the persisted cache (or show a spinner) → refresh
// the downloaded-model IDs when the server changed → fetch the ranked models →
// optionally retry without the quant filter → merge Ollama rows → sort →
// render → persist → build the GPU toggles on the first scan.
//
// Every call bumps _hwfitFetchToken. The token is re-checked after EVERY await
// and in the error path, so a scan that was superseded (the user switched
// servers or changed a filter mid-probe) can never overwrite the newer scan's
// list, hardware row, cache entry or error card.
//
// Returns: Promise<void>; failures are rendered into the list, never thrown.
export async function _hwfitFetch(fresh = false) {
  const _tk = ++_hwfitFetchToken;
  const useCase = document.getElementById('hwfit-usecase')?.value || '';
  const search = document.getElementById('hwfit-search')?.value?.trim() || '';
  const remoteHost = _envState.remoteHost || '';
  const list = document.getElementById('hwfit-list');
  const hw = document.getElementById('hwfit-hw');
  if (!list) return;
  const hasManualOrDismissed = !!_manualHwState() || _dismissedHwChips.size > 0;
  if (hasManualOrDismissed) fresh = true;
  // Instant paint from the persisted cache (skipped on a forced Rescan), so a
  // reload shows the last result with no spinner. We still fetch fresh below and
  // swap it in. If there's no cache hit, fall back to the spinner.
  const _sig = _scanSig();
  const _cached = fresh ? null : _readScanCache(_sig);
  const wp = spinnerModule.createWhirlpool(18);
  if (_cached) {
    _hwfitCache = _cached;
    _hwfitRenderHw(hw, _cached.system);
    if (!remoteHost && _cached.system && _cached.system.platform) {
      _envState.platform = _cached.system.platform;
    }
    _hwfitRenderList(list, _applyEngineFilter(_cached.models));
  } else {
    // Show spinner while scanning — stack the spinner above a text label
    // (the .hwfit-loading class is a centered flex ROW, so force column here).
    const loadingDiv = document.createElement('div');
    loadingDiv.className = 'hwfit-loading';
    loadingDiv.style.flexDirection = 'column';
    loadingDiv.style.gap = '6px';
    loadingDiv.appendChild(wp.element);
    // Text label like the other cookbook tabs: "Loading…", then if the scan runs
    // long (remote SSH hardware probe), switch to "Scanning hardware…".
    const loadingLbl = document.createElement('div');
    loadingLbl.textContent = 'Loading…';
    loadingLbl.style.cssText = 'text-align:center;opacity:0.5;font-size:11px;';
    loadingDiv.appendChild(loadingLbl);
    setTimeout(() => { if (loadingLbl.isConnected) loadingLbl.textContent = 'Scanning hardware…'; }, 2000);
    list.innerHTML = '';
    list.appendChild(loadingDiv);
    _hwfitCache = null; // no instant paint — clear until the fetch returns
  }
  // Only fetch cached model IDs when server changes, not on every search/sort
  const remoteKey = _currentServerValue();
  if (!_cachedModelIds || _lastCacheHost() !== remoteKey) {
    _setLastCacheHost(remoteKey);
    const _cacheSrv = _serverByVal(_envState.remoteServerKey || remoteHost);
    const _cachePort = _cacheSrv?.port || '';
    const _cacheParams = new URLSearchParams();
    if (remoteHost) {
      _cacheParams.set('host', remoteHost);
      if (_cachePort) _cacheParams.set('ssh_port', _cachePort);
      if (_cacheSrv?.platform) _cacheParams.set('platform', _cacheSrv.platform);
    }
    // Fire-and-forget: this runs alongside the main scan and only decorates rows.
    fetch(`/api/model/cached?${_cacheParams}`, { credentials: 'same-origin' })
      .then(r => r.json())
      .then(d => {
        // Exclude stalled (download-shell) entries — a 12 KB README-only
        // folder shouldn't count as "downloaded" in the Scan/Download list.
        _cachedModelIds = new Set((d.models || []).filter(m => m.status !== 'stalled').map(m => m.repo_id));
        // Re-mark rows if already rendered
        list.querySelectorAll('.hwfit-row[data-model]').forEach(row => {
          const name = row.dataset.model;
          if (_cachedModelIds.has(name) || [..._cachedModelIds].some(id => id.endsWith('/' + name?.split('/').pop()))) {
            const nameEl = row.querySelector('.hwfit-name');
            if (nameEl && !nameEl.querySelector('.hwfit-dl-dot')) {
              nameEl.insertAdjacentHTML('beforeend', '\u25CF');
            }
          }
        });
      }).catch(() => {});
  }
  try {
    const sortBy = document.getElementById('hwfit-sort')?.value || 'score';
    const quantPref = document.getElementById('hwfit-quant')?.value || '';
    const targetCtx = _ctxValue();
    // Get active GPU count from toggles
    const toggleContainer = document.getElementById('hwfit-gpu-toggles');
    let gpuCountOverride = '';
    if (!hasManualOrDismissed && toggleContainer && typeof toggleContainer._activeCount === 'number') {
      gpuCountOverride = String(toggleContainer._activeCount);
    }
    // Which homogeneous GPU pool to rank against (heterogeneous boxes only).
    let gpuGroupOverride = '';
    if (!hasManualOrDismissed && toggleContainer && toggleContainer._activeGroup) {
      gpuGroupOverride = String(toggleContainer._activeGroup);
    }
    const params = new URLSearchParams({ limit: '80', sort: sortBy });
    if (fresh) params.set('fresh', '1'); // bypass the hardware-scan cache
    if (search) params.set('search', search);
    if (remoteHost) {
      params.set('host', remoteHost);
      const _srv = _serverByVal(_envState.remoteServerKey || remoteHost);
      const _hp = _srv?.port || '';
      if (_hp) params.set('ssh_port', _hp);
      if (_srv?.platform) params.set('platform', _srv.platform);
    }
    if (gpuCountOverride !== '') params.set('gpu_count', gpuCountOverride);
    if (gpuGroupOverride !== '') params.set('gpu_group', gpuGroupOverride);
    if (_dismissedHwChips.has('gpu') || _dismissedHwChips.has('vram')) params.set('ignore_detected_gpu', 'true');
    if (_dismissedHwChips.has('ram')) params.set('ignore_detected_ram', 'true');
    const manualParams = _manualHwParams();
    Object.entries(manualParams).forEach(([k, v]) => {
      if (v !== '') params.set(k, v);
    });
    if (hasManualOrDismissed) params.set('_hw_override_ts', String(Date.now()));
    // Image models use a separate registry/endpoint
    const isImageMode = useCase === 'image_gen';
    if (!isImageMode) {
      if (useCase) params.set('use_case', useCase);
      if (quantPref) params.set('quant', quantPref);
      if (targetCtx) params.set('ctx', String(targetCtx));
      // Fit-only filter — set by the dot in the Fit column header.
      const _fitOnly = (() => { try { return localStorage.getItem('hwfit_fit_only_v1') === '1'; } catch { return false; } })();
      if (_fitOnly) params.set('fit_only', '1');
    }
    const endpoint = isImageMode ? `/api/hwfit/image-models?${params}` : `/api/hwfit/models?${params}`;
    const res = await fetch(endpoint);
    // A newer scan started while this one was in flight (user switched servers
    // mid-probe) — drop this stale response so it can't clobber the new one.
    if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
    if (!res.ok) {
      const body = await res.text().catch(() => '');
      let msg = '';
      try {
        const payload = JSON.parse(body);
        msg = payload && (payload.detail || payload.error || payload.message);
      } catch {
        msg = body;
      }
      msg = typeof msg === 'string' ? msg.trim() : '';
      throw new Error(`HTTP ${res.status} ${res.statusText}${msg ? `: ${msg}` : ''}`);
    }
    let data = await res.json();
    if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
    // Nothing matched the chosen quant: retry without it and, if that finds
    // models, fall back to the "All" quant view instead of an empty list.
    if (!isImageMode && quantPref && !data.error && Array.isArray(data.models) && data.models.length === 0) {
      const fallbackParams = new URLSearchParams(params);
      fallbackParams.delete('quant');
      const fallbackRes = await fetch(`/api/hwfit/models?${fallbackParams}`);
      if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
      if (fallbackRes.ok) {
        const fallbackData = await fallbackRes.json();
        // Re-check after this await too: body parsing is another suspension point.
        if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
        if (!fallbackData.error && Array.isArray(fallbackData.models) && fallbackData.models.length > 0) {
          data = fallbackData;
          const quantSel = document.getElementById('hwfit-quant');
          if (quantSel) quantSel.value = '';
        }
      }
    }
    // Normalize image model fields to match LLM renderer expectations
    if (isImageMode && data.models) {
      data.models = data.models.map(m => ({
        ...m,
        name: m.id || m.name,
        fit_level: m.fit || 'no_fit',
        parameter_count: m.params_b ? m.params_b + 'B' : '?',
        required_gb: m.vram_needed || 0,
        speed_tps: 0,
        context: 0,
        run_mode: m.capabilities?.[0] || 'image',
        is_image_gen: true,
        quant: m.quant || m.default_quant || 'BF16',
        quant_repo: m.quant_repo || null,
      }));
    }
    wp.destroy();
    if (data.error) {
      // Keep the instantly-painted cache if we had one — don't replace good data
      // with an error on a transient probe failure (stale-while-revalidate).
      if (!_cached) { _hwfitShowError(list, remoteHost, data.error); if (hw) hw.innerHTML = ''; }
      return;
    }
    // Merge Ollama library rows into the main list so they appear with the
    // same Fit/Param/Quant/VRAM/Mode columns as HF results and respond to the
    // Engine filter. Skipped in image-gen mode (Ollama doesn't serve diffusers).
    if (!isImageMode) {
      const _vramAvail = data.system?.gpu_vram_gb || 0;
      const _ramAvail = data.system?.total_ram_gb || 0;
      const _lib = await _ensureOllamaLib();
      // The first library load is a network round-trip; a newer scan may have
      // started meanwhile. The spinner is already destroyed above, so just bail.
      if (_tk !== _hwfitFetchToken) return;
      const _olRows = _ollamaToHwfitRows(_lib, _vramAvail, _ramAvail);
      // Search filter on Ollama rows: HF API already filters by search; do the
      // same client-side over Ollama name + description so the search box
      // works consistently across both sources.
      const _s = (search || '').trim().toLowerCase();
      const _olFiltered = _s
        ? _olRows.filter(r => r.name.toLowerCase().includes(_s) || (r._description || '').toLowerCase().includes(_s))
        : _olRows;
      data.models = (data.models || []).concat(_olFiltered);
    }
    _hwfitCache = data;
    _hwfitRenderHw(hw, data.system);
    // Propagate local platform from hardware probe so _isWindows(task) works
    // for local tasks (menu items, shell commands, etc.).
    if (!remoteHost && data.system && data.system.platform) {
      _envState.platform = data.system.platform;
    }
    // Sort client-side by the active column so the highest↔lowest toggle is
    // deterministic (the previous array .reverse() didn't reliably flip).
    // 1st click on a column = highest first; clicking it again = lowest first.
    if (!isImageMode) {
      const sortSel = document.getElementById('hwfit-sort');
      const sortKey = sortSel?.value || 'score';
      const asc = sortSel?.dataset.reverse === '1'; // reversed → ascending (lowest first)
      if (sortKey === 'fit') {
        // fit_level is categorical (perfect→good→marginal→too_tight), not numeric,
        // so rank it explicitly instead of falling through to the score column.
        // Tie-break by score so rows within one fit tier stay meaningfully ordered.
        const fitRank = { perfect: 4, good: 3, marginal: 2, too_tight: 1, no_fit: 0 };
        data.models.sort((a, b) => {
          const ar = fitRank[a.fit_level] ?? -1, br = fitRank[b.fit_level] ?? -1;
          if (ar !== br) return asc ? ar - br : br - ar;
          const as = Number(a.score) || 0, bs = Number(b.score) || 0;
          return asc ? as - bs : bs - as;
        });
      } else {
        const field = { score: 'score', vram: 'required_gb', speed: 'speed_tps', params: 'params_b', context: 'context' }[sortKey] || 'score';
        data.models.sort((a, b) => {
          const av = Number(a[field]) || 0, bv = Number(b[field]) || 0;
          return asc ? av - bv : bv - av;
        });
      }
    }
    _hwfitRenderList(list, _applyEngineFilter(data.models));
    // Persist this result so the next page load can paint it instantly.
    _writeScanCache(_sig, data);
    // Render GPU toggles — only on first scan (no override active)
    if (toggleContainer && !toggleContainer._originalSystem) {
      // Only trust the system info if no GPU override was applied
      if (toggleContainer._activeCount === undefined) {
        toggleContainer._originalSystem = { ...data.system };
        _renderGpuToggles(toggleContainer._originalSystem);
      }
    }
  } catch (e) {
    wp.destroy();
    // A superseded scan must stay silent: its failure belongs to a server or
    // filter set the user has already moved away from.
    if (_tk !== _hwfitFetchToken) return;
    // Same stale-while-revalidate rule: only surface the error if we have nothing
    // already on screen from the cache.
    if (!_cached) _hwfitShowError(list, remoteHost, e.message);
  }
}
export function _hwfitRenderHw(el, sys) {
if (!el || !sys) return;
// Cache system info globally so other modules can read VRAM without refetching
try { window._hwfitSystemCache = sys; } catch {}
// Show the hardware row when we have data
const hwRow = document.getElementById('hwfit-hw-row');
if (hwRow) hwRow.style.display = 'flex';
const gpuCount = sys.gpu_count || 0;
// gpu_error = nvidia-smi present but failing (e.g. driver/library version
// mismatch). Surface it instead of the misleading "No GPU" — plain text
// label, full error in the tooltip.
// Chip rendering: split into a clickable body (toggle off / on) and a
// separate × button (fully remove from view + treat as dismissed for
// ranking). The body's "off" state is just visually dimmed — the
// chip stays visible so you can flip it back on without re-scanning.
const chip = (key, label, title = 'Click to toggle off (X to hide)') => {
if (_removedHwChips.has(key)) return '';
const dim = _dismissedHwChips.has(key) ? ' hwfit-hw-chip-off' : '';
return (
``
+ ``
+ ``
+ ``
);
};
let gpuChip;
if (sys.gpu_name) {
// Mixed-GPU boxes (#711): `${gpuCount}x ${gpu_name}` uses gpus[0].name for
// every card, so a 4090+3060 reads as "2x RTX 4090". Use gpu_groups (the
// backend already groups identical cards) to render each pool separately
// and put the per-card index+VRAM into the tooltip so it's actually
// useful for picking CUDA_VISIBLE_DEVICES.
const groups = Array.isArray(sys.gpu_groups) ? sys.gpu_groups : [];
// Shorten vendor prefixes so a mixed-GPU label fits in the chip row
// without overflowing. Single-GPU label still shows the full name
// (that's what users are used to seeing). Tooltip carries the full
// unmodified names regardless, so no information is lost.
const _shortGpuName = (n) => String(n || '')
.replace(/^NVIDIA\s+GeForce\s+/i, '')
.replace(/^NVIDIA\s+/i, '')
.replace(/^AMD\s+Radeon\s+/i, '')
.replace(/^AMD\s+/i, '')
.replace(/^Intel\s+/i, '');
let label;
if (groups.length > 1) {
// Heterogeneous: "1× RTX 4090 + 1× RTX 3060"
label = groups.map(g => `${g.count}× ${esc(_shortGpuName(g.name))}`).join(' + ');
} else if (gpuCount > 1) {
label = `${gpuCount}× ${esc(sys.gpu_name)}`;
} else {
label = esc(sys.gpu_name);
}
const gpus = Array.isArray(sys.gpus) ? sys.gpus : [];
const tip = gpus.length
? gpus.map(g => `GPU ${g.index}: ${g.name} · ${(+g.vram_gb).toFixed(1)} GB`).join('\n')
: 'Click to toggle off (X to hide)';
gpuChip = chip('gpu', label, tip);
} else if (sys.gpu_error) {
gpuChip = _removedHwChips.has('gpu')
? ''
: (() => {
const dim = _dismissedHwChips.has('gpu') ? ' hwfit-hw-chip-off' : '';
return (
``
+ ``
+ ``
+ ``
);
})();
} else {
gpuChip = chip('gpu', 'No GPU');
}
const vram = sys.gpu_vram_gb ? `${sys.gpu_vram_gb.toFixed(1)} GB VRAM` : '';
const ram = `${sys.available_ram_gb?.toFixed(1) || '?'} / ${sys.total_ram_gb?.toFixed(1) || '?'} GB RAM`;
const cores = `${sys.cpu_cores || '?'} cores`;
const manual = _manualHwState();
const manualChip = (sys.manual_hardware || manual)
? ``
+ ``
+ ``
+ ``
: '';
el.innerHTML = gpuChip
+ (vram ? chip('vram', vram) : '')
+ chip('ram', ram)
+ chip('cores', cores)
+ chip('backend', esc(sys.backend || ''))
+ manualChip;
// Body click → toggle "off" (dimmed, still visible). Membership of
// _dismissedHwChips is what the ranker reads, so both add+remove
// here also flips the model list. The manual chip is excluded —
// dimming "manual" has no ranking effect (the key isn't checked),
// so click-to-toggle there would feel broken. Use × to clear it.
el.querySelectorAll('.hwfit-hw-chip-toggle').forEach(btn => {
btn.addEventListener('click', (e) => {
e.stopPropagation();
const key = btn.dataset.hwChip;
if (!key || key === 'manual') return;
const row = btn.closest('.hwfit-hw-chip-row');
if (_dismissedHwChips.has(key)) {
_dismissedHwChips.delete(key);
row?.classList.remove('hwfit-hw-chip-off');
} else {
_dismissedHwChips.add(key);
row?.classList.add('hwfit-hw-chip-off');
}
_resetGpuToggleState(false);
_hwfitCache = null;
_hwfitFetch(true);
});
});
// × button → fully remove the chip from view AND treat it as
// dismissed for ranking purposes (until next rescan).
el.querySelectorAll('.hwfit-hw-chip-x').forEach(btn => {
btn.addEventListener('click', (e) => {
e.stopPropagation();
const key = btn.dataset.hwChip;
if (!key) return;
// The manual-hardware chip needs special teardown: clear the
// saved manual state so the chip doesn't re-render on the next
// fetch from localStorage. Routes through clearManual() which
// also collapses the edit panel.
if (key === 'manual') {
_saveManualHwState(null);
btn.closest('.hwfit-hw-chip-row')?.remove();
document.getElementById('hwfit-manual-panel')?.classList.add('hidden');
_resetGpuToggleState();
_hwfitCache = null;
_hwfitFetch(true);
return;
}
_removedHwChips.add(key);
_dismissedHwChips.add(key);
btn.closest('.hwfit-hw-chip-row')?.remove();
_resetGpuToggleState(false);
_hwfitCache = null;
_hwfitFetch(true);
});
});
_wireManualHardwareControls(el);
}
// Wire the manual-hardware override controls: the EDIT button, the "manual"
// chip inside `el`, and the edit panel's mode / save / clear controls.
//
// Listeners on the button and the panel are attached only once (guarded by a
// `_hwfitManualBound` flag on each element); the chip listener is attached on
// every call. NOTE: the once-bound save/clear handlers close over the `el`
// passed on the first call, so this assumes the chip container element stays
// the same across calls.
//
// el: container element that holds the hardware chips.
function _wireManualHardwareControls(el) {
  const btn = document.getElementById('hwfit-hw-manual-btn');
  const panel = document.getElementById('hwfit-manual-panel');
  if (!btn || !panel) return;

  // Drop the saved override, remove its chip, collapse the panel and re-fetch.
  const clearManual = () => {
    _saveManualHwState(null);
    el.querySelector('.hwfit-hw-chip-manual')?.remove();
    panel.classList.add('hidden');
    _resetGpuToggleState();
    _hwfitCache = null;
    _hwfitFetch(true);
  };

  const manual = _manualHwState();
  btn.textContent = 'EDIT';
  if (manual) {
    // Seed the selects from the saved override. Each lookup is guarded because
    // every other control access in this function tolerates a missing element;
    // an unguarded dereference here would throw and skip all the wiring below.
    const modeSelect = panel.querySelector('.hwfit-manual-mode');
    const backendSelect = panel.querySelector('.hwfit-manual-backend');
    if (modeSelect) modeSelect.value = manual.mode || 'gpu';
    if (backendSelect) backendSelect.value = manual.backend || 'cuda';
  }

  // In "ram" mode the GPU-count, VRAM and backend inputs are hidden.
  const syncMode = () => {
    const isRam = panel.querySelector('.hwfit-manual-mode')?.value === 'ram';
    panel.querySelector('.hwfit-manual-gpus')?.closest('label')?.style.setProperty('display', isRam ? 'none' : '');
    panel.querySelector('.hwfit-manual-vram')?.closest('label')?.style.setProperty('display', isRam ? 'none' : '');
    const backend = panel.querySelector('.hwfit-manual-backend');
    if (backend) backend.style.display = isRam ? 'none' : '';
  };

  // EDIT button: toggle the panel (bound once).
  if (!btn._hwfitManualBound) {
    btn._hwfitManualBound = true;
    btn.addEventListener('click', () => {
      panel.classList.toggle('hidden');
      syncMode();
    });
  }

  // Clicking the manual chip body always opens the panel.
  el.querySelector('.hwfit-hw-chip-toggle[data-hw-chip="manual"]')?.addEventListener('click', () => {
    panel.classList.remove('hidden');
    syncMode();
  });

  // Panel controls (bound once).
  if (!panel._hwfitManualBound) {
    panel._hwfitManualBound = true;
    panel.querySelector('.hwfit-manual-mode')?.addEventListener('change', syncMode);
    panel.querySelector('.hwfit-hw-manual-save')?.addEventListener('click', () => {
      const mode = panel.querySelector('.hwfit-manual-mode')?.value || 'gpu';
      const gpuCount = _manualNumber(panel.querySelector('.hwfit-manual-gpus')?.value, 1);
      const vramGb = _manualNumber(panel.querySelector('.hwfit-manual-vram')?.value, 8);
      const ramGb = _manualOptionalNumber(panel.querySelector('.hwfit-manual-ram')?.value);
      const backend = panel.querySelector('.hwfit-manual-backend')?.value || 'cuda';
      const saved = { mode, gpuCount, vramGb, ramGb, backend };
      _saveManualHwState(saved);
      _resetGpuToggleState();
      _hwfitCache = null;
      panel.classList.add('hidden');
      // Re-render the chips from the override first, then refresh the list.
      _hwfitRenderHw(el, _manualDisplaySystem(window._hwfitSystemCache, saved));
      _hwfitFetch(true);
    });
    panel.querySelector('.hwfit-hw-manual-clear')?.addEventListener('click', clearManual);
  }

  syncMode();
}
// Maps each fit-rating key to a CSS color: a theme variable with a hex fallback.
export const _fitColors = {
  perfect: 'var(--green, #50fa7b)',
  good: 'var(--yellow, #f1fa8c)',
  marginal: 'var(--orange, #ffb86c)',
  too_tight: 'var(--red, #ff5555)',
};
// True when the model is tagged with an AWQ / GPTQ / FP8 / NVFP4 quantization,
// either through its quant field (quant, falling back to quantization) or
// through one of those tokens appearing as a whole word in its name, repo id
// or path. Accepts null/undefined and returns false for it.
function _requiresAcceleratorBackend(model) {
  const quantTag = String(model?.quant || model?.quantization || '').toUpperCase();
  // FP8 must match exactly; the other formats match as a prefix.
  if (quantTag === 'FP8') return true;
  if (/^AWQ|^GPTQ|^NVFP4/.test(quantTag)) return true;

  const haystack = [model?.name, model?.repo_id, model?.path]
    .map((part) => part || '')
    .join(' ')
    .toLowerCase();
  return /\b(awq|gptq|fp8|nvfp4)\b/i.test(haystack);
}
// Text for the Mode column of a model row. Precedence: image-generation
// models, then accelerator-only quant formats, then the label reported by
// _detectBackend, and finally the model's run_mode with underscores shown
// as "+".
function _modeLabel(model) {
  if (model?.is_image_gen) return 'image';
  if (_requiresAcceleratorBackend(model)) return 'vLLM/SGLang';
  const detected = _detectBackend(model);
  if (detected?.label) return detected.label;
  // replaceAll, not replace: a string pattern passed to replace() only swaps
  // the first underscore, leaving multi-part modes half converted.
  return String(model?.run_mode || '').replaceAll('_', '+');
}
// Column definitions for the model list header, in display order.
// Each tuple is [sort key (null = not sortable), header label, CSS class].
export const _hwfitColumns = [
  ['fit', 'Fit', 'hwfit-fit'],
  [null, 'Model', 'hwfit-name'],
  ['params', 'Param', 'hwfit-c-params'],
  [null, 'Quant', 'hwfit-c-quant'],
  ['vram', 'VRAM', 'hwfit-c-vram'],
  ['context', 'Ctx', 'hwfit-c-ctx'],
  ['speed', 'Speed', 'hwfit-c-speed'],
  ['score', 'Score', 'hwfit-c-score'],
  [null, 'Mode', 'hwfit-c-mode'],
].map(([key, label, cls]) => ({ key, label, cls }));
// Render the model list into el and wire its click handlers.
//
// With no models, a single explanatory message is written instead, chosen
// from the active filter inputs and the cached system info. Otherwise the
// header (built from _hwfitColumns) and the model rows are written through
// innerHTML, after which:
//   - a row click expands the inline action panel (Ollama rows instead fill
//     the Download input),
//   - a sortable header click changes the sort key or flips its direction,
//   - the dot inside the Fit header toggles the persisted fit-only filter.
//
// el:     list container; the function returns immediately when it is falsy.
// models: array of model records; null/undefined is treated as an empty list.
export function _hwfitRenderList(el, models) {
if (!el) return;
models = models || [];
if (!models.length) {
// Disambiguate WHY the list is empty so capable servers don't read as "too weak":
// active filters vs. a likely under-reported probe vs. genuinely low hardware.
const sys = _hwfitCache?.system;
const hasHw = sys && ((sys.gpu_vram_gb || 0) > 0 || (sys.total_ram_gb || 0) > 8);
const hasFilters = !!(document.getElementById('hwfit-search')?.value?.trim()
|| document.getElementById('hwfit-usecase')?.value
|| document.getElementById('hwfit-quant')?.value
|| document.getElementById('hwfit-engine')?.value);
let msg;
if (hasFilters) msg = 'No models match these filters — try clearing the search, use-case, quant, or engine.';
else if (hasHw) msg = 'No models fit — the hardware probe may have under-reported. Try Rescan.';
else msg = 'No models fit your hardware';
el.innerHTML = `
${msg}
`;
return;
}
// Sort key comes from the #hwfit-sort select; the direction is kept in its
// data-reverse attribute ("1" means reversed).
const sortSel = document.getElementById('hwfit-sort');
const currentSort = sortSel?.value || 'score';
const isReversed = sortSel?.dataset.reverse === '1';
// Active budget for the Fit column label \u2014 make it obvious whether the
// ranking is against GPU or RAM so "tightest" can't be ambiguous on a
// mixed-resource box.
// NOTE(review): _budget appears unused in the visible code now that the
// budget tag was removed from the Fit header; confirm and consider deleting.
const tc = document.getElementById('hwfit-gpu-toggles');
const _budget = (tc && typeof tc._activeCount === 'number')
? (tc._activeCount === 0 ? 'RAM' : (tc._activeCount === 1 ? 'GPU' : tc._activeCount + ' GPU'))
: null;
let html = '
';
for (const col of _hwfitColumns) {
const sortable = col.key ? ' hwfit-sortable' : '';
const active = col.key === currentSort ? ' hwfit-sort-active' : '';
let arrow = '';
if (col.key === currentSort) {
// \u25BC = highest first (default), \u25B2 = reversed (lowest first) \u2014 uniform
// across all columns now.
arrow = isReversed ? ' \u25B2' : ' \u25BC';
}
const dataAttr = col.key ? ` data-sort="${col.key}"` : '';
// Fit column gets a small dot to its left that toggles "show only models
// that fit" — replaces the old Fits On/Off button next to the toolbar.
let label = col.label;
if (col.cls === 'hwfit-fit') {
const _fitOnly = (() => { try { return localStorage.getItem('hwfit_fit_only_v1') === '1'; } catch { return false; } })();
label = `●${col.label}`;
// (Budget tag removed — the GPU/RAM/N-GPU suffix next to "Fit" was noise;
// the toggle row already shows which budget is active.)
}
html += `${label}${arrow}`;
}
html += '
`;
html += `${esc(fitLabel)}`;
// Append quant to the title when it's not already in the repo name. The
// suffix strips quant-parts the name already contains — e.g. for
// QuantTrio/MiniMax-M2-AWQ + quant=AWQ-4bit we just show "(4bit)", not
// "(AWQ-4bit)". DeepSeek-V4-Flash + FP4-MoE-Mixed keeps the full tag
// (none of those parts are in the repo id).
const _short = m.name?.split('/').pop() || m.name || '';
const _quantTag = (m.quant || '').trim();
const _lowerShort = _short.toLowerCase();
let _quantSuffix = '';
if (_quantTag) {
const _parts = _quantTag.split(/[-_]/).filter(Boolean);
const _remaining = _parts.filter(p => !_lowerShort.includes(p.toLowerCase()));
if (_remaining.length && _remaining.length < _parts.length + 1) { // at least one part is new
let _display = _remaining.join('-');
if (_display.length > 9) _display = _display.slice(0, 9) + '…';
_quantSuffix = ` (${esc(_display)})`;
}
}
html += `${modelLogo(m.name)}${esc(_short)}${_quantSuffix}${moeBadge}${imgBadge}${dlDot}`;
html += `${esc(pcount)}`;
// Truncate the Quant cell to 9 chars + ellipsis so long tags like
// "FP4-MoE-Mixed" don't push neighboring columns. Full tag stays in title.
const _qRaw = m.quant || '?';
const _qShort = _qRaw.length > 9 ? _qRaw.slice(0, 9) + '…' : _qRaw;
html += `${esc(_qShort)}`;
html += `${vramLabel}`;
html += `${m.is_image_gen ? '\u2014' : ctx}`;
html += `${m.is_image_gen ? '\u2014' : tps + ' t/s'}`;
html += `${score}`;
html += `${esc(modeLabel)}`;
html += `
`;
}
el.innerHTML = html;
// Click row → expand inline action panel. Exception: Ollama rows skip the
// expand panel (no HF metadata to power it) and just fill the Download
// input with the `:` tag — one click → ready to pull.
el.querySelectorAll('.hwfit-row:not(.hwfit-header)').forEach(row => {
row.addEventListener('click', () => {
const name = row.dataset.model;
if (!name) return;
const modelData = (_hwfitCache?.models || []).find(m => m.name === name);
if (!modelData) return;
if (modelData._isOllama) {
// Force-open the Download card if it's been collapsed — otherwise
// filling the (hidden) input silently swallows the click.
const dlBody = document.getElementById('cookbook-download-card-body');
const dlArrow = document.getElementById('cookbook-download-card-arrow');
if (dlBody && dlBody.style.display === 'none') {
dlBody.style.display = 'block';
if (dlArrow) dlArrow.style.transform = 'rotate(90deg)';
}
const dlInput = document.getElementById('cookbook-dl-repo');
if (dlInput) {
dlInput.value = modelData.name;
dlInput.focus();
// Briefly highlight so the user sees what got filled even when the
// download card sits far above the (long) hwfit list.
dlInput.classList.add('cookbook-dl-flash');
setTimeout(() => dlInput.classList.remove('cookbook-dl-flash'), 800);
dlInput.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
return;
}
_expandModelRow(row, modelData);
});
});
// Clickable header columns → sort (click again to toggle direction)
el.querySelectorAll('.hwfit-header .hwfit-sortable').forEach(col => {
col.addEventListener('click', (e) => {
// The little dot inside the Fit header is its own toggle (fit-only
// filter), don't let it fall through to a sort click.
if (e.target.closest('[data-fit-dot]')) {
const on = !e.target.classList.contains('active');
try { localStorage.setItem('hwfit_fit_only_v1', on ? '1' : '0'); } catch {}
// Un-toggling the fit filter (off → showing too-tight rows again) is
// typically because the user wants to see the LARGE models they can't
// run yet — re-sort by VRAM descending so the biggest surface first.
if (!on) {
const sortSel = document.getElementById('hwfit-sort');
if (sortSel) {
sortSel.value = 'vram';
sortSel.dataset.reverse = '0'; // descending (biggest first)
}
}
_hwfitCache = null;
_hwfitFetch();
return;
}
const sortKey = col.dataset.sort;
if (!sortKey) return;
const sel = document.getElementById('hwfit-sort');
if (!sel) return;
// Toggle direction if clicking the same column
if (sel.value === sortKey) {
sel.dataset.reverse = sel.dataset.reverse === '1' ? '0' : '1';
} else {
sel.value = sortKey;
sel.dataset.reverse = '0';
}
_hwfitFetch();
});
});
}
// Resolve the server currently selected in the scan dropdown and return its
// host string ('' for local). Called right before a download/run so the action
// targets the server the user sees selected, even if the global remoteHost was
// changed elsewhere (e.g. background serve-task handling) in the meantime.
//
// The selection is also mirrored into _envState (host, server key, env,
// envPath, platform) for the command preview and persisted best-effort, but
// the RETURN VALUE is the source of truth for the caller — do not re-read
// _envState.remoteHost downstream.
//
// When the dropdown is missing or has no value, the current
// _envState.remoteHost (or '') is returned and nothing is written. When the
// selected value does not resolve to a known server, '' is returned and
// _envState is left as it was (it is still persisted).
function _syncHostFromScanDropdown() {
  const dropdown = document.getElementById('hwfit-server-select');
  const choice = dropdown ? dropdown.value : null;
  if (choice == null) return _envState.remoteHost || '';

  const server = choice === 'local' ? null : _serverByVal(choice);
  if (choice === 'local') {
    _envState.remoteHost = '';
    _envState.remoteServerKey = '';
  } else if (server) {
    _envState.remoteHost = server.host;
    _envState.remoteServerKey = _serverKey(server);
    _envState.env = server.env;
    _envState.envPath = server.envPath;
    _envState.platform = server.platform || '';
  }

  try {
    _persistEnvState();
  } catch {
    // Persisting is best-effort; the resolved host is still returned.
  }
  return server ? server.host : '';
}
// Toggle the inline action panel under a model row.
//
// Any panel already open in the same .hwfit-list is removed first; clicking
// the row that was already active only closes it. Otherwise a panel is
// inserted right after the row and its Download, Run (quick-run) and
// Configure buttons are wired.
//
// row:       the clicked row element; must sit inside a .hwfit-list.
// modelData: the model record for that row (the list click handler passes the
//            matching entry of _hwfitCache.models).
export function _expandModelRow(row, modelData) {
const list = row.closest('.hwfit-list');
if (!list) return;
const existingPanel = list.querySelector('.hwfit-action-panel');
const wasActive = row.classList.contains('hwfit-row-active');
// Remove existing panel and active state
if (existingPanel) existingPanel.remove();
list.querySelectorAll('.hwfit-row-active').forEach(r => r.classList.remove('hwfit-row-active'));
// Toggle: if clicking same row, just close
if (wasActive) return;
row.classList.add('hwfit-row-active');
const { backend, label } = _detectBackend(modelData);
const isVllm = backend === 'vllm';
const isLlamaCpp = backend === 'llamacpp';
const ctx = modelData.context || 8192;
const dlSource = _downloadSourceRepo(modelData, backend);
const hfUrl = `https://huggingface.co/${dlSource.repo}`;
let html = `
`;
html += `
`;
html += `${esc(modelData.name)}${dlSource.kind ? ` (${esc(dlSource.kind)} ${esc(modelData.quant || '')})` : (modelData.quant_repo ? ` (${esc(modelData.quant)})` : '')}`;
html += `${esc(label)}`;
html += `HF \u2197`;
html += `
`;
html += `
`;
html += ``;
if (!modelData.is_image_gen) {
html += ``;
html += ``;
}
html += `
`;
} else if (_requiresAcceleratorBackend(modelData)) {
// Only show the "needs CUDA/ROCm" note when the host doesn't already have
// one. With a visible CUDA/ROCm accelerator the note is noise — the user
// can already serve the model and reading the warning on every row makes
// the panel feel like everything's broken.
const _sys = _hwfitCache?.system || {};
const _backend = (_sys.backend || '').toLowerCase();
const _hasGpuAccel = !!_sys.has_gpu && (_backend === 'cuda' || _backend === 'rocm');
if (!_hasGpuAccel) {
html += `
This is a safetensors GPU-serving format. Use vLLM/SGLang with a visible CUDA/ROCm accelerator, or pick a GGUF download for llama.cpp/Ollama.
`;
}
}
html += `
`;
row.insertAdjacentHTML('afterend', html);
// The panel is the element that was just inserted after the row.
const panel = row.nextElementSibling;
// Wire download button
const dlBtn = panel.querySelector('.hwfit-dl-btn');
if (dlBtn) {
dlBtn.addEventListener('click', () => {
const host = _syncHostFromScanDropdown(); // host the user picked, passed explicitly
// Ollama pulls run as a panel command; everything else goes through the
// model download helper with the explicit host.
if (backend === 'ollama') {
_runPanelCmd(panel, _buildDownloadCmd(modelData, backend), { timeout: 0 });
} else {
_runModelDownload(panel, modelData, backend, host);
}
});
}
// Wire quick-run button — download + launch with smart defaults
const quickRunBtn = panel.querySelector('.hwfit-quickrun-btn');
if (quickRunBtn) {
quickRunBtn.addEventListener('click', async () => {
const _qrHost = _syncHostFromScanDropdown();
// Don't serve a model that isn't downloaded yet. vLLM/SGLang would
// background-pull at launch, so the serve task shows up as "running" in
// the Running tab while nothing is actually served (and llama.cpp just
// errors "No GGUF found"). The Configure button and the Serve tab already
// gate on the cached-model list — mirror that here. When the model isn't
// present, honor the button's "Download" half by kicking off the download
// instead, then the user can Run again to serve once it finishes.
const _short = modelData.name.split('/').pop();
// _cachedModelIds being null means the cache state is unknown; in that case
// the gate below is skipped and the launch proceeds.
const _downloaded = _cachedModelIds && (
_cachedModelIds.has(modelData.name)
|| [..._cachedModelIds].some(id => id === modelData.name || id.endsWith('/' + _short))
);
if (_cachedModelIds && !_downloaded) {
uiModule.showToast('Model not downloaded yet — starting download. Run again to serve once it finishes.');
if (backend === 'ollama') {
_runPanelCmd(panel, _buildDownloadCmd(modelData, backend), { timeout: 0 });
} else {
_runModelDownload(panel, modelData, backend, _qrHost);
}
return;
}
quickRunBtn.disabled = true;
quickRunBtn.textContent = 'Starting...';
// Smart defaults based on hardware and model
const system = _hwfitCache?.system || {};
// Prefer the active homogeneous pool (the route sets active_group when a GPU
// pool is selected). Its per-GPU VRAM + device indices are what we serve on —
// vLLM can only tensor-parallel across identical GPUs, so we pin to one pool.
const grp = system.active_group || null;
const poolCount = (grp && grp.use_count) || system.gpu_count || 1;
// Per-GPU memory: the pool value, else total VRAM split evenly, else 20.
const gpuMem = (grp && grp.vram_each) || (system.gpu_vram_gb / (system.gpu_count || 1)) || 20;
const modelVram = modelData.required_gb || 10;
// TP must be a power of two within the pool (plus the exact pool size) —
// pick the smallest that fits the model in VRAM, else the whole pool.
const _tpOpts = [1, 2, 4, 8, 16].filter(n => n <= poolCount);
if (poolCount > 0 && !_tpOpts.includes(poolCount)) _tpOpts.push(poolCount);
let tp = _tpOpts[_tpOpts.length - 1] || 1;
for (const n of _tpOpts) { if (n * gpuMem >= modelVram) { tp = n; break; } }
// Pin to exactly this pool's first `tp` GPUs so vLLM can't reach across into
// a mismatched pool. Respect a manual GPU pin (_envState.gpus) if the user set one.
let cudaDevices = '';
if (grp && Array.isArray(grp.indices)) cudaDevices = grp.indices.slice(0, tp).join(',');
// Context: scale based on available VRAM headroom
const headroom = (tp * gpuMem) - modelVram;
// NOTE(review): this reads modelData.context_length, whereas the panel setup
// at the top of this function reads modelData.context — confirm which field
// the model records actually carry.
let maxCtx = modelData.context_length || 8192;
if (headroom < 4) maxCtx = Math.min(maxCtx, 4096);
else if (headroom < 8) maxCtx = Math.min(maxCtx, 8192);
else if (headroom < 16) maxCtx = Math.min(maxCtx, 16384);
// GPU mem utilization
const gpuUtil = modelVram / (tp * gpuMem) > 0.8 ? '0.95' : '0.90';
// Tool parser
const parser = _detectToolParser(modelData.name);
// NOTE(review): the serve request uses _envState.remoteHost here, not the
// _qrHost value returned by _syncHostFromScanDropdown() at the top of this
// handler (which is what the download branch passes on). Confirm the two
// cannot diverge, or switch to _qrHost.
const host = _envState.remoteHost || '';
// NOTE(review): hostIp is not referenced anywhere else in this handler.
const hostIp = host.includes('@') ? host.split('@').pop() : host;
// Used by the SGLang and vLLM commands; the llama.cpp command below
// hard-codes port 8080 instead.
const port = '8000';
const detected = _detectBackend(modelData);
const runBackend = detected.backend || 'vllm';
// Build serve command
// NOTE(review): modelData.name is interpolated into the shell command
// unquoted — presumably always a plain repo id; confirm or quote it.
let cmd = '';
if (runBackend === 'sglang') {
cmd = `python3 -m sglang.launch_server --model-path ${modelData.name} --host 0.0.0.0 --port ${port}`;
if (tp > 1) cmd += ` --tp ${tp}`;
cmd += ` --context-length ${maxCtx}`;
cmd += ` --mem-fraction-static ${gpuUtil}`;
cmd += ' --trust-remote-code';
} else if (runBackend === 'llamacpp') {
// Locate a GGUF in the HF cache snapshot dir (first shard of a split file
// is listed first), then try llama-server and fall back to the Python server.
const dir = `"$HOME/.cache/huggingface/hub/models--${modelData.name.replace(/\//g, '--')}/snapshots"`;
const ggufPath = `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
cmd = `MODEL_FILE=${ggufPath} && { [ -n "$MODEL_FILE" ] && [ -f "$MODEL_FILE" ]; } || { echo "ERROR: No GGUF found on this host. Download a GGUF quant or switch backend."; exit 1; } && llama-server --model "$MODEL_FILE" --host 0.0.0.0 --port 8080 -ngl 99 -c ${maxCtx} || python3 -m llama_cpp.server --model "$MODEL_FILE" --host 0.0.0.0 --port 8080 --n_gpu_layers 99 --n_ctx ${maxCtx}`;
} else {
cmd = `vllm serve ${modelData.name} --host 0.0.0.0 --port ${port}`;
cmd += ` --tensor-parallel-size ${tp}`;
cmd += ` --max-model-len ${maxCtx}`;
cmd += ` --gpu-memory-utilization ${gpuUtil}`;
cmd += ' --dtype auto';
cmd += ' --enforce-eager';
cmd += ' --trust-remote-code';
cmd += ` --enable-auto-tool-choice --tool-call-parser ${parser}`;
}
// Build env prefix
let envPrefix = '';
if (_envState.env === 'venv' && _envState.envPath) {
const p = _envState.envPath;
envPrefix = 'source ' + _shellQuote(p.endsWith('/bin/activate') ? p : p + '/bin/activate');
} else if (_envState.env === 'conda' && _envState.envPath) {
envPrefix = 'eval "$(conda shell.bash hook)" && conda activate ' + _shellQuote(_envState.envPath);
}
// Launch via serve API. Field names must match the backend ServeRequest
// schema (repo_id + cmd) — sending `command`/`model` failed Pydantic
// validation (422), which is why Run silently did nothing.
const _srv = _serverByVal(_envState.remoteServerKey || host);
// Falsy values become undefined so JSON.stringify omits those fields.
const payload = {
repo_id: modelData.name,
cmd: cmd,
remote_host: host || undefined,
ssh_port: (_srv && _srv.port) || undefined,
env_prefix: envPrefix || undefined,
hf_token: _envState.hfToken || undefined,
gpus: _envState.gpus || cudaDevices || undefined,
platform: _envState.platform || undefined,
};
try {
const res = await fetch('/api/model/serve', {
method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload),
});
const data = await res.json();
if (data.ok) {
const shortName = modelData.name.split('/').pop();
_addTask(data.session_id, shortName, 'serve', { _cmd: cmd, model: modelData.name, backend: runBackend, remote_host: host });
_renderRunningTab();
uiModule.showToast(`Launching ${shortName}...`);
// Switch to Running tab
const runTab = document.querySelector('.cookbook-tab[data-backend="Running"]');
if (runTab) runTab.click();
} else {
uiModule.showError('Launch failed: ' + (data.error || ''));
}
} catch (e) {
uiModule.showError('Launch failed: ' + e.message);
}
// Re-enable the button whether the launch succeeded or failed.
quickRunBtn.disabled = false;
quickRunBtn.textContent = 'Run';
});
}
// Wire configure button — open the model's Serve panel.
const configBtn = panel.querySelector('.hwfit-serve-expand-btn');
if (configBtn) {
configBtn.addEventListener('click', async () => {
const repo = modelData.name;
const short = repo?.split('/').pop();
// Use the same "downloaded" source as the dl-dot (_cachedModelIds), NOT a
// DOM lookup for .hwfit-cached-item — those only exist on the Serve tab, so
// from the What-Fits tab the old check always failed and falsely said
// "download first" even for models that ARE downloaded.
const downloaded = _cachedModelIds && (
_cachedModelIds.has(repo)
|| [..._cachedModelIds].some(id => id === repo || id.endsWith('/' + short))
);
if (_cachedModelIds && !downloaded) {
uiModule.showToast('Download the model first, then configure from Serve tab');
return;
}
// Downloaded (or cache state unknown) — open the Serve panel, which switches
// to the Serve tab, fetches the cached list, and expands this model's card.
try {
const { openServePanelForRepo } = await import('./cookbookServe.js');
await openServePanelForRepo(repo);
} catch (e) {
uiModule.showToast('Could not open Serve: ' + (e && e.message ? e.message : e));
}
});
}
}
export function _hwfitInit() {
const uc = document.getElementById('hwfit-usecase');
const sort = document.getElementById('hwfit-sort');
const qpref = document.getElementById('hwfit-quant');
const ctx = document.getElementById('hwfit-context');
const ctxLabel = document.getElementById('hwfit-context-label');
const search = document.getElementById('hwfit-search');
const remote = document.getElementById('hwfit-host');
_syncCtxControl();
if (uc) uc.addEventListener('change', () => _hwfitFetch());
if (sort) sort.addEventListener('change', () => _hwfitFetch());
if (qpref) qpref.addEventListener('change', () => _hwfitFetch());
// Engine filter is a pure client-side view filter over the already-fetched
// list (HF + Ollama merged), so just re-render from cache.
const engine = document.getElementById('hwfit-engine');
if (engine) engine.addEventListener('change', () => {
const list = document.getElementById('hwfit-list');
if (list && _hwfitCache && Array.isArray(_hwfitCache.models)) {
_hwfitRenderList(list, _applyEngineFilter(_hwfitCache.models));
} else {
_hwfitFetch();
}
});
if (ctx && !ctx.dataset.bound) {
ctx.dataset.bound = '1';
ctx.addEventListener('input', () => {
if (ctxLabel) ctxLabel.textContent = _ctxLabel(_ctxValue());
});
ctx.addEventListener('change', () => {
const targetCtx = _ctxValue();
try { localStorage.setItem(_CTX_KEY, String(targetCtx)); } catch {}
// Ctx drag affects sort mode: a specific ctx target (anything < Max)
// implies "what runs at this context length" — sort by VRAM ascending
// so the cheapest-fitting models surface first. Dragging back to Max
// releases the constraint → go back to the default score ranking.
const sortSel = document.getElementById('hwfit-sort');
if (sortSel) {
if (targetCtx) {
sortSel.value = 'vram';
sortSel.dataset.reverse = '1'; // ascending = smallest VRAM first
} else {
sortSel.value = 'score';
sortSel.dataset.reverse = '';
}
}
_hwfitCache = null;
_hwfitFetch();
});
}
// Rescan — force a fresh hardware probe (bypasses the per-host cache).
const rescan = document.getElementById('hwfit-rescan');
if (rescan && !rescan.dataset.bound) {
rescan.dataset.bound = '1';
rescan.addEventListener('click', async () => {
if (rescan.dataset.scanning) return; // ignore re-clicks mid-scan
rescan.dataset.scanning = '1';
const orig = rescan.innerHTML;
rescan.disabled = true;
rescan.style.opacity = '0.85';
// Swap the ↻ glyph for a live whirlpool so the click feels responsive
// during the (often slow) SSH hardware probe.
const wp = spinnerModule.createWhirlpool(12);
wp.element.style.marginRight = '4px';
wp.element.style.position = 'relative';
wp.element.style.top = '-2px'; // sit a touch higher, aligned with the label
rescan.innerHTML = '';
rescan.appendChild(wp.element);
rescan.appendChild(document.createTextNode('RESCAN'));
// Reset toggle state (no flicker — buttons stay until the fresh scan swaps them).
_resetGpuToggleState();
try {
await _hwfitFetch(true);
} finally {
try { wp.destroy(); } catch {}
rescan.innerHTML = orig;
rescan.disabled = false;
rescan.style.opacity = '';
delete rescan.dataset.scanning;
}
});
}
if (search) search.addEventListener('input', () => {
clearTimeout(_hwfitDebounce);
_hwfitDebounce = setTimeout(() => _hwfitFetch(), 400);
});
// HF Token
const hfToken = document.getElementById('hwfit-hftoken');
if (hfToken) {
hfToken.addEventListener('change', () => { _envState.hfToken = hfToken.value.trim(); _persistEnvState(); });
hfToken.addEventListener('input', () => { _envState.hfToken = hfToken.value.trim(); });
}
// Rebuild the option markup of the scan dropdown (#hwfit-server-select) and
// the download dropdown (#hwfit-dl-server) from _envState.servers, keeping
// whatever value each dropdown had selected before the rebuild.
function _rebuildServerSelect() {
const selectors = [
document.getElementById('hwfit-server-select'),
document.getElementById('hwfit-dl-server'),
];
for (const sel of selectors) {
// Either dropdown may be absent from the page.
if (!sel) continue;
// Capture the selection before innerHTML is replaced.
const currentVal = sel.value;
let html = ``;
_envState.servers.forEach((s, i) => {
// Entries without a host are skipped.
if (!s.host) return;
// Display label: name, else host, else a positional fallback.
const label = s.name || s.host || `Server ${i + 1}`;
html += ``;
});
sel.innerHTML = html;
// Re-apply the previously selected value.
sel.value = currentVal;
}
}
// Servers — rebuild _envState.servers from the server-entry DOM and persist.
//
// Every .cookbook-server-entry that contains a .cookbook-server-row yields one
// server record: name, host, port, env, envPath, modelDirs, modelDir,
// downloadDir and platform.
function _syncServers() {
  const HF_CACHE_DIR = '~/.cache/huggingface/hub';
  const trimmedValue = (node) => node?.value.trim() || '';

  const serverEntries = document.querySelectorAll('.cookbook-server-entry');
  _envState.servers = [];

  for (const entry of serverEntries) {
    const row = entry.querySelector('.cookbook-server-row');
    if (!row) continue;

    const nameInput = row.querySelector('.cookbook-srv-name');
    const hostInput = row.querySelector('.cookbook-srv-host');
    const name = trimmedValue(nameInput);
    // A disabled or read-only host field is recorded as an empty host.
    const hostLocked = hostInput?.disabled || hostInput?.readOnly;
    const host = hostLocked ? '' : trimmedValue(hostInput);
    const port = trimmedValue(row.querySelector('.cookbook-srv-port'));
    const env = row.querySelector('.cookbook-srv-env')?.value || 'none';
    const envPath = trimmedValue(row.querySelector('.cookbook-srv-path'));

    // Model directories come from the tags' data-dir attribute rather than
    // their textContent, because the tag also holds a download-target icon
    // and a close glyph that textContent would fold into the path.
    const modelDirs = Array.from(entry.querySelectorAll('.cookbook-modeldir-tag'))
      .map((tag) => (tag.dataset.dir || '').replaceAll('\u2715', '').replaceAll('\u2716', '').trim())
      .filter(Boolean);
    if (modelDirs.length === 0) modelDirs.push(HF_CACHE_DIR);

    // The directory flagged as download target, if any; '' means the HF cache.
    const activeTarget = entry.querySelector('.cookbook-modeldir-dl.active');
    const downloadDir = activeTarget ? (activeTarget.dataset.dlDir || '') : '';
    const platform = entry.dataset.platform || '';

    // Primary model dir: the first non-HF-cache directory, else the first one.
    const modelDir = modelDirs.find((dir) => dir !== HF_CACHE_DIR) || modelDirs[0];

    _envState.servers.push({ name, host, port, env, envPath, modelDirs, modelDir, downloadDir, platform });
  }

  // Deliberately do NOT change the selected host here. This can run while the
  // servers DOM is mid-render, when disabled/read-only host fields read as
  // empty, so the rebuilt list may temporarily miss the selected server.
  // Falling back to another server at that point would silently flip the
  // active host while the dropdown still shows the old one. The selection only
  // changes through an explicit dropdown pick; here env/envPath are refreshed
  // when the current selection can be matched, and left alone otherwise.
  const current = _serverByVal(_envState.remoteServerKey || _envState.remoteHost);
  if (current) {
    _envState.env = current.env;
    _envState.envPath = current.envPath;
  }
  _persistEnvState();
}
// Run a quick SSH reachability test for one server entry and reflect the
// outcome in its status dot (class + tooltip) and inline message.
//
// The host and port are read from the entry's form fields and end up inside a
// shell command sent to /api/shell/exec, so they are checked before use: the
// port must be 1-5 digits, the host must not start with "-" (ssh would parse
// it as an option), and the host is single-quoted in the command.
//
// entry: the server entry element containing the host/port inputs, the
//        .cookbook-srv-status dot and the optional .cookbook-srv-test-msg.
// Resolves with no value; failures are shown in the UI, never thrown.
async function _testServerConnection(entry) {
  const host = entry.querySelector('.cookbook-srv-host')?.value?.trim();
  const port = entry.querySelector('.cookbook-srv-port')?.value?.trim() || '';
  const dot = entry.querySelector('.cookbook-srv-status');
  const msg = entry.querySelector('.cookbook-srv-test-msg');

  // The message element is optional; an empty text resets it to the dim state.
  const setMsg = (text, color = '') => {
    if (!msg) return;
    msg.textContent = text || '';
    msg.title = text || '';
    msg.style.color = color || '';
    msg.style.opacity = text ? '0.75' : '0.55';
  };
  const showFailure = (title, text) => {
    dot.className = 'cookbook-srv-status fail';
    dot.title = title;
    setMsg(text, 'var(--red,#e06c75)');
  };

  if (!dot) return;
  if (!host) {
    dot.className = 'cookbook-srv-status';
    dot.title = 'Enter user@host to test';
    setMsg('');
    return;
  }

  // Refuse input that cannot be a valid SSH target before building a command.
  if (host.startsWith('-') || (port && !/^\d{1,5}$/.test(port))) {
    const reason = 'use a plain SSH target like user@host and a numeric port';
    showFailure(`SSH failed: ${reason}`, `Failed · ${reason}`);
    return;
  }

  dot.className = 'cookbook-srv-status testing';
  dot.title = 'Testing SSH…';
  setMsg('Testing SSH...');

  // Port 22 is the default, so the flag is only added for other ports.
  const pf = port && port !== '22' ? `-p ${port} ` : '';
  const cmd = `ssh -o BatchMode=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new ${pf}${_singleQuote(host)} "echo ok"`;
  const t0 = Date.now();
  try {
    const res = await fetch('/api/shell/exec', {
      method: 'POST', credentials: 'same-origin',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ command: cmd, timeout: 8 }),
    });
    const data = await res.json();
    const ms = Date.now() - t0;
    const out = (data.stdout || '').trim();
    if (data.exit_code === 0 && out.startsWith('ok')) {
      dot.className = 'cookbook-srv-status ok';
      dot.title = `Reachable · ${ms} ms · use Dependencies to check tmux/HF setup`;
      setMsg(`Connected · ${ms} ms`, 'var(--green,#50fa7b)');
    } else {
      // Keep the tooltip readable: cap the error text at 240 characters.
      const err = (data.stderr || data.stdout || `exit ${data.exit_code}`).toString().trim().slice(0, 240);
      showFailure(`SSH failed: ${err}`, `Failed · ${err}`);
    }
  } catch (e) {
    showFailure(`Test failed: ${e.message || e}`, `Failed · ${e.message || e}`);
  }
}
/**
 * Quote a value as a single POSIX-shell word.
 *
 * @param {*} value - Value to quote; null/undefined become the empty string,
 *   everything else is stringified (so 0 and false are kept, not dropped).
 * @returns {string} The value wrapped in single quotes.
 */
function _singleQuote(value) {
  // A single quote can't be escaped inside single quotes: close the quoted
  // run, emit a double-quoted "'", and reopen.
  return `'${String(value ?? '').replaceAll("'", `'"'"'`)}'`;
}
/**
 * Build the one-line ssh command that appends `publicKey` to the remote
 * account's ~/.ssh/authorized_keys (creating the directory/file and fixing
 * their modes), skipping the append when the exact line is already present.
 *
 * @param {string} host - SSH destination, inserted verbatim.
 * @param {string} port - SSH port; empty or '22' omits the -p flag.
 * @param {string} publicKey - Public key line to install.
 * @returns {string} The complete ssh command line.
 */
function _serverKeyCommand(host, port, publicKey) {
  const steps = [];
  steps.push(`KEY=${_singleQuote(publicKey)}`);
  steps.push('mkdir -p ~/.ssh');
  steps.push('chmod 700 ~/.ssh');
  steps.push('touch ~/.ssh/authorized_keys');
  // grep -x/-F: whole-line, literal match, so the key is only appended once.
  steps.push('(grep -qxF "$KEY" ~/.ssh/authorized_keys || printf "%s\\n" "$KEY" >> ~/.ssh/authorized_keys)');
  steps.push('chmod 600 ~/.ssh/authorized_keys');
  const remoteScript = steps.join(' && ');

  let portFlag = '';
  if (port && port !== '22') {
    portFlag = `-p ${port} `;
  }
  return `ssh -o StrictHostKeyChecking=accept-new ${portFlag}${host} ${_singleQuote(remoteScript)}`;
}
/**
 * Read (GET) or create (POST) the Cookbook SSH key via /api/cookbook/ssh-key.
 *
 * @param {boolean} [generate=false] - POST to generate a key instead of
 *   fetching the existing one.
 * @returns {Promise<string>} The trimmed `public_key` from the response, or ''
 *   when the response carries none.
 * @throws {Error} When `generate` is true and the response's `ok` is falsy;
 *   the message is the response's `error` if present.
 */
async function _fetchCookbookSshKey(generate = false) {
  const method = generate ? 'POST' : 'GET';
  const response = await fetch('/api/cookbook/ssh-key', {
    method,
    credentials: 'same-origin',
  });
  const payload = await response.json();
  // Only a failed generate is an error; a plain read with no key is just ''.
  if (generate && !payload.ok) {
    throw new Error(payload.error || 'Failed to generate SSH key');
  }
  const key = payload.public_key || '';
  return key.trim();
}
/**
 * Fill a server card's SSH-key panel with the command that installs the
 * Cookbook public key on that server.
 *
 * Validation messages and errors are written into the command box itself and
 * the Copy button is disabled whenever the box does not hold a usable command.
 *
 * @param {Element} entry - The server card.
 * @param {boolean} [generate=false] - Force generating a new key instead of
 *   reading the existing one first.
 * @returns {Promise<void>} Never rejects; failures are rendered in the panel.
 */
async function _populateServerKeyPanel(entry, generate = false) {
  const panel = entry.querySelector('.cookbook-server-key-panel');
  const cmdBox = entry.querySelector('.cookbook-server-key-command');
  const copyBtn = entry.querySelector('.cookbook-server-key-copy');
  const genBtn = entry.querySelector('.cookbook-server-key-gen');
  if (!panel || !cmdBox) return;
  const host = entry.querySelector('.cookbook-srv-host')?.value?.trim() || '';
  const port = entry.querySelector('.cookbook-srv-port')?.value?.trim() || '';
  if (!host || !host.includes('@')) {
    cmdBox.value = 'Enter the server as user@host first.';
    if (copyBtn) copyBtn.disabled = true;
    return;
  }
  // The host/port end up in a command the user pastes into a shell, so only
  // plain user@host and a numeric port are accepted.
  if (!/^[A-Za-z0-9._~-]+@[A-Za-z0-9._:-]+$/.test(host) || (port && !/^\d{1,5}$/.test(port))) {
    cmdBox.value = 'Use a plain SSH target like user@host and an optional numeric port.';
    if (copyBtn) copyBtn.disabled = true;
    return;
  }
  if (genBtn) {
    genBtn.disabled = true;
    genBtn.textContent = generate ? 'Generating...' : 'Loading...';
  }
  try {
    let publicKey = await _fetchCookbookSshKey(generate);
    // No key yet on a plain read: fall back to generating one.
    if (!publicKey && !generate) publicKey = await _fetchCookbookSshKey(true);
    // Still nothing: don't hand out a command that would install an empty
    // KEY (it would append a blank line to authorized_keys) labelled "ready".
    if (!publicKey) throw new Error('No SSH public key available - try Generate key.');
    cmdBox.value = _serverKeyCommand(host, port, publicKey);
    if (copyBtn) copyBtn.disabled = false;
    if (genBtn) genBtn.textContent = 'Key ready';
  } catch (e) {
    cmdBox.value = e.message || String(e);
    if (copyBtn) copyBtn.disabled = true;
    if (genBtn) genBtn.textContent = 'Generate key';
  } finally {
    if (genBtn) genBtn.disabled = false;
  }
}
/**
 * Attach all interactive behaviour to one server card: status dot / SSH test,
 * default-server toggle, SSH-key panel, field change + blur handling,
 * Cancel / Save / Remove, and model-directory tags.
 *
 * Safe to call repeatedly: a card is wired at most once (`data-wired`).
 *
 * @param {HTMLElement} entry - A `.cookbook-server-entry` card.
 * @returns {void}
 */
function _wireServerEntry(entry) {
  // Idempotency guard: _hwfitInit() can run more than once per panel open,
  // and re-wiring would stack duplicate listeners on every control (e.g. the
  // model-dir "+" button would add two tags per click, change handlers fire
  // twice). Bind each entry exactly once.
  if (entry.dataset.wired) return;
  entry.dataset.wired = '1';

  // Broadcast for anything outside the settings tab that depends on the
  // server list (Serve dialog host picker, Running tasks, etc.). Best-effort:
  // a failure to broadcast must not abort the save/remove that triggered it.
  const announceServersChanged = () => {
    try {
      document.dispatchEvent(new CustomEvent('cookbook:servers-changed', {
        detail: { servers: _envState.servers.slice() },
      }));
    } catch (_) {}
  };
  // Drop the cached scan and fetch a fresh one.
  const rescanHardware = () => {
    _hwfitCache = null;
    _hwfitFetch();
  };

  // Inject the status dot once if missing — into the card header next to the
  // server name, falling back to the start of the input row.
  const row = entry.querySelector('.cookbook-server-row');
  const titleEl = entry.querySelector('.cookbook-server-title');
  if (!entry.querySelector('.cookbook-srv-status')) {
    const dot = document.createElement('span');
    dot.className = 'cookbook-srv-status';
    dot.title = 'Click to test SSH';
    dot.addEventListener('click', (e) => { e.stopPropagation(); _testServerConnection(entry); });
    if (titleEl) titleEl.insertBefore(dot, titleEl.firstChild);
    else if (row) row.insertBefore(dot, row.firstChild);
    // The local server (readonly host) is always reachable — show it green
    // without an SSH test.
    const _hostEl = entry.querySelector('.cookbook-srv-host');
    if (_hostEl && (_hostEl.readOnly || _hostEl.disabled)) {
      dot.className = 'cookbook-srv-status ok';
      dot.title = 'Local (this machine)';
    }
  }

  const checkBtn = entry.querySelector('.cookbook-server-check-btn');
  if (checkBtn && !checkBtn.dataset.bound) {
    checkBtn.dataset.bound = '1';
    checkBtn.addEventListener('click', (e) => {
      e.stopPropagation();
      _testServerConnection(entry);
    });
  }

  // Default-server toggle: exclusive checkmark in the entry title. The chosen
  // server is what Cookbook lands on (all dropdowns) on the next open.
  const _defBtn = entry.querySelector('.cookbook-srv-default');
  if (_defBtn && !_defBtn.dataset.bound) {
    _defBtn.dataset.bound = '1';
    _defBtn.addEventListener('click', (e) => {
      e.stopPropagation();
      const key = _defBtn.dataset.srvKey || '';
      // Toggle off if it's already the default; otherwise make it the default.
      _envState.defaultServer = (_envState.defaultServer === key) ? '' : key;
      _persistEnvState();
      // Repaint every card's toggle so the checkmark stays exclusive.
      document.querySelectorAll('.cookbook-srv-default').forEach(b => {
        const on = !!_envState.defaultServer && b.dataset.srvKey === _envState.defaultServer;
        b.classList.toggle('active', on);
        // Keep the "default" label after the icon (don't overwrite it).
        b.innerHTML = (on ? _MODELDIR_CHECK_ON : _MODELDIR_CHECK_OFF) + 'default';
        b.title = on ? 'Default server — Cookbook opens here' : 'Make this the default server';
      });
      // Apply immediately so the dropdowns reflect it without reopening
      // (inline — _applyServerSelection lives in cookbook.js and isn't imported here).
      const _dk = _envState.defaultServer;
      if (_dk) {
        if (_dk === 'local') {
          _envState.remoteHost = '';
          _envState.remoteServerKey = '';
          _envState.env = 'none';
          _envState.envPath = '';
          _envState.platform = '';
        } else {
          const _s = _serverByVal(_dk);
          if (_s) {
            _envState.remoteHost = _s.host;
            _envState.remoteServerKey = _serverKey(_s);
            _envState.env = _s.env || 'none';
            _envState.envPath = _s.envPath || '';
            _envState.platform = _s.platform || '';
          }
        }
        _persistEnvState();
        document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => {
          if (sel && sel.tagName === 'SELECT') sel.value = _currentServerValue();
        });
      }
      const defaultSrv = _serverByVal(_envState.defaultServer);
      uiModule.showToast(_envState.defaultServer
        ? 'Default server: ' + (_envState.defaultServer === 'local' ? 'Local' : (defaultSrv?.name || defaultSrv?.host || 'selected server'))
        : 'Default server cleared');
    });
  }

  // SSH-key panel: toggle button, (re)generate button, copy button.
  const keyBtn = entry.querySelector('.cookbook-server-key-btn');
  if (keyBtn && !keyBtn.dataset.bound) {
    keyBtn.dataset.bound = '1';
    keyBtn.addEventListener('click', async () => {
      const panel = entry.querySelector('.cookbook-server-key-panel');
      if (!panel) return;
      const willOpen = panel.classList.contains('hidden');
      panel.classList.toggle('hidden', !willOpen);
      panel.style.display = willOpen ? 'flex' : '';
      if (willOpen) await _populateServerKeyPanel(entry, false);
    });
  }
  const keyGenBtn = entry.querySelector('.cookbook-server-key-gen');
  if (keyGenBtn && !keyGenBtn.dataset.bound) {
    keyGenBtn.dataset.bound = '1';
    keyGenBtn.addEventListener('click', () => _populateServerKeyPanel(entry, true));
  }
  const keyCopyBtn = entry.querySelector('.cookbook-server-key-copy');
  if (keyCopyBtn && !keyCopyBtn.dataset.bound) {
    keyCopyBtn.dataset.bound = '1';
    keyCopyBtn.addEventListener('click', async () => {
      const cmd = entry.querySelector('.cookbook-server-key-command')?.value?.trim() || '';
      // The box doubles as a hint line ("Enter the server as…"); never copy that.
      if (!cmd || cmd.startsWith('Enter ')) return;
      await _copyText(cmd);
      uiModule.showToast('SSH setup command copied');
    });
  }

  // Any field edit: persist, refresh the dropdown, rescan if this card is the
  // active server, and keep an open key panel's command in step.
  entry.querySelectorAll('input, select').forEach(el => {
    el.addEventListener('change', () => {
      const selectedBefore = _envState.remoteHost || '';
      const entryHost = entry.querySelector('.cookbook-srv-host')?.value?.trim() || '';
      _syncServers();
      _rebuildServerSelect();
      if (selectedBefore && selectedBefore === entryHost) {
        rescanHardware();
      }
      if (!entry.querySelector('.cookbook-server-key-panel')?.classList.contains('hidden')) {
        _populateServerKeyPanel(entry, false);
      }
    });
  });
  // Auto-test when host or port blur
  entry.querySelectorAll('.cookbook-srv-host, .cookbook-srv-port').forEach(el => {
    el.addEventListener('blur', () => _testServerConnection(entry));
  });
  // Initial test for pre-filled rows (existing servers on tab load).
  // NOTE(review): this also runs for a read-only (local) row if its host field
  // is non-empty, replacing the green "Local" dot set above — confirm intended.
  if (entry.querySelector('.cookbook-srv-host')?.value?.trim() && !entry.dataset.tested) {
    entry.dataset.tested = '1';
    _testServerConnection(entry);
  }

  // Cancel button on a brand-new server entry: discard it (no confirm — it's
  // unsaved) and re-sync so the dropped blank server doesn't linger.
  const cancelBtn = entry.querySelector('.cookbook-server-cancel-btn');
  if (cancelBtn && !cancelBtn.dataset.bound) {
    cancelBtn.dataset.bound = '1';
    cancelBtn.addEventListener('click', () => {
      entry.remove();
      _syncServers();
      _rebuildServerSelect();
      rescanHardware();
    });
  }
  // Save button on a brand-new server entry: persist + confirm with a check.
  const saveBtn = entry.querySelector('.cookbook-server-save-btn');
  if (saveBtn && !saveBtn.dataset.bound) {
    saveBtn.dataset.bound = '1';
    saveBtn.addEventListener('click', () => {
      _syncServers();
      _rebuildServerSelect();
      // Without this the user had to hard-refresh to see the new entry in
      // places outside the settings tab.
      announceServersChanged();
      saveBtn.classList.add('saved');
      saveBtn.innerHTML = 'Saved';
    });
  }
  // Remove: confirm (styled dialog when available, native confirm otherwise),
  // then drop the card, persist, broadcast and rescan.
  const rmBtn = entry.querySelector('.cookbook-server-rm');
  if (rmBtn) rmBtn.addEventListener('click', async () => {
    const name = entry.querySelector('.cookbook-srv-name')?.value?.trim()
      || entry.querySelector('.cookbook-srv-host')?.value?.trim()
      || 'this server';
    let ok = true;
    if (uiModule && uiModule.styledConfirm) {
      ok = await uiModule.styledConfirm(`Remove "${name}"?`, { confirmText: 'Remove', danger: true });
    } else {
      ok = confirm(`Remove "${name}"?`);
    }
    if (!ok) return;
    entry.remove();
    _syncServers();
    _rebuildServerSelect();
    announceServersChanged();
    rescanHardware();
  });

  // Setup is owned by cookbook.js's delegated handler (Settings behavior:
  // select server + open the Dependencies tab). No inline-install handler is
  // bound here, or one click would do two conflicting things.

  // Model dir add/remove
  const addDirBtn = entry.querySelector('.cookbook-modeldir-add');
  if (addDirBtn) addDirBtn.addEventListener('click', () => {
    const raw = prompt('Model directory path:', '/data/models');
    if (!raw) return;
    // Strip the remove glyphs in case the path was copied from an existing tag.
    const dir = raw.replaceAll('\u2715', '').replaceAll('\u2716', '').trim();
    if (!dir) return;
    // Don't add duplicates
    const isDuplicate = [...entry.querySelectorAll('.cookbook-modeldir-tag')]
      .some(t => (t.dataset.dir || t.textContent.trim()) === dir);
    if (isDuplicate) return;
    const container = entry.querySelector('.cookbook-modeldirs');
    if (!container) return;

    const tag = document.createElement('span');
    tag.className = 'cookbook-modeldir-tag';
    tag.dataset.dirIdx = container.querySelectorAll('.cookbook-modeldir-tag').length;
    tag.dataset.dir = dir;

    // Build the children explicitly so the handlers below always have an
    // element to bind to (looking them up after an innerHTML assignment
    // yielded null here), and so the path is inserted as text, never as HTML.
    const dl = document.createElement('span');
    dl.className = 'cookbook-modeldir-dl';
    dl.dataset.dlDir = dir; // read back by the download-target toggle's toast
    dl.title = 'Send downloads here';
    dl.innerHTML = _MODELDIR_CHECK_OFF;
    const rm = document.createElement('span');
    rm.className = 'cookbook-modeldir-rm';
    rm.textContent = '\u2716';
    tag.append(dl, ` ${dir} `, rm);

    rm.addEventListener('click', () => { tag.remove(); _syncServers(); });
    _wireModelDirTarget(entry, dl);
    container.insertBefore(tag, addDirBtn);
    _syncServers();
  });
  entry.querySelectorAll('.cookbook-modeldir-rm').forEach(rm => {
    rm.addEventListener('click', () => { rm.closest('.cookbook-modeldir-tag').remove(); _syncServers(); });
  });
  // Download-target toggles: clicking one makes that dir the sole target for
  // this server (or the default HF cache if it's the default dir).
  entry.querySelectorAll('.cookbook-modeldir-dl').forEach(dl => _wireModelDirTarget(entry, dl));
}
// Make a model-dir tag selectable as this server's download target. The
// choice is exclusive within the card and is persisted via _syncServers().
// A click anywhere on the tag selects it, except on the remove control
// (.cookbook-modeldir-rm), which has its own handler. Each tag is bound once
// (data-dl-bound); a missing toggle element or tag is a no-op.
function _wireModelDirTarget(entry, dlEl) {
  const tag = dlEl ? dlEl.closest('.cookbook-modeldir-tag') : null;
  if (!tag) return;
  if (tag.dataset.dlBound) return;
  tag.dataset.dlBound = '1';
  tag.style.cursor = 'pointer';

  // Reset one toggle (and its tag) to the unselected look.
  const clearTarget = (toggle) => {
    toggle.classList.remove('active');
    toggle.innerHTML = _MODELDIR_CHECK_OFF;
    toggle.closest('.cookbook-modeldir-tag')?.classList.remove('cookbook-modeldir-target');
    toggle.title = 'Send downloads here';
  };

  const onTagClick = (e) => {
    const onRemoveControl = e.target.closest('.cookbook-modeldir-rm');
    if (onRemoveControl) return; // removal is handled by the control itself
    e.stopPropagation();

    // Uncheck every toggle in this card, then check the clicked one.
    entry.querySelectorAll('.cookbook-modeldir-dl').forEach(clearTarget);
    dlEl.classList.add('active');
    dlEl.innerHTML = _MODELDIR_CHECK_ON;
    tag.classList.add('cookbook-modeldir-target');
    dlEl.title = 'Downloads go here';
    _syncServers();

    // An empty data-dl-dir is reported as the default HF cache.
    const targetDir = dlEl.dataset.dlDir;
    if (targetDir) {
      uiModule.showToast('Downloads \u2192 ' + targetDir);
    } else {
      uiModule.showToast('Downloads \u2192 default HF cache');
    }
  };
  tag.addEventListener('click', onTagClick);
}
// Wire every server card already present in the settings list.
document.querySelectorAll('.cookbook-server-entry').forEach(_wireServerEntry);

// "Add server": append a blank, editable card. Bound once (data-bound).
const addBtn = document.getElementById('cookbook-server-add');
if (addBtn && !addBtn.dataset.bound) {
  addBtn.dataset.bound = '1';
  addBtn.addEventListener('click', () => {
    const list = document.getElementById('cookbook-servers-list');
    if (!list) return;
    const idx = list.children.length;
    // Build the new entry with the SAME template as existing servers (Model
    // Directory header, default checkmark, platform icon) - isNew swaps the
    // delete button for a Save button. forceRemote keeps it editable.
    const blank = { host: '', name: '', port: '', env: 'none', envPath: '', platform: '', modelDirs: ['~/.cache/huggingface/hub'] };
    const wrap = document.createElement('div');
    wrap.innerHTML = _serverEntryHtml(blank, idx, _envState.defaultServer || '', true, true);
    const entry = wrap.firstElementChild;
    list.appendChild(entry);
    _wireServerEntry(entry);
    _syncServers();
    // Also refresh the server select dropdown
    _rebuildServerSelect();
    entry.querySelector('.cookbook-srv-host')?.focus();
  });
}

// Server selector dropdown: switch the active server, mirror the choice into
// the other server dropdowns, and rescan hardware. Bound once (data-bound).
const serverSelect = document.getElementById('hwfit-server-select');
if (serverSelect && !serverSelect.dataset.bound) {
  serverSelect.dataset.bound = '1';
  serverSelect.addEventListener('change', () => {
    const val = serverSelect.value;
    // Same field set as the default-server toggle applies. In particular
    // `platform` must follow the selection, otherwise the previously selected
    // server's platform would stay in _envState after switching.
    if (val === 'local') {
      _envState.remoteHost = '';
      _envState.remoteServerKey = '';
      _envState.env = 'none';
      _envState.envPath = '';
      _envState.platform = '';
    } else {
      const s = _serverByVal(val);
      // Unknown value: leave the current selection untouched.
      if (s) {
        _envState.remoteHost = s.host;
        _envState.remoteServerKey = _serverKey(s);
        _envState.env = s.env || 'none';
        _envState.envPath = s.envPath || '';
        _envState.platform = s.platform || '';
      }
    }
    _persistEnvState();
    // Keep the other server dropdowns (Download / Cache / Deps) in sync. The
    // download-input button reads #hwfit-dl-server *directly*, so without this
    // it kept its old value and downloads went to the wrong host even
    // though the scan here correctly switched to the selected server.
    document.querySelectorAll('#hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => {
      if (!sel || sel.tagName !== 'SELECT') return;
      sel.value = _currentServerValue();
    });
    _hwfitCache = null;
    // Reset GPU-toggle state (no flicker) so the new server's hardware re-renders.
    _resetGpuToggleState();
    _hwfitFetch();
  });
}
}