// ============================================
// COOKBOOK HWFIT SUB-MODULE
// "What Fits?" hardware model fitting UI
// ============================================
import {
_envState,
_persistEnvState,
esc,
modelLogo,
_detectBackend,
_runModelDownload,
_runPanelCmd,
_buildDownloadCmd,
_addTask,
_renderRunningTab,
_detectToolParser,
_lastCacheHost,
_setLastCacheHost,
_serverByVal,
_shellQuote,
_MODELDIR_CHECK_ON,
_MODELDIR_CHECK_OFF,
_serverEntryHtml,
_copyText,
// Import cookbook.js WITHOUT a ?v= query — the same plain specifier every other
// importer uses. A query mismatch loads cookbook.js twice as two separate modules
// (two _envState objects), which silently sent downloads to the wrong server.
} from './cookbook.js';
import uiModule from './ui.js';
import spinnerModule from './spinner.js';
// ── What Fits? (hardware model fitting) ──
// Most recent scan payload handed to the renderers by _hwfitFetch (an object
// carrying `system` and `models`), or null while nothing is loaded.
export let _hwfitCache = null;
// NOTE(review): never assigned or read in the part of this module visible here;
// presumably a debounce timer handle — confirm before relying on it.
export let _hwfitDebounce = null;
export let _cachedModelIds = null; // repo IDs already downloaded
// Bumped on every _hwfitFetch; a slow scan (remote SSH probe can take ~10s)
// checks this before rendering so a stale response can't clobber a newer one
// after the user has switched servers.
let _hwfitFetchToken = 0;
// Keys of hardware chips currently toggled "off" (dimmed but still rendered).
// _hwfitFetch turns 'gpu'/'vram'/'ram' membership into ignore_detected_* params.
let _dismissedHwChips = new Set();
// Permanently removed (X-clicked) chips. Separate from _dismissedHwChips
// so the ranker treats "off" and "removed" the same (both ignore the
// hardware) but the UI keeps "off" chips visible to toggle back on,
// while "removed" ones don't render at all until next rescan.
let _removedHwChips = new Set();
export let _gpuToggleTotal = 0; // real GPU count from first scan, never overridden
/**
 * Pick the first usable GGUF source listed on a model record.
 *
 * @param {object} [model] - Model record; only `gguf_sources` is read.
 * @returns {object|null} The first truthy entry with a truthy `repo`, or null
 *   when `gguf_sources` is not an array or holds no such entry.
 */
function _firstGgufSource(model) {
  const candidates = model?.gguf_sources;
  if (!Array.isArray(candidates)) return null;
  for (const entry of candidates) {
    if (entry && entry.repo) return entry;
  }
  return null;
}
/**
 * Heuristic: does this model record point at a GGUF repository/file?
 *
 * True when `is_gguf` is truthy, or when any of `quant_repo`, `repo_id`,
 * `path` or `name` mentions "gguf" (case-insensitive).
 *
 * @param {object} [model] - Model record (may be null/undefined).
 * @returns {boolean}
 */
function _looksLikeGgufRepo(model) {
  const fields = [model?.quant_repo, model?.repo_id, model?.path, model?.name];
  const haystack = fields.map((value) => value || '').join(' ').toLowerCase();
  if (model?.is_gguf) return true;
  return ['gguf', '.gguf'].some((needle) => haystack.includes(needle));
}
/**
 * Decide which repository a download for `model` should pull from.
 *
 * For the llama.cpp backend a listed GGUF source wins, then a repo that merely
 * looks like GGUF (`quant_repo`, `repo_id`, `name` in that order). Every other
 * case uses `quant_repo` or `name` with an empty `kind`.
 *
 * @param {object} [model] - Model record.
 * @param {string} backend - Backend id; only 'llamacpp' gets GGUF handling.
 * @returns {{repo: string, kind: string}} `kind` is 'GGUF' or ''.
 */
function _downloadSourceRepo(model, backend) {
  const generic = () => ({ repo: model?.quant_repo || model?.name || '', kind: '' });
  if (backend !== 'llamacpp') return generic();

  const listed = _firstGgufSource(model);
  if (listed) return { repo: listed.repo, kind: 'GGUF' };

  if (!_looksLikeGgufRepo(model)) return generic();
  const ggufRepo = model?.quant_repo || model?.repo_id || model?.name;
  return ggufRepo ? { repo: ggufRepo, kind: 'GGUF' } : generic();
}
/**
 * Forget the GPU-toggle bookkeeping so the next scan rebuilds the RAM/GPU
 * buttons for a (possibly different) server.
 *
 * The existing markup is deliberately left in place: clearing it made the
 * buttons flicker out and back in, so the old buttons stay visible until the
 * fresh scan swaps them. This lives here (not in cookbook.js) because
 * _gpuToggleTotal is a module-local binding that importers cannot reassign.
 *
 * @param {boolean} [clearDismissed=true] - Also empty the dismissed/removed
 *   hardware-chip sets.
 */
export function _resetGpuToggleState(clearDismissed = true) {
  if (clearDismissed) {
    _dismissedHwChips = new Set();
    _removedHwChips = new Set();
  }

  const toggles = document.getElementById('hwfit-gpu-toggles');
  if (toggles) {
    Object.assign(toggles, {
      _originalSystem: null,
      _activeCount: undefined,
      _activeGroup: undefined,
      _groups: null,
      _builtGroup: undefined,
    });
    // Dropping the render signature forces _renderGpuToggles to rebuild.
    delete toggles.dataset.rendered;
  }

  _gpuToggleTotal = 0;
}
/**
 * Trim vendor noise so a pool label reads "RTX 4090 D" rather than
 * "NVIDIA GeForce RTX 4090 D".
 *
 * @param {*} name - GPU name; falsy values are treated as 'GPU'.
 * @returns {string} The shortened name, or 'GPU' when nothing is left.
 */
function _shortGpuName(name) {
  // Applied in order, each anchored at the start of what the previous left.
  const vendorPrefixes = [/^NVIDIA\s+GeForce\s+/i, /^NVIDIA\s+/i, /^AMD\s+(Radeon\s+)?/i];
  let label = String(name || 'GPU');
  for (const prefix of vendorPrefixes) {
    label = label.replace(prefix, '');
  }
  label = label.trim();
  return label === '' ? 'GPU' : label;
}
/**
 * GPU counts offered for a pool: the powers of two (1, 2, 4, 8, 16) that do not
 * exceed the pool size, plus the exact pool size when it is positive and not
 * already listed. These are the counts the UI treats as safe values for vLLM's
 * --tensor-parallel-size, so it never offers a count the pool cannot serve.
 *
 * @param {number} poolSize - Number of GPUs in the pool.
 * @returns {number[]} Ascending counts; empty for a non-positive pool size.
 */
function _validTpCounts(poolSize) {
  const counts = [];
  for (const candidate of [1, 2, 4, 8, 16]) {
    if (candidate <= poolSize) counts.push(candidate);
  }
  const needsExact = poolSize > 0 && !counts.includes(poolSize);
  if (needsExact) counts.push(poolSize);
  return counts;
}
/**
 * Render (or refresh) the GPU pool / GPU-count controls inside
 * #hwfit-gpu-toggles for the given system description.
 *
 * Bookkeeping lives on the container element itself (`_groups`, `_activeGroup`,
 * `_activeCount`, `_builtGroup`, `dataset.rendered`) so a repeat call can skip
 * the rebuild when neither the hardware shape nor the chosen pool changed.
 * Choosing a pool or a count clears _hwfitCache and calls _hwfitFetch().
 *
 * @param {object} system - System info from a scan. Reads `gpu_groups` (entries
 *   with `count`, `vram_each`, `name`), `detected_gpu_count`, `gpu_count`,
 *   `has_gpu`, `gpu_name` and `backend`.
 */
export function _renderGpuToggles(system) {
const container = document.getElementById('hwfit-gpu-toggles');
if (!container) return;
const groups = Array.isArray(system.gpu_groups) ? system.gpu_groups : [];
// Box-wide GPU total, stable across fetches. The route shrinks system.gpu_count
// to the *active pool* once we pin one, so derive the total from the (immutable)
// group list or the raw detection, never from the possibly-overridden count.
const total = system.detected_gpu_count
|| (groups.length ? groups.reduce((s, g) => s + (g.count || 0), 0) : (system.gpu_count || 0));
// No GPUs reported at all: empty the controls and forget the cached total.
if (total <= 0 && !system.has_gpu) {
container.innerHTML = '';
container._groups = null;
_gpuToggleTotal = 0;
return;
}
// Only the first non-zero total is recorded; later scans don't overwrite it.
if (!_gpuToggleTotal) _gpuToggleTotal = total;
container._groups = groups;
if (container._activeGroup === undefined) container._activeGroup = 0; // auto = largest pool
const heterogeneous = groups.length > 1;
// Rebuild only when the hardware shape changes OR the chosen pool changes (the
// count buttons are pool-specific). Otherwise a re-scan would flicker them.
const sig = `${total}|${groups.map(g => g.count + ':' + g.vram_each).join(',')}`;
if (container.dataset.rendered === sig && container._builtGroup === container._activeGroup) return;
container.dataset.rendered = sig;
container._builtGroup = container._activeGroup;
// Fall back to a synthetic single pool when no group list was reported.
const grp = groups[container._activeGroup] || groups[0]
|| { count: total, vram_each: 0, name: system.gpu_name || 'GPU' };
const poolSize = grp.count || total;
let html = '';
if (heterogeneous) {
html += `';
}
const validCounts = _validTpCounts(poolSize);
const maxGpu = validCounts.length ? validCounts[validCounts.length - 1] : 0;
html += '';
const hasExplicitCount = typeof container._activeCount === 'number';
for (const n of validCounts) {
const text = n === 1 ? 'GPU' : n + ' GPU';
const isActive = hasExplicitCount ? (n === container._activeCount) : (container._activeCount === undefined && n === maxGpu);
html += ``;
}
container.innerHTML = html;
// Pool dropdown: switch pools, reset the count to the new pool's max, rebuild.
const sel = container.querySelector('#hwfit-gpu-group');
if (sel) {
sel.addEventListener('change', () => {
container._activeGroup = parseInt(sel.value) || 0;
container._activeCount = undefined; // default to the new pool's max
delete container.dataset.rendered; // force a count-button rebuild
_renderGpuToggles(system);
_hwfitCache = null;
_hwfitFetch();
});
}
if (!container._gpuBound) {
container._gpuBound = true;
container.addEventListener('click', (e) => {
const btn = e.target.closest('.hwfit-gpu-btn');
if (!btn) return;
const count = parseInt(btn.dataset.count);
const wasActive = btn.classList.contains('active') && container._activeCount === count;
container.querySelectorAll('.hwfit-gpu-btn').forEach(b => b.classList.remove('active'));
if (wasActive) {
container._activeCount = null;
} else {
btn.classList.add('active');
container._activeCount = count;
// Auto-set quant based on hardware selection
const quantSel = document.getElementById('hwfit-quant');
if (quantSel) {
if (count <= 1) {
quantSel.value = 'Q4_K_M'; // RAM or 1 GPU -> Q4 sweet spot
} else if (String(system?.backend || '').toLowerCase() === 'rocm') {
quantSel.value = 'Q4_K_M'; // ROCm default stays GGUF/local-safe; AWQ is explicit only
} else {
quantSel.value = 'AWQ-4bit'; // Multi-GPU -> AWQ for vLLM
}
}
}
_hwfitCache = null;
_hwfitFetch();
});
}
}
// --- Scan persistence (survives page reloads) ----------------------------
// The backend caches hardware detection per host (~30 min) but that's lost on a
// service restart, and a reload still shows a spinner while it re-fetches. Cache
// the last successful /models result per param-signature in localStorage so a
// reload paints instantly, then we refresh in the background and swap.
// localStorage key holding the scan cache (one entry per _scanSig() signature).
const _SCAN_CACHE_KEY = 'hwfit_scan_cache_v1';
// localStorage key holding the saved manual ("what if") hardware state.
const _MANUAL_HW_KEY = 'hwfit_manual_hardware_v1';
const _SCAN_CACHE_MAX = 12; // keep the newest N signatures
const _SCAN_CACHE_TTL = 6 * 3600 * 1000; // 6 h — hardware rarely changes
/**
 * Load the saved manual-hardware state from localStorage.
 *
 * @returns {object|null} The parsed state when its `mode` is 'gpu' or 'ram';
 *   null when nothing valid is stored or storage/JSON access fails.
 */
function _manualHwState() {
  let saved;
  try {
    saved = JSON.parse(localStorage.getItem(_MANUAL_HW_KEY) || '{}');
  } catch {
    // Unreadable storage or corrupt JSON counts as "no manual hardware".
    return null;
  }
  const modeOk = !!saved && ['gpu', 'ram'].includes(saved.mode);
  return modeOk ? saved : null;
}
/**
 * Persist the manual-hardware state, or remove it when there is none.
 *
 * @param {object|null} s - State to store; a falsy value or one without a
 *   truthy `mode` deletes the stored entry instead.
 */
function _saveManualHwState(s) {
  const hasMode = Boolean(s && s.mode);
  try {
    if (hasMode) {
      localStorage.setItem(_MANUAL_HW_KEY, JSON.stringify(s));
    } else {
      localStorage.removeItem(_MANUAL_HW_KEY);
    }
  } catch {
    // Best effort: storage may be unavailable or full.
  }
}
/**
 * Translate the saved manual-hardware state into request parameters.
 *
 * @returns {object} `{}` when no manual state is saved; otherwise the keys
 *   manual_mode, manual_gpu_count, manual_vram_gb, manual_ram_gb and
 *   manual_backend, where values that do not apply are the empty string.
 */
function _manualHwParams() {
  const saved = _manualHwState();
  if (!saved) return {};
  const gpuMode = saved.mode === 'gpu';
  return {
    manual_mode: saved.mode,
    manual_gpu_count: gpuMode ? String(saved.gpuCount || 1) : '',
    manual_vram_gb: gpuMode ? String(saved.vramGb || 8) : '',
    manual_ram_gb: saved.ramGb ? String(saved.ramGb) : '',
    manual_backend: gpuMode ? (saved.backend || 'cuda') : '',
  };
}
/**
 * Parse the first number found in free-form input, accepting a decimal comma.
 *
 * @param {*} value - Raw input (typically a text field value).
 * @param {*} fallback - Returned when no positive finite number is found.
 * @returns {*} The parsed positive number, or `fallback`.
 */
function _manualNumber(value, fallback) {
  const text = String(value || '').replace(',', '.');
  const found = /-?\d+(?:\.\d+)?/.exec(text);
  if (found === null) return fallback;
  const parsed = Number(found[0]);
  if (!Number.isFinite(parsed) || parsed <= 0) return fallback;
  return parsed;
}
/**
 * Like _manualNumber, but for optional fields: yields null instead of a
 * caller-supplied fallback when the input holds no positive finite number.
 *
 * @param {*} value - Raw input; a decimal comma is accepted.
 * @returns {number|null}
 */
function _manualOptionalNumber(value) {
  const normalized = String(value || '').replace(',', '.');
  const [token] = normalized.match(/-?\d+(?:\.\d+)?/) || [];
  if (token === undefined) return null;
  const parsed = Number(token);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}
/**
 * Human-readable summary of a manual-hardware state.
 *
 * Manual mode is a "what if" simulator whose values REPLACE the detected
 * hardware (matching the server-side _apply_manual_hardware), so sizes are
 * phrased as plain "X GB" totals rather than additive "+X GB".
 *
 * @param {object|null} s - Manual state (`mode`, `gpuCount`, `vramGb`, `ramGb`).
 * @returns {string} The label, or '' when there is no state.
 */
function _manualHwLabel(s) {
  if (!s) return '';
  if (s.mode === 'ram') return `Manual: ${s.ramGb || 0} GB RAM only`;

  const count = s.gpuCount || 1;
  const plural = Number(count) === 1 ? '' : 's';
  const vramEach = s.vramGb || 8;
  const ramSuffix = s.ramGb ? ` · ${s.ramGb} GB RAM` : '';
  return `Manual: ${count} GPU${plural} · ${vramEach} GB VRAM each${ramSuffix}`;
}
/**
 * Build the system description to DISPLAY while manual hardware is active.
 *
 * Manual values replace the detected ones rather than adding to them, so the
 * chips show what is actually being ranked against (the server side follows the
 * same "replace" rule).
 *
 * @param {object|null} sys - Detected system info; shallow-copied, never mutated.
 * @param {object|null} manual - Manual state (`mode`, `gpuCount`, `vramGb`,
 *   `ramGb`, `backend`); when falsy the plain copy is returned.
 * @returns {object} A new object with the manual overrides applied.
 */
function _manualDisplaySystem(sys, manual) {
  const view = { ...(sys || {}) };
  if (!manual) return view;

  view.manual_hardware = true;
  if (manual.ramGb) {
    // A manual RAM figure is the simulated total, not an addition.
    const ramTotal = Number(manual.ramGb);
    view.available_ram_gb = ramTotal;
    view.total_ram_gb = ramTotal;
  }

  if (manual.mode === 'ram') {
    // RAM-only simulation: blank out the GPU side of the display.
    view.has_gpu = false;
    view.gpu_name = null;
    view.gpu_vram_gb = 0;
    view.gpu_count = 0;
    return view;
  }

  const gpuCount = Number(manual.gpuCount || 1);
  const vramEach = Number(manual.vramGb || 8);
  const backendId = manual.backend || 'cuda';
  const multiplier = gpuCount > 1 ? ` × ${gpuCount}` : '';
  view.gpu_name = `Simulated ${backendId.toUpperCase()} GPU` + multiplier;
  view.gpu_vram_gb = Math.round(vramEach * gpuCount * 10) / 10; // pooled VRAM, 1 decimal
  view.gpu_count = gpuCount;
  view.backend = backendId;
  return view;
}
/**
 * Signature of everything that affects the result list, so a cached list is
 * never painted under mismatched filters.
 *
 * @returns {string} JSON with the host, use-case, search text, sort column and
 *   direction, quant, pinned GPU count and pool, manual-hardware params and the
 *   sorted dismissed-chip keys.
 */
function _scanSig() {
  const valueOf = (id) => document.getElementById(id)?.value;
  const sortSel = document.getElementById('hwfit-sort');
  const toggles = document.getElementById('hwfit-gpu-toggles');

  // Only an explicit numeric count / non-zero pool index counts as "pinned".
  const pinnedCount = toggles && typeof toggles._activeCount === 'number'
    ? String(toggles._activeCount)
    : '';
  const pinnedGroup = toggles && toggles._activeGroup ? String(toggles._activeGroup) : '';

  return JSON.stringify({
    h: _envState.remoteHost || '',
    u: valueOf('hwfit-usecase') || '',
    s: valueOf('hwfit-search')?.trim() || '',
    o: sortSel?.value || 'score',
    r: sortSel?.dataset.reverse === '1' ? 1 : 0,
    q: valueOf('hwfit-quant') || '',
    g: pinnedCount,
    gg: pinnedGroup,
    m: _manualHwParams(),
    d: Array.from(_dismissedHwChips).sort(),
  });
}
/**
 * Look up a persisted scan result by signature.
 *
 * @param {string} sig - Signature produced by _scanSig().
 * @returns {*} The stored `data` payload when an entry exists and is younger
 *   than _SCAN_CACHE_TTL; otherwise null (also on storage/JSON errors).
 */
function _readScanCache(sig) {
  try {
    const raw = localStorage.getItem(_SCAN_CACHE_KEY) || '{}';
    const entry = JSON.parse(raw)[sig];
    if (!entry) return null;
    const age = Date.now() - entry.ts;
    return age < _SCAN_CACHE_TTL ? entry.data : null;
  } catch {
    return null;
  }
}
/**
 * Persist a scan result under its signature, keeping only the newest
 * _SCAN_CACHE_MAX entries.
 *
 * @param {string} sig - Signature produced by _scanSig().
 * @param {object} data - Scan payload; only `system` and `models` are stored.
 */
function _writeScanCache(sig, data) {
  try {
    const store = JSON.parse(localStorage.getItem(_SCAN_CACHE_KEY) || '{}');
    store[sig] = { ts: Date.now(), data: { system: data.system, models: data.models } };

    const signatures = Object.keys(store);
    const excess = signatures.length - _SCAN_CACHE_MAX;
    if (excess > 0) {
      // Oldest first, then drop just enough to get back under the cap.
      signatures
        .sort((a, b) => (store[a].ts || 0) - (store[b].ts || 0))
        .slice(0, excess)
        .forEach((stale) => { delete store[stale]; });
    }

    localStorage.setItem(_SCAN_CACHE_KEY, JSON.stringify(store));
  } catch {
    // Best effort: a full or unavailable store just means no instant paint.
  }
}
/**
 * Render a clear scan-failure card into the model list: which server failed, the
 * underlying reason (small), and a Retry button that forces a fresh probe. Used
 * for both the backend-reported error (SSH/probe failure) and network failures,
 * instead of dumping a raw one-line message.
 *
 * @param {Element|null} list - Model-list container; nothing happens when falsy.
 * @param {string} host - Host that was scanned; a falsy value reads "this machine".
 * @param {string} [detail] - Underlying reason; escaped and shown only when truthy.
 */
function _hwfitShowError(list, host, detail) {
if (!list) return;
const where = host ? esc(host) : 'this machine';
const div = document.createElement('div');
div.className = 'hwfit-loading';
div.style.cssText = 'flex-direction:column;gap:8px;text-align:center;';
div.innerHTML =
`
Couldn't scan ${where}
`
+ (detail ? `
${esc(detail)}
` : '')
+ ``;
// Replace whatever the list showed (spinner or rows) with the card.
list.innerHTML = '';
list.appendChild(div);
// Retry: forget the GPU-toggle state, then rescan with fresh=true.
const rb = div.querySelector('#hwfit-retry');
if (rb) rb.addEventListener('click', () => { _resetGpuToggleState(); _hwfitFetch(true); });
}
/**
 * Client-side "Engine" filter (llama.cpp / vLLM / SGLang); empty = show all.
 *
 * Uses the same _detectBackend() the serve commands use, so what you filter to
 * is exactly what would be launched. Pure view filter — no refetch needed.
 *
 * @param {Array|*} models - Model list; a non-array is returned as-is (or as []
 *   when falsy).
 * @returns {Array|*} The models whose detected backend matches #hwfit-engine.
 */
function _applyEngineFilter(models) {
  const engine = document.getElementById('hwfit-engine')?.value || '';
  if (engine === '' || !Array.isArray(models)) return models || [];

  const matchesEngine = (model) => {
    try {
      return _detectBackend(model).backend === engine;
    } catch {
      // A model we cannot classify stays visible rather than vanishing.
      return true;
    }
  };
  return models.filter(matchesEngine);
}
/**
 * Scan the selected host and fetch the ranked model list, then paint the
 * hardware chips, the model list and (on the first scan) the GPU toggles.
 *
 * Flow: paint instantly from the persisted cache when one matches the current
 * filter signature, otherwise show a spinner; refresh the downloaded-model IDs
 * when the host changed; request /api/hwfit/models (or /api/hwfit/image-models
 * for the image use-case); retry once without the quant filter if that returned
 * nothing; sort client-side; render and persist.
 *
 * Every call bumps _hwfitFetchToken. A call that is no longer the newest one
 * drops its result — and, just as importantly, its failure — so a slow or
 * failing probe of a previously selected server cannot overwrite what the newer
 * call has put on screen.
 *
 * @param {boolean} [fresh=false] - Skip the persisted paint cache and ask the
 *   backend to bypass its hardware-scan cache. Forced to true while manual
 *   hardware or dismissed chips are active.
 * @returns {Promise<void>} Resolves once rendering (or error display) is done;
 *   request failures are shown in the list rather than thrown.
 */
export async function _hwfitFetch(fresh = false) {
  const _tk = ++_hwfitFetchToken;
  const useCase = document.getElementById('hwfit-usecase')?.value || '';
  const search = document.getElementById('hwfit-search')?.value?.trim() || '';
  const remoteHost = _envState.remoteHost || '';
  const list = document.getElementById('hwfit-list');
  const hw = document.getElementById('hwfit-hw');
  if (!list) return;
  const hasManualOrDismissed = !!_manualHwState() || _dismissedHwChips.size > 0;
  if (hasManualOrDismissed) fresh = true;
  // Instant paint from the persisted cache (skipped on a forced Rescan), so a
  // reload shows the last result with no spinner. We still fetch fresh below and
  // swap it in. If there's no cache hit, fall back to the spinner.
  const _sig = _scanSig();
  const _cached = fresh ? null : _readScanCache(_sig);
  const wp = spinnerModule.createWhirlpool(18);
  if (_cached) {
    _hwfitCache = _cached;
    _hwfitRenderHw(hw, _cached.system);
    _hwfitRenderList(list, _applyEngineFilter(_cached.models));
  } else {
    // Show spinner while scanning — stack the spinner above a text label
    // (the .hwfit-loading class is a centered flex ROW, so force column here).
    const loadingDiv = document.createElement('div');
    loadingDiv.className = 'hwfit-loading';
    loadingDiv.style.flexDirection = 'column';
    loadingDiv.style.gap = '6px';
    loadingDiv.appendChild(wp.element);
    // Text label like the other cookbook tabs: "Loading…", then if the scan runs
    // long (remote SSH hardware probe), switch to "Scanning hardware…".
    const loadingLbl = document.createElement('div');
    loadingLbl.textContent = 'Loading…';
    loadingLbl.style.cssText = 'text-align:center;opacity:0.5;font-size:11px;';
    loadingDiv.appendChild(loadingLbl);
    setTimeout(() => { if (loadingLbl.isConnected) loadingLbl.textContent = 'Scanning hardware…'; }, 2000);
    list.innerHTML = '';
    list.appendChild(loadingDiv);
    _hwfitCache = null; // no instant paint — clear until the fetch returns
  }
  // Only fetch cached model IDs when server changes, not on every search/sort
  if (!_cachedModelIds || _lastCacheHost() !== remoteHost) {
    _setLastCacheHost(remoteHost);
    const _cacheSrv = _envState.servers.find(s => s.host === remoteHost);
    const _cachePort = _cacheSrv?.port || '';
    const _cacheParams = new URLSearchParams({ host: remoteHost });
    if (_cachePort) _cacheParams.set('ssh_port', _cachePort);
    if (_cacheSrv?.platform) _cacheParams.set('platform', _cacheSrv.platform);
    // Fire-and-forget: the list renders without waiting for this, and rows
    // already on screen are re-marked when it lands.
    fetch(`/api/model/cached?${_cacheParams}`, { credentials: 'same-origin' })
      .then(r => r.json())
      .then(d => {
        _cachedModelIds = new Set((d.models || []).map(m => m.repo_id));
        // Re-mark rows if already rendered
        list.querySelectorAll('.hwfit-row[data-model]').forEach(row => {
          const name = row.dataset.model;
          if (_cachedModelIds.has(name) || [..._cachedModelIds].some(id => id.endsWith('/' + name?.split('/').pop()))) {
            const nameEl = row.querySelector('.hwfit-name');
            if (nameEl && !nameEl.querySelector('.hwfit-dl-dot')) {
              nameEl.insertAdjacentHTML('beforeend', '\u25CF');
            }
          }
        });
      }).catch(() => {});
  }
  try {
    const sortBy = document.getElementById('hwfit-sort')?.value || 'score';
    const quantPref = document.getElementById('hwfit-quant')?.value || '';
    // Get active GPU count from toggles
    const toggleContainer = document.getElementById('hwfit-gpu-toggles');
    let gpuCountOverride = '';
    if (!hasManualOrDismissed && toggleContainer && typeof toggleContainer._activeCount === 'number') {
      gpuCountOverride = String(toggleContainer._activeCount);
    }
    // Which homogeneous GPU pool to rank against (heterogeneous boxes only).
    let gpuGroupOverride = '';
    if (!hasManualOrDismissed && toggleContainer && toggleContainer._activeGroup) {
      gpuGroupOverride = String(toggleContainer._activeGroup);
    }
    const params = new URLSearchParams({ limit: '80', sort: sortBy });
    if (fresh) params.set('fresh', '1'); // bypass the hardware-scan cache
    if (search) params.set('search', search);
    if (remoteHost) {
      params.set('host', remoteHost);
      const _srv = _envState.servers.find(s => s.host === remoteHost);
      const _hp = _srv?.port || '';
      if (_hp) params.set('ssh_port', _hp);
      if (_srv?.platform) params.set('platform', _srv.platform);
    }
    if (gpuCountOverride !== '') params.set('gpu_count', gpuCountOverride);
    if (gpuGroupOverride !== '') params.set('gpu_group', gpuGroupOverride);
    if (_dismissedHwChips.has('gpu') || _dismissedHwChips.has('vram')) params.set('ignore_detected_gpu', 'true');
    if (_dismissedHwChips.has('ram')) params.set('ignore_detected_ram', 'true');
    const manualParams = _manualHwParams();
    Object.entries(manualParams).forEach(([k, v]) => {
      if (v !== '') params.set(k, v);
    });
    if (hasManualOrDismissed) params.set('_hw_override_ts', String(Date.now()));
    // Image models use a separate registry/endpoint
    const isImageMode = useCase === 'image_gen';
    if (!isImageMode) {
      if (useCase) params.set('use_case', useCase);
      if (quantPref) params.set('quant', quantPref);
    }
    const endpoint = isImageMode ? `/api/hwfit/image-models?${params}` : `/api/hwfit/models?${params}`;
    const res = await fetch(endpoint);
    // A newer scan started while this one was in flight (user switched servers
    // mid-probe) — drop this stale response so it can't clobber the new one.
    if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
    if (!res.ok) throw new Error(res.statusText);
    let data = await res.json();
    if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
    // Nothing matched the preferred quant: retry once without it and, if that
    // finds models, show those and clear the quant selector.
    if (!isImageMode && quantPref && !data.error && Array.isArray(data.models) && data.models.length === 0) {
      const fallbackParams = new URLSearchParams(params);
      fallbackParams.delete('quant');
      const fallbackRes = await fetch(`/api/hwfit/models?${fallbackParams}`);
      if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
      if (fallbackRes.ok) {
        const fallbackData = await fallbackRes.json();
        if (!fallbackData.error && Array.isArray(fallbackData.models) && fallbackData.models.length > 0) {
          data = fallbackData;
          const quantSel = document.getElementById('hwfit-quant');
          if (quantSel) quantSel.value = '';
        }
      }
    }
    // Normalize image model fields to match LLM renderer expectations
    if (isImageMode && data.models) {
      data.models = data.models.map(m => ({
        ...m,
        name: m.id || m.name,
        fit_level: m.fit || 'no_fit',
        parameter_count: m.params_b ? m.params_b + 'B' : '?',
        required_gb: m.vram_needed || 0,
        speed_tps: 0,
        context: 0,
        run_mode: m.capabilities?.[0] || 'image',
        is_image_gen: true,
        quant: m.quant || m.default_quant || 'BF16',
        quant_repo: m.quant_repo || null,
      }));
    }
    wp.destroy();
    if (data.error) {
      // Keep the instantly-painted cache if we had one — don't replace good data
      // with an error on a transient probe failure (stale-while-revalidate).
      if (!_cached) { _hwfitShowError(list, remoteHost, data.error); if (hw) hw.innerHTML = ''; }
      return;
    }
    _hwfitCache = data;
    _hwfitRenderHw(hw, data.system);
    // Sort client-side by the active column so the highest↔lowest toggle is
    // deterministic (the previous array .reverse() didn't reliably flip).
    // 1st click on a column = highest first; clicking it again = lowest first.
    if (!isImageMode) {
      const sortSel = document.getElementById('hwfit-sort');
      const sortKey = sortSel?.value || 'score';
      const asc = sortSel?.dataset.reverse === '1'; // reversed → ascending (lowest first)
      const field = { score: 'score', vram: 'required_gb', speed: 'speed_tps', params: 'params_b', context: 'context' }[sortKey] || 'score';
      data.models.sort((a, b) => {
        if (sortKey === 'fit') {
          const rank = { perfect: 4, good: 3, marginal: 2, too_tight: 1, no_fit: 0 };
          const av = rank[a.fit_level] || 0, bv = rank[b.fit_level] || 0;
          return asc ? av - bv : bv - av;
        }
        const av = Number(a[field]) || 0, bv = Number(b[field]) || 0;
        return asc ? av - bv : bv - av;
      });
    }
    _hwfitRenderList(list, _applyEngineFilter(data.models));
    // Persist this result so the next page load can paint it instantly.
    _writeScanCache(_sig, data);
    // Render GPU toggles — only on first scan (no override active)
    if (toggleContainer && !toggleContainer._originalSystem) {
      // Only trust the system info if no GPU override was applied
      if (toggleContainer._activeCount === undefined) {
        toggleContainer._originalSystem = { ...data.system };
        _renderGpuToggles(toggleContainer._originalSystem);
      }
    }
  } catch (e) {
    // The spinner may already be gone if the failure happened after rendering
    // started, so tearing it down here is best-effort.
    try { wp.destroy(); } catch {}
    // A superseded request must stay silent when it FAILS too: otherwise the
    // error card for the old server replaces the list the newer request painted.
    if (_tk !== _hwfitFetchToken) return;
    // Same stale-while-revalidate rule: only surface the error if we have nothing
    // already on screen from the cache.
    if (!_cached) _hwfitShowError(list, remoteHost, e.message);
  }
}
/**
 * Paint the hardware chip row (GPU, VRAM, RAM, cores, backend and, when active,
 * the manual-hardware chip) and wire the chip interactions.
 *
 * Also publishes `sys` on window._hwfitSystemCache and un-hides #hwfit-hw-row.
 * Toggling or removing a chip updates _dismissedHwChips / _removedHwChips and
 * triggers a fresh _hwfitFetch(true).
 *
 * @param {Element|null} el - Container for the chips; no-op when falsy.
 * @param {object|null} sys - System info from a scan; no-op when falsy.
 */
export function _hwfitRenderHw(el, sys) {
if (!el || !sys) return;
// Cache system info globally so other modules can read VRAM without refetching
try { window._hwfitSystemCache = sys; } catch {}
// Show the hardware row when we have data
const hwRow = document.getElementById('hwfit-hw-row');
if (hwRow) hwRow.style.display = 'flex';
const gpuCount = sys.gpu_count || 0;
// gpu_error = nvidia-smi present but failing (e.g. driver/library version
// mismatch). Surface it instead of the misleading "No GPU" — plain text
// label, full error in the tooltip.
// Chip rendering: split into a clickable body (toggle off / on) and a
// separate × button (fully remove from view + treat as dismissed for
// ranking). The body's "off" state is just visually dimmed — the
// chip stays visible so you can flip it back on without re-scanning.
const chip = (key, label, title = 'Click to toggle off (X to hide)') => {
if (_removedHwChips.has(key)) return '';
const dim = _dismissedHwChips.has(key) ? ' hwfit-hw-chip-off' : '';
return (
``
+ ``
+ ``
+ ``
);
};
let gpuChip;
if (sys.gpu_name) {
// Mixed-GPU boxes (#711): `${gpuCount}x ${gpu_name}` uses gpus[0].name for
// every card, so a 4090+3060 reads as "2x RTX 4090". Use gpu_groups (the
// backend already groups identical cards) to render each pool separately
// and put the per-card index+VRAM into the tooltip so it's actually
// useful for picking CUDA_VISIBLE_DEVICES.
const groups = Array.isArray(sys.gpu_groups) ? sys.gpu_groups : [];
// Shorten vendor prefixes so a mixed-GPU label fits in the chip row
// without overflowing. Single-GPU label still shows the full name
// (that's what users are used to seeing). Tooltip carries the full
// unmodified names regardless, so no information is lost.
// NOTE: this local helper shadows the module-level _shortGpuName and differs
// from it (also strips "Intel", no trim, no 'GPU' fallback).
const _shortGpuName = (n) => String(n || '')
.replace(/^NVIDIA\s+GeForce\s+/i, '')
.replace(/^NVIDIA\s+/i, '')
.replace(/^AMD\s+Radeon\s+/i, '')
.replace(/^AMD\s+/i, '')
.replace(/^Intel\s+/i, '');
let label;
if (groups.length > 1) {
// Heterogeneous: "1× RTX 4090 + 1× RTX 3060"
label = groups.map(g => `${g.count}× ${esc(_shortGpuName(g.name))}`).join(' + ');
} else if (gpuCount > 1) {
label = `${gpuCount}× ${esc(sys.gpu_name)}`;
} else {
label = esc(sys.gpu_name);
}
const gpus = Array.isArray(sys.gpus) ? sys.gpus : [];
const tip = gpus.length
? gpus.map(g => `GPU ${g.index}: ${g.name} · ${(+g.vram_gb).toFixed(1)} GB`).join('\n')
: 'Click to toggle off (X to hide)';
gpuChip = chip('gpu', label, tip);
} else if (sys.gpu_error) {
gpuChip = _removedHwChips.has('gpu')
? ''
: (() => {
const dim = _dismissedHwChips.has('gpu') ? ' hwfit-hw-chip-off' : '';
return (
``
+ ``
+ ``
+ ``
);
})();
} else {
gpuChip = chip('gpu', 'No GPU');
}
// The VRAM chip is omitted entirely when no VRAM is reported.
const vram = sys.gpu_vram_gb ? `${sys.gpu_vram_gb.toFixed(1)} GB VRAM` : '';
const ram = `${sys.available_ram_gb?.toFixed(1) || '?'} / ${sys.total_ram_gb?.toFixed(1) || '?'} GB RAM`;
const cores = `${sys.cpu_cores || '?'} cores`;
// Show the manual chip when either the payload or saved state says manual mode.
const manual = _manualHwState();
const manualChip = (sys.manual_hardware || manual)
? ``
+ ``
+ ``
+ ``
: '';
el.innerHTML = gpuChip
+ (vram ? chip('vram', vram) : '')
+ chip('ram', ram)
+ chip('cores', cores)
+ chip('backend', esc(sys.backend || ''))
+ manualChip;
// Body click → toggle "off" (dimmed, still visible). Membership of
// _dismissedHwChips is what the ranker reads, so both add+remove
// here also flips the model list. The manual chip is excluded —
// dimming "manual" has no ranking effect (the key isn't checked),
// so click-to-toggle there would feel broken. Use × to clear it.
el.querySelectorAll('.hwfit-hw-chip-toggle').forEach(btn => {
btn.addEventListener('click', (e) => {
e.stopPropagation();
const key = btn.dataset.hwChip;
if (!key || key === 'manual') return;
const row = btn.closest('.hwfit-hw-chip-row');
if (_dismissedHwChips.has(key)) {
_dismissedHwChips.delete(key);
row?.classList.remove('hwfit-hw-chip-off');
} else {
_dismissedHwChips.add(key);
row?.classList.add('hwfit-hw-chip-off');
}
// Keep the dismissed sets (false) — only the GPU-toggle bookkeeping resets.
_resetGpuToggleState(false);
_hwfitCache = null;
_hwfitFetch(true);
});
});
// × button → fully remove the chip from view AND treat it as
// dismissed for ranking purposes (until next rescan).
el.querySelectorAll('.hwfit-hw-chip-x').forEach(btn => {
btn.addEventListener('click', (e) => {
e.stopPropagation();
const key = btn.dataset.hwChip;
if (!key) return;
// The manual-hardware chip needs special teardown: clear the
// saved manual state so the chip doesn't re-render on the next
// fetch from localStorage. This repeats the steps of clearManual()
// in _wireManualHardwareControls (not in scope here), including
// collapsing the edit panel.
if (key === 'manual') {
_saveManualHwState(null);
btn.closest('.hwfit-hw-chip-row')?.remove();
document.getElementById('hwfit-manual-panel')?.classList.add('hidden');
_resetGpuToggleState();
_hwfitCache = null;
_hwfitFetch(true);
return;
}
_removedHwChips.add(key);
_dismissedHwChips.add(key);
btn.closest('.hwfit-hw-chip-row')?.remove();
_resetGpuToggleState(false);
_hwfitCache = null;
_hwfitFetch(true);
});
});
_wireManualHardwareControls(el);
}
/**
 * Hook up the manual-hardware ("what if") editor: the EDIT button, the manual
 * chip, and the panel's mode select, Save and Clear controls.
 *
 * Runs after every hardware-row render. The button and panel outlive those
 * renders, so their listeners are bound once (guarded by `_hwfitManualBound`);
 * the manual chip is bound on every call.
 *
 * @param {Element} el - The hardware chip container that was just rendered.
 */
function _wireManualHardwareControls(el) {
  const editBtn = document.getElementById('hwfit-hw-manual-btn');
  const editPanel = document.getElementById('hwfit-manual-panel');
  if (!(editBtn && editPanel)) return;

  const field = (selector) => editPanel.querySelector(selector);

  // Drop the saved manual state and rescan against the detected hardware.
  const clearManual = () => {
    _saveManualHwState(null);
    el.querySelector('.hwfit-hw-chip-manual')?.remove();
    editPanel.classList.add('hidden');
    _resetGpuToggleState();
    _hwfitCache = null;
    _hwfitFetch(true);
  };

  const saved = _manualHwState();
  editBtn.textContent = 'EDIT';
  if (saved) {
    field('.hwfit-manual-mode').value = saved.mode || 'gpu';
    field('.hwfit-manual-backend').value = saved.backend || 'cuda';
  }

  // RAM-only mode hides the GPU-specific inputs.
  const syncMode = () => {
    const ramOnly = field('.hwfit-manual-mode')?.value === 'ram';
    const display = ramOnly ? 'none' : '';
    for (const selector of ['.hwfit-manual-gpus', '.hwfit-manual-vram']) {
      field(selector)?.closest('label')?.style.setProperty('display', display);
    }
    const backendSel = field('.hwfit-manual-backend');
    if (backendSel) backendSel.style.display = display;
  };

  // Read the form, persist it, paint the simulated hardware at once, rescan.
  const saveManual = () => {
    const manual = {
      mode: field('.hwfit-manual-mode')?.value || 'gpu',
      gpuCount: _manualNumber(field('.hwfit-manual-gpus')?.value, 1),
      vramGb: _manualNumber(field('.hwfit-manual-vram')?.value, 8),
      ramGb: _manualOptionalNumber(field('.hwfit-manual-ram')?.value),
      backend: field('.hwfit-manual-backend')?.value || 'cuda',
    };
    _saveManualHwState(manual);
    _resetGpuToggleState();
    _hwfitCache = null;
    editPanel.classList.add('hidden');
    _hwfitRenderHw(el, _manualDisplaySystem(window._hwfitSystemCache, manual));
    _hwfitFetch(true);
  };

  if (!editBtn._hwfitManualBound) {
    editBtn._hwfitManualBound = true;
    editBtn.addEventListener('click', () => {
      editPanel.classList.toggle('hidden');
      syncMode();
    });
  }

  // The chip is recreated by each render, so it needs a listener every time.
  const manualChip = el.querySelector('.hwfit-hw-chip-toggle[data-hw-chip="manual"]');
  manualChip?.addEventListener('click', () => {
    editPanel.classList.remove('hidden');
    syncMode();
  });

  if (!editPanel._hwfitManualBound) {
    editPanel._hwfitManualBound = true;
    field('.hwfit-manual-mode')?.addEventListener('change', syncMode);
    field('.hwfit-hw-manual-save')?.addEventListener('click', saveManual);
    field('.hwfit-hw-manual-clear')?.addEventListener('click', clearManual);
  }

  syncMode();
}
// CSS colour per fit level: a theme variable with a hex fallback. There is no
// entry for `no_fit`.
export const _fitColors = { perfect: 'var(--green, #50fa7b)', good: 'var(--yellow, #f1fa8c)', marginal: 'var(--orange, #ffb86c)', too_tight: 'var(--red, #ff5555)' };
// Column descriptors for the model table, in display order: `label` is the
// heading text and `cls` a CSS class. The non-null `key` values are the sort
// keys _hwfitFetch understands; presumably null marks a column that cannot be
// sorted — the header markup is not visible here, so confirm in _hwfitRenderList.
export const _hwfitColumns = [
{ key: 'fit', label: 'Fit', cls: 'hwfit-fit' },
{ key: null, label: 'Model', cls: 'hwfit-name' },
{ key: 'params',label: 'Param', cls: 'hwfit-c-params' },
{ key: null, label: 'Quant', cls: 'hwfit-c-quant' },
{ key: 'vram', label: 'VRAM', cls: 'hwfit-c-vram' },
{ key: 'context',label: 'Ctx', cls: 'hwfit-c-ctx' },
{ key: 'speed', label: 'Speed', cls: 'hwfit-c-speed' },
{ key: 'score', label: 'Score', cls: 'hwfit-c-score' },
{ key: null, label: 'Mode', cls: 'hwfit-c-mode' },
];
/**
 * Render the model table into `el` and wire its interactions: clicking a row
 * expands its action panel, clicking a sortable heading changes the sort.
 *
 * NOTE(review): the statements that declare `html` and build each row are not
 * fully visible in this view, so the row markup is left undocumented here.
 *
 * @param {Element|null} el - List container; no-op when falsy.
 * @param {Array} [models] - Models to show; a falsy value is treated as empty.
 */
export function _hwfitRenderList(el, models) {
if (!el) return;
models = models || [];
if (!models.length) {
// Disambiguate WHY the list is empty so capable servers don't read as "too weak":
// active filters vs. a likely under-reported probe vs. genuinely low hardware.
const sys = _hwfitCache?.system;
// "Has hardware" = any VRAM reported, or more than 8 GB of total RAM.
const hasHw = sys && ((sys.gpu_vram_gb || 0) > 0 || (sys.total_ram_gb || 0) > 8);
const hasFilters = !!(document.getElementById('hwfit-search')?.value?.trim()
|| document.getElementById('hwfit-usecase')?.value
|| document.getElementById('hwfit-quant')?.value
|| document.getElementById('hwfit-engine')?.value);
let msg;
if (hasFilters) msg = 'No models match these filters — try clearing the search, use-case, quant, or engine.';
else if (hasHw) msg = 'No models fit — the hardware probe may have under-reported. Try Rescan.';
else msg = 'No models fit your hardware';
el.innerHTML = `
`;
html += `${esc(fitLabel)}`;
html += `${modelLogo(m.name)}${esc(m.name?.split('/').pop() || m.name)}${moeBadge}${imgBadge}${dlDot}`;
html += `${esc(pcount)}`;
html += `${esc(m.quant || '?')}`;
html += `${vramLabel}`;
html += `${m.is_image_gen ? '\u2014' : ctx}`;
html += `${m.is_image_gen ? '\u2014' : tps + ' t/s'}`;
html += `${score}`;
html += `${m.is_image_gen ? 'image' : esc(modeLabel)}`;
html += `
`;
}
el.innerHTML = html;
// Click row → expand inline action panel
el.querySelectorAll('.hwfit-row:not(.hwfit-header)').forEach(row => {
row.addEventListener('click', () => {
const name = row.dataset.model;
if (!name) return;
// Find model data from cache
const modelData = (_hwfitCache?.models || []).find(m => m.name === name);
if (!modelData) return;
_expandModelRow(row, modelData);
});
});
// Clickable header columns → sort (click again to toggle direction)
el.querySelectorAll('.hwfit-header .hwfit-sortable').forEach(col => {
col.addEventListener('click', () => {
const sortKey = col.dataset.sort;
if (!sortKey) return;
const sel = document.getElementById('hwfit-sort');
if (!sel) return;
// Toggle direction if clicking the same column
if (sel.value === sortKey) {
sel.dataset.reverse = sel.dataset.reverse === '1' ? '0' : '1';
} else {
sel.value = sortKey;
sel.dataset.reverse = '0';
}
// The chosen column/direction live on #hwfit-sort; refetch to apply them.
_hwfitFetch();
});
});
}
/**
 * Resolve the server currently selected in the scan dropdown and make it the
 * active host. Called right before a download/run so the action targets the
 * server the user sees selected — this defends against the global remoteHost
 * being changed elsewhere (e.g. background serve-task handling) between
 * selecting and clicking, which was sending downloads to the wrong host.
 *
 * The selection is also mirrored into _envState (for the command preview) and
 * persisted, but the RETURN VALUE is the source of truth to pass to the
 * download — never trust _envState.remoteHost downstream (multiple copies).
 *
 * @returns {string} Host of the selected server; '' for the local machine or
 *   when the selected value matches no known server. If the dropdown is missing
 *   or has no value, the current _envState.remoteHost (or '') is returned.
 */
function _syncHostFromScanDropdown() {
  const dropdown = document.getElementById('hwfit-server-select');
  const choice = dropdown ? dropdown.value : null;
  if (choice == null) return _envState.remoteHost || '';

  let selectedHost = '';
  const server = choice === 'local' ? null : _serverByVal(choice);
  if (choice === 'local') {
    _envState.remoteHost = '';
  } else if (server) {
    selectedHost = server.host;
    Object.assign(_envState, {
      remoteHost: server.host,
      env: server.env,
      envPath: server.envPath,
      platform: server.platform || '',
    });
  }

  // Persisting is best-effort; the returned host is what callers act on.
  try { _persistEnvState(); } catch {}
  return selectedHost;
}
export function _expandModelRow(row, modelData) {
const list = row.closest('.hwfit-list');
if (!list) return;
const existingPanel = list.querySelector('.hwfit-action-panel');
const wasActive = row.classList.contains('hwfit-row-active');
// Remove existing panel and active state
if (existingPanel) existingPanel.remove();
list.querySelectorAll('.hwfit-row-active').forEach(r => r.classList.remove('hwfit-row-active'));
// Toggle: if clicking same row, just close
if (wasActive) return;
row.classList.add('hwfit-row-active');
const { backend, label } = _detectBackend(modelData);
const isVllm = backend === 'vllm';
const isLlamaCpp = backend === 'llamacpp';
const ctx = modelData.context || 8192;
const dlSource = _downloadSourceRepo(modelData, backend);
const hfUrl = `https://huggingface.co/${dlSource.repo}`;
let html = `
`;
html += `
`;
html += `${esc(modelData.name)}${dlSource.kind ? ` (${esc(dlSource.kind)} ${esc(modelData.quant || '')})` : (modelData.quant_repo ? ` (${esc(modelData.quant)})` : '')}`;
html += `${esc(label)}`;
html += `HF \u2197`;
html += `
`;
html += `
`;
html += ``;
if (!modelData.is_image_gen) {
html += ``;
html += ``;
}
html += `