Merge remote-tracking branch 'origin/main' into visual-pr-playground

# Conflicts:
#	routes/cookbook_routes.py
#	routes/hwfit_routes.py
#	services/hwfit/fit.py
#	services/hwfit/models.py
#	static/js/cookbook-diagnosis.js
#	static/js/cookbook-hwfit.js
#	static/js/cookbook.js
#	static/js/cookbookRunning.js
This commit is contained in:
pewdiepie-archdaemon
2026-06-03 16:49:10 +09:00
569 changed files with 35252 additions and 3489 deletions
+104 -15
View File
@@ -213,6 +213,8 @@ export function _renderGpuToggles(system) {
if (quantSel && quantSel.value !== '') {
if (count <= 1) {
quantSel.value = 'Q4_K_M'; // RAM or 1 GPU -> Q4 sweet spot
} else if (String(system?.backend || '').toLowerCase() === 'rocm') {
quantSel.value = 'Q4_K_M'; // ROCm default stays GGUF/local-safe; AWQ is explicit only
} else {
quantSel.value = 'AWQ-4bit'; // Multi-GPU -> AWQ for vLLM
}
@@ -244,11 +246,13 @@ function _ctxLabel(value) {
if (!n) return 'Max';
return n >= 1000 ? Math.round(n / 1000) + 'k' : String(n);
}
/**
 * Resolve the context slider to the preset it currently points at.
 *
 * The `#hwfit-context` element's value is treated as an index into
 * `_CTX_PRESETS`: index 3 is used when the element (or its value) is missing,
 * index 0 when the value does not convert to a usable number, and the result
 * is clamped to the valid index range before the lookup.
 *
 * @returns The preset stored at the clamped index, or 0 when that entry is falsy.
 */
function _ctxValue() {
  const sliderEl = document.getElementById('hwfit-context');
  const lastIndex = _CTX_PRESETS.length - 1;
  const requested = Number(sliderEl?.value ?? 3) || 0;
  const clamped = Math.max(0, Math.min(lastIndex, requested));
  const preset = _CTX_PRESETS[clamped];
  return preset || 0;
}
function _syncCtxControl() {
const slider = document.getElementById('hwfit-context');
const label = document.getElementById('hwfit-context-label');
@@ -359,6 +363,7 @@ function _scanSig() {
o: sortEl?.value || 'score',
r: sortEl?.dataset.reverse === '1' ? 1 : 0,
q: document.getElementById('hwfit-quant')?.value || '',
c: _ctxValue(),
g: (tc && typeof tc._activeCount === 'number') ? String(tc._activeCount) : '',
gg: (tc && tc._activeGroup) ? String(tc._activeGroup) : '',
m: _manualHwParams(),
@@ -408,6 +413,17 @@ function _hwfitShowError(list, host, detail) {
if (rb) rb.addEventListener('click', () => { _resetGpuToggleState(); _hwfitFetch(true); });
}
/**
 * Client-side "Engine" view filter over an already-fetched model list.
 *
 * Reads the selected engine from `#hwfit-engine` and keeps only the models
 * whose `_detectBackend(model).backend` equals it. An empty selection means
 * "show all". Nothing is refetched; the input array is not modified.
 *
 * @param models Model list to filter.
 * @returns The filtered array; `models` itself (or `[]` when it is falsy)
 *   if no engine is selected or `models` is not an array.
 */
function _applyEngineFilter(models) {
  const selectedEngine = document.getElementById('hwfit-engine')?.value || '';
  const canFilter = Boolean(selectedEngine) && Array.isArray(models);
  if (!canFilter) {
    return models || [];
  }

  // Best-effort: a model whose backend cannot be detected stays visible
  // rather than silently disappearing from the list.
  const matchesEngine = (model) => {
    try {
      return _detectBackend(model).backend === selectedEngine;
    } catch {
      return true;
    }
  };
  return models.filter(matchesEngine);
}
export async function _hwfitFetch(fresh = false) {
const _tk = ++_hwfitFetchToken;
const useCase = document.getElementById('hwfit-usecase')?.value || '';
@@ -427,7 +443,7 @@ export async function _hwfitFetch(fresh = false) {
if (_cached) {
_hwfitCache = _cached;
_hwfitRenderHw(hw, _cached.system);
_hwfitRenderList(list, _cached.models);
_hwfitRenderList(list, _applyEngineFilter(_cached.models));
} else {
// Show spinner while scanning — stack the spinner above a text label
// (the .hwfit-loading class is a centered flex ROW, so force column here).
@@ -456,7 +472,9 @@ export async function _hwfitFetch(fresh = false) {
fetch(`/api/model/cached?${_cacheParams}`, { credentials: 'same-origin' })
.then(r => r.json())
.then(d => {
_cachedModelIds = new Set((d.models || []).map(m => m.repo_id));
// Exclude stalled (download-shell) entries — a 12 KB README-only
// folder shouldn't count as "downloaded" in the Scan/Download list.
_cachedModelIds = new Set((d.models || []).filter(m => m.status !== 'stalled').map(m => m.repo_id));
// Re-mark rows if already rendered
list.querySelectorAll('.hwfit-row[data-model]').forEach(row => {
const name = row.dataset.model;
@@ -472,6 +490,7 @@ export async function _hwfitFetch(fresh = false) {
try {
const sortBy = document.getElementById('hwfit-sort')?.value || 'score';
const quantPref = document.getElementById('hwfit-quant')?.value || '';
const targetCtx = _ctxValue();
// Get active GPU count from toggles
const toggleContainer = document.getElementById('hwfit-gpu-toggles');
let gpuCountOverride = '';
@@ -507,6 +526,7 @@ export async function _hwfitFetch(fresh = false) {
if (!isImageMode) {
if (useCase) params.set('use_case', useCase);
if (quantPref) params.set('quant', quantPref);
if (targetCtx) params.set('ctx', String(targetCtx));
}
const endpoint = isImageMode ? `/api/hwfit/image-models?${params}` : `/api/hwfit/models?${params}`;
const res = await fetch(endpoint);
@@ -562,13 +582,26 @@ export async function _hwfitFetch(fresh = false) {
const sortSel = document.getElementById('hwfit-sort');
const sortKey = sortSel?.value || 'score';
const asc = sortSel?.dataset.reverse === '1'; // reversed → ascending (lowest first)
const field = { score: 'score', vram: 'required_gb', speed: 'speed_tps', params: 'params_b', context: 'context' }[sortKey] || 'score';
data.models.sort((a, b) => {
const av = Number(a[field]) || 0, bv = Number(b[field]) || 0;
return asc ? av - bv : bv - av;
});
if (sortKey === 'fit') {
// fit_level is categorical (perfect→good→marginal→too_tight), not numeric,
// so rank it explicitly instead of falling through to the score column.
// Tie-break by score so rows within one fit tier stay meaningfully ordered.
const fitRank = { perfect: 4, good: 3, marginal: 2, too_tight: 1, no_fit: 0 };
data.models.sort((a, b) => {
const ar = fitRank[a.fit_level] ?? -1, br = fitRank[b.fit_level] ?? -1;
if (ar !== br) return asc ? ar - br : br - ar;
const as = Number(a.score) || 0, bs = Number(b.score) || 0;
return asc ? as - bs : bs - as;
});
} else {
const field = { score: 'score', vram: 'required_gb', speed: 'speed_tps', params: 'params_b', context: 'context' }[sortKey] || 'score';
data.models.sort((a, b) => {
const av = Number(a[field]) || 0, bv = Number(b[field]) || 0;
return asc ? av - bv : bv - av;
});
}
}
_hwfitRenderList(list, data.models);
_hwfitRenderList(list, _applyEngineFilter(data.models));
// Persist this result so the next page load can paint it instantly.
_writeScanCache(_sig, data);
// Render GPU toggles — only on first scan (no override active)
@@ -614,8 +647,36 @@ export function _hwfitRenderHw(el, sys) {
};
let gpuChip;
if (sys.gpu_name) {
const label = gpuCount > 1 ? `${gpuCount}x ${esc(sys.gpu_name)}` : esc(sys.gpu_name);
gpuChip = chip('gpu', label);
// Mixed-GPU boxes (#711): `${gpuCount}x ${gpu_name}` uses gpus[0].name for
// every card, so a 4090+3060 reads as "2x RTX 4090". Use gpu_groups (the
// backend already groups identical cards) to render each pool separately
// and put the per-card index+VRAM into the tooltip so it's actually
// useful for picking CUDA_VISIBLE_DEVICES.
const groups = Array.isArray(sys.gpu_groups) ? sys.gpu_groups : [];
// Shorten vendor prefixes so a mixed-GPU label fits in the chip row
// without overflowing. Single-GPU label still shows the full name
// (that's what users are used to seeing). Tooltip carries the full
// unmodified names regardless, so no information is lost.
const _shortGpuName = (n) => String(n || '')
.replace(/^NVIDIA\s+GeForce\s+/i, '')
.replace(/^NVIDIA\s+/i, '')
.replace(/^AMD\s+Radeon\s+/i, '')
.replace(/^AMD\s+/i, '')
.replace(/^Intel\s+/i, '');
let label;
if (groups.length > 1) {
// Heterogeneous: "1× RTX 4090 + 1× RTX 3060"
label = groups.map(g => `${g.count}× ${esc(_shortGpuName(g.name))}`).join(' + ');
} else if (gpuCount > 1) {
label = `${gpuCount}× ${esc(sys.gpu_name)}`;
} else {
label = esc(sys.gpu_name);
}
const gpus = Array.isArray(sys.gpus) ? sys.gpus : [];
const tip = gpus.length
? gpus.map(g => `GPU ${g.index}: ${g.name} · ${(+g.vram_gb).toFixed(1)} GB`).join('\n')
: 'Click to toggle off (X to hide)';
gpuChip = chip('gpu', label, tip);
} else if (sys.gpu_error) {
gpuChip = _removedHwChips.has('gpu')
? ''
@@ -761,8 +822,22 @@ function _wireManualHardwareControls(el) {
/** CSS color for each fit level: a theme custom property with a hex fallback. */
export const _fitColors = {
  perfect: 'var(--green, #50fa7b)',
  good: 'var(--yellow, #f1fa8c)',
  marginal: 'var(--orange, #ffb86c)',
  too_tight: 'var(--red, #ff5555)',
};
/**
 * Decide whether a model is in a quantization format that this UI labels as
 * vLLM/SGLang-only (AWQ, GPTQ, FP8, NVFP4).
 *
 * Two signals are checked, in order:
 *   1. the model's `quant` (or `quantization`) tag, upper-cased: it must start
 *      with AWQ, GPTQ or NVFP4, or be exactly `FP8`;
 *   2. the lower-cased `name`, `repo_id` and `path`, joined by spaces: any of
 *      awq / gptq / fp8 / nvfp4 appearing as a whole word.
 *
 * @param model Model record; may be null/undefined.
 * @returns {boolean} True when either signal matches.
 */
function _requiresAcceleratorBackend(model) {
  const quantTag = String(model?.quant || model?.quantization || '').toUpperCase();
  const haystack = `${model?.name || ''} ${model?.repo_id || ''} ${model?.path || ''}`.toLowerCase();

  if (/^AWQ|^GPTQ|^NVFP4/.test(quantTag)) {
    return true;
  }
  if (quantTag === 'FP8') {
    return true;
  }
  // `\b` treats `_` as a word character, so "foo_awq" does not match here
  // while "foo-awq" and "foo/awq" do.
  return /\b(awq|gptq|fp8|nvfp4)\b/i.test(haystack);
}
/**
 * Build the text shown in a model row's "mode" column.
 *
 * Precedence, first match wins:
 *   1. image-generation models            -> 'image'
 *   2. `_requiresAcceleratorBackend` true -> 'vLLM/SGLang'
 *   3. `_detectBackend(model).label` when it is truthy
 *   4. `run_mode` with its first underscore replaced by '+'
 *
 * @param model Model record; may be null/undefined.
 * @returns The label; '' when nothing applies (a truthy backend label is
 *   returned as-is).
 */
function _modeLabel(model) {
  if (model?.is_image_gen) {
    return 'image';
  }
  if (_requiresAcceleratorBackend(model)) {
    return 'vLLM/SGLang';
  }

  const backendLabel = _detectBackend(model)?.label;
  if (backendLabel) {
    return backendLabel;
  }

  // A string pattern in replace() swaps only the first '_' occurrence.
  const runMode = String(model?.run_mode || '');
  return runMode.replace('_', '+');
}
export const _hwfitColumns = [
{ key: 'score', label: 'Fit', cls: 'hwfit-fit' },
{ key: 'fit', label: 'Fit', cls: 'hwfit-fit' },
{ key: null, label: 'Model', cls: 'hwfit-name' },
{ key: 'params',label: 'Param', cls: 'hwfit-c-params' },
{ key: null, label: 'Quant', cls: 'hwfit-c-quant' },
@@ -783,9 +858,10 @@ export function _hwfitRenderList(el, models) {
const hasHw = sys && ((sys.gpu_vram_gb || 0) > 0 || (sys.total_ram_gb || 0) > 8);
const hasFilters = !!(document.getElementById('hwfit-search')?.value?.trim()
|| document.getElementById('hwfit-usecase')?.value
|| document.getElementById('hwfit-quant')?.value);
|| document.getElementById('hwfit-quant')?.value
|| document.getElementById('hwfit-engine')?.value);
let msg;
if (hasFilters) msg = 'No models match these filters — try clearing the search, use-case, or quant.';
if (hasFilters) msg = 'No models match these filters — try clearing the search, use-case, quant, or engine.';
else if (hasHw) msg = 'No models fit — the hardware probe may have under-reported. Try Rescan.';
else msg = 'No models fit your hardware';
el.innerHTML = `<div class="hwfit-loading">${msg}</div>`;
@@ -827,7 +903,7 @@ export function _hwfitRenderList(el, models) {
const pcount = m.parameter_count || '?';
const ctx = m.context ? (m.context >= 1024 ? (m.context / 1024).toFixed(0) + 'k' : m.context) : '?';
const fitLabel = (m.fit_level || '').replace('_', ' ');
const modeLabel = (m.run_mode || '').replace('_', '+');
const modeLabel = _modeLabel(m);
const vramLabel = m.required_gb ? m.required_gb.toFixed(1) + 'G' : '?';
const moeBadge = m.is_moe ? '<span class="hwfit-badge hwfit-moe">MoE</span>' : '';
const imgBadge = m.is_image_gen ? '<span class="hwfit-badge" style="background:color-mix(in srgb, var(--red) 20%, transparent);color:var(--red);font-size:8px;padding:1px 4px;border-radius:3px;margin-left:4px;">IMG</span>' : '';
@@ -841,7 +917,7 @@ export function _hwfitRenderList(el, models) {
html += `<span class="hwfit-col hwfit-c-ctx">${m.is_image_gen ? '\u2014' : ctx}</span>`;
html += `<span class="hwfit-col hwfit-c-speed">${m.is_image_gen ? '\u2014' : tps + ' t/s'}</span>`;
html += `<span class="hwfit-col hwfit-c-score">${score}</span>`;
html += `<span class="hwfit-col hwfit-c-mode">${m.is_image_gen ? 'image' : esc(modeLabel)}</span>`;
html += `<span class="hwfit-col hwfit-c-mode" title="${_requiresAcceleratorBackend(m) ? 'Requires vLLM or SGLang with a visible CUDA/ROCm accelerator. llama.cpp and Ollama need GGUF files.' : ''}">${esc(modeLabel)}</span>`;
html += `</div>`;
}
el.innerHTML = html;
@@ -941,6 +1017,8 @@ export function _expandModelRow(row, modelData) {
html += `</div>`;
if (modelData.is_image_gen) {
html += `<div style="font-size:10px;opacity:0.5;margin-top:4px;">${esc((modelData.capabilities || []).join(' \u00B7 ') || '')}${modelData.description ? ' \u2014 ' + esc(modelData.description) : ''}</div>`;
} else if (_requiresAcceleratorBackend(modelData)) {
html += `<div class="hwfit-panel-note">This is a safetensors GPU-serving format. Use vLLM/SGLang with a visible CUDA/ROCm accelerator, or pick a GGUF download for llama.cpp/Ollama.</div>`;
}
html += `</div>`;
@@ -1145,6 +1223,17 @@ export function _hwfitInit() {
if (uc) uc.addEventListener('change', () => _hwfitFetch());
if (sort) sort.addEventListener('change', () => _hwfitFetch());
if (qpref) qpref.addEventListener('change', () => _hwfitFetch());
// Engine filter is a pure client-side view filter over the already-fetched
// list, so just re-render from cache instead of re-probing hardware.
const engine = document.getElementById('hwfit-engine');
if (engine) engine.addEventListener('change', () => {
const list = document.getElementById('hwfit-list');
if (list && _hwfitCache && Array.isArray(_hwfitCache.models)) {
_hwfitRenderList(list, _applyEngineFilter(_hwfitCache.models));
} else {
_hwfitFetch();
}
});
if (ctx && !ctx.dataset.bound) {
ctx.dataset.bound = '1';
ctx.addEventListener('input', () => {